Merge branch 'paper'

This commit is contained in:
carry 2025-05-26 01:41:59 +08:00
commit d5652beb01
14 changed files with 1227 additions and 1 deletions

View File

@ -16,7 +16,7 @@
\node[title, above=0.1cm of business.north west, anchor=west] {业务逻辑层};
\node[layer, fill=orange!10, below=0.5cm of business] (data) {};
\node[title, above=0.1cm of data.north west, anchor=west] {数据访问层模块};
\node[title, above=0.1cm of data.north west, anchor=west] {数据访问层};
% 表现层模块
\node[module, fill=blue!20] (ui1) at (-3.5, 1.5) {模型管理};

Binary file not shown.

Before

Width:  |  Height:  |  Size: 151 KiB

21
report/.gitignore vendored Normal file
View File

@ -0,0 +1,21 @@
# LaTeX build artefacts, grouped by the kind of file.

# Auxiliary, log and build-tool files
*.aux
*.log
*.out
*.fls
*.fdb_latexmk
*.synctex.gz

# Table of contents / list of figures / list of tables
*.toc
*.lof
*.lot

# Bibliography intermediates
*.bbl
*.blg

# Index intermediates
*.idx
*.ilg
*.ind

# Beamer scratch files
*.nav
*.snm
*.vrb

# Compiled output
*.dvi
*.xdv
*.pdf

View File

@ -0,0 +1,7 @@
% Closing slide: thanks the audience and invites feedback.
\begin{frame}
  \frametitle{致谢}
  \begin{center}
    % \LARGE and \large are font-size declarations, not one-argument commands:
    % writing \LARGE{...} does not limit the size change to the braces.
    % Each declaration is therefore placed inside the group it should affect.
    {\LARGE\textbf{感谢聆听!}}\\[0.5cm]
    % \par inside the group so the line is set with the \large leading.
    {\large 欢迎各位老师提出宝贵意见\par}
  \end{center}
\end{frame}

View File

@ -0,0 +1,10 @@
% Research background
\section{研究背景}
% Single slide with three points, each using a Font Awesome icon as its item label.
\begin{frame}{研究背景}
  \begin{itemize}
    \item[\faCheckCircle]
      研究领域现状:大语言模型在公开数据集与开源项目中表现优异,但在处理企业私有库时存在局限性
    \item[\faExclamationTriangle]
      存在的问题:缺乏对私有库专有函数、类及其交互细节的深度理解,无法精准引用库中的类、方法或属性
    \item[\faFlask]
      研究意义:通过深度解析私有库文档,对大语言模型进行微调与优化,提升生成代码的准确性和实用性
  \end{itemize}
\end{frame}

View File

@ -0,0 +1,26 @@
% Summary and outlook
\section{总结与展望}
% Single slide: work summary, innovations, future work, and the repository link.
\begin{frame}
  \frametitle{总结与展望}
  \begin{itemize}
    \item[\faBook] 主要工作总结
      \begin{itemize}
        \item 提出了文档驱动的自适应编码大模型微调框架
        \item 实现了三层架构设计(表现层、业务逻辑层和数据访问层)
        \item 实现了大语言模型的微调
      \end{itemize}
    \item[\faLightbulb] 创新点总结
      \begin{itemize}
        \item 开发了基于栈结构的Markdown文档解析器
        \item 采用了QLoRA参数高效微调方法
        \item 构建了基于提示工程的数据集生成工具
      \end{itemize}
    \item[\faRoad] 未来工作展望
      \begin{itemize}
        \item 边缘计算部署
        \item 更多的格式输入支持
        \item CI/CD集成
      \end{itemize}
    % \url prints the address and links to it, so it is written only once
    % and can be broken across lines if it does not fit.
    \item[\faGithub] 软件仓库地址:\url{https://gitea.carry.fit/carry/gzhu-biyesheji}
  \end{itemize}
\end{frame}

View File

@ -0,0 +1,6 @@
% System design
\section{系统设计}
% Single slide showing the architecture diagram, centred and scaled
% (aspect ratio kept) to fit the text width and 70% of the text height.
\begin{frame}{系统架构}
  \begin{center}
    \includegraphics[keepaspectratio,width=\textwidth,height=0.7\textheight]{./pic/system_architecture.png}
  \end{center}
\end{frame}

View File

@ -0,0 +1,85 @@
% Experimental results
\section{实验结果}
% Slide: hardware, software environment, base model and documentation source
% used for the experiments. Each entry is written as "label:value", with
% supplementary details in fullwidth parentheses.
\begin{frame}
  \frametitle{实验条件介绍}
  \begin{itemize}
    \item[\faLaptop] 硬件配置
      \begin{itemize}
        \item 笔记本型号:Lenovo Legion R7000P 2021H
        \item CPU:AMD Ryzen 7 5800H
        \item GPU:NVIDIA GeForce RTX 3060 Laptop GPU(6GB显存)
        \item 内存:16GB DDR4
      \end{itemize}
    \item[\faUbuntu] 软件环境
      \begin{itemize}
        \item 操作系统:Ubuntu 22.04 LTS(通过WSL2运行)
        \item Python版本:3.11.6
        \item 深度学习框架:PyTorch 2.1.0+cu121
      \end{itemize}
    % \url is used instead of \href{u}{u}: the address is written once and
    % may be broken across lines, which matters for the long model URL.
    \item[\faRobot] 微调模型:unsloth官方qwen-2.5-3b-4bit量化模型(\url{https://huggingface.co/unsloth/Qwen2.5-3B-Instruct-bnb-4bit})
    \item[\faBook] 仓库文档:unsloth官方仓库文档(\url{https://docs.unsloth.ai/})
  \end{itemize}
\end{frame}
% Slide: line plot of the "loss" column against the "Step" column of the
% comma-separated training log.
\begin{frame}{训练指标变化}
  \begin{figure}[h]
    \centering
    \begin{tikzpicture}
      \begin{axis}[
          width=\textwidth,
          height=0.8\textheight,
          xlabel=Step,
          ylabel=Loss
        ]
        % Markers are disabled so only the connecting line is drawn.
        \addplot[mark=none]
          table[col sep=comma, x=Step, y=loss] {./figures/training_data.csv};
      \end{axis}
    \end{tikzpicture}
    \caption{训练损失变化曲线}
  \end{figure}
\end{frame}
% Slide: line plot of the "grad_norm" column against the "Step" column of the
% comma-separated training log.
\begin{frame}{训练指标变化}
  \begin{figure}[h]
    \centering
    \begin{tikzpicture}
      \begin{axis}[
          width=\textwidth,
          height=0.8\textheight,
          xlabel=Step,
          ylabel=Grad Norm
        ]
        % Markers are disabled so only the connecting line is drawn.
        \addplot[mark=none]
          table[col sep=comma, x=Step, y=grad_norm] {./figures/training_data.csv};
      \end{axis}
    \end{tikzpicture}
    \caption{梯度范数变化曲线}
  \end{figure}
\end{frame}
% Slide: line plot of the "learning_rate" column against the "Step" column of
% the comma-separated training log.
\begin{frame}{训练指标变化}
  \begin{figure}[h]
    \centering
    \begin{tikzpicture}
      \begin{axis}[
          width=\textwidth,
          height=0.8\textheight,
          xlabel=Step,
          ylabel=Learning Rate
        ]
        % Markers are disabled so only the connecting line is drawn.
        \addplot[mark=none]
          table[col sep=comma, x=Step, y=learning_rate] {./figures/training_data.csv};
      \end{axis}
    \end{tikzpicture}
    \caption{学习率变化曲线}
  \end{figure}
\end{frame}
% Slide: two screenshots side by side (before_train.png, then after_train.png),
% each 45% of the text width with a 5% horizontal gap, under one shared caption.
\begin{frame}{微调效果验证}
  \begin{figure}[htbp]
    \centering
    \includegraphics[width=0.45\textwidth]{./pic/before_train.png}
    \hspace{0.05\textwidth}
    \includegraphics[width=0.45\textwidth]{./pic/after_train.png}
    \caption{训练前后对比}
  \end{figure}
\end{frame}

View File

@ -0,0 +1,24 @@
% Technology overview
\section{技术介绍}
% Single slide listing the front-end, back-end and AI technology stacks.
\begin{frame}
  \frametitle{技术介绍}
  \begin{itemize}
    % The icons are given as item labels (\item[...]) rather than as the first
    % token of the item text: TeX discards the space that follows a control
    % word such as \faDesktop, so an inline icon is not separated from the
    % heading by the space typed in the source.
    \item[\faDesktop] 前端技术栈
      \begin{itemize}
        \item Gradio - Python 机器学习前端框架
      \end{itemize}
    \item[\faServer] 后端技术栈
      \begin{itemize}
        \item Python - 主要开发语言
        \item SQLite - 轻量级关系型数据库
        \item TinyDB - 轻量级非关系型数据库
        \item LangChain - 大语言模型应用框架
      \end{itemize}
    \item[\faBrain] 人工智能技术
      \begin{itemize}
        \item Qwen2.5 - 开源大语言模型
        \item Unsloth - 开源微调工具
        \item QLoRA - 低秩适应微调技术
      \end{itemize}
  \end{itemize}
\end{frame}

File diff suppressed because it is too large Load Diff

46
report/main.tex Normal file
View File

@ -0,0 +1,46 @@
% Entry point of the defense slides: 4:3 beamer deck that sets up packages,
% theme and title information, then pulls in one file per chapter.
\documentclass[aspectratio=43]{beamer}
\usepackage{ctex}
\usepackage{graphicx}
\usepackage{amsmath}
\usepackage{hyperref}
\usepackage{listings}
\usepackage{xcolor}
\usepackage{fontawesome5}
\usepackage{subcaption}
\usepackage{booktabs}
\usepackage{tikz}
\usepackage{pgfplots}
\pgfplotsset{compat=1.18}
% Theme
\usetheme{Madrid}
\usecolortheme{default}
% Hide the navigation symbols.
\setbeamertemplate{navigation symbols}{}
% Title information
\title{基于文档驱动的自适应编码大模型微调框架}
\subtitle{毕业设计答辩}
% Each line of the author block is written as "label:value".
\author[XXX]{姓名:XXX \\ 学号:XXX \\ 指导教师:XXX}
\institute{广州大学}
\date{\today}
\begin{document}
% Title page
\frame{\titlepage}
% Table of contents
\begin{frame}
  \frametitle{目录}
  \tableofcontents
\end{frame}
% Chapter contents, in presentation order
\input{chapters/background}
\input{chapters/technology}
\input{chapters/design}
\input{chapters/results}
\input{chapters/conclusion}
\input{chapters/acknowledgements}
\end{document}

BIN
report/pic/after_train.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 56 KiB

BIN
report/pic/before_train.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 73 KiB

Binary file not shown.

After

Width:  |  Height:  |  Size: 113 KiB