Merge branch 'paper'

2025-05-26 01:41:59 +08:00
parent aa4758af4e fa0f95f21d
commit d5652beb01
14 changed files with 1227 additions and 1 deletions
--- a/paper/latex/figures/system_architecture.tex
+++ b/paper/latex/figures/system_architecture.tex
@@ -16,7 +16,7 @@
    \node[title, above=0.1cm of business.north west, anchor=west] {业务逻辑层};
    
    \node[layer, fill=orange!10, below=0.5cm of business] (data) {};
-    \node[title, above=0.1cm of data.north west, anchor=west] {数据访问层模块};
+    \node[title, above=0.1cm of data.north west, anchor=west] {数据访问层};
    
    % 表现层模块
    \node[module, fill=blue!20] (ui1) at (-3.5, 1.5) {模型管理};
--- a/paper/latex/pic/training_metrics.png
+++ b/paper/latex/pic/training_metrics.png
--- a/report/.gitignore
+++ b/report/.gitignore
@@ -0,0 +1,21 @@
+# LaTeX temporary files and build outputs
+*.aux
+*.log
+*.out
+*.toc
+*.synctex.gz
+*.bbl
+*.blg
+*.dvi
+*.fdb_latexmk
+*.fls
+*.lof
+*.lot
+*.idx
+*.ilg
+*.ind
+*.nav
+*.snm
+*.vrb
+*.xdv
+*.pdf
--- a/report/chapters/acknowledgements.tex
+++ b/report/chapters/acknowledgements.tex
@@ -0,0 +1,7 @@
+\begin{frame} % Closing slide: a centered two-line thank-you message
+\frametitle{致谢}
+\begin{center}
+{\LARGE\textbf{感谢聆听！}}\\[0.5cm] % \LARGE and \large are declarations that take no argument, so each is scoped by a brace group
+{\large 欢迎各位老师提出宝贵意见}
+\end{center}
+\end{frame}
--- a/report/chapters/background.tex
+++ b/report/chapters/background.tex
@@ -0,0 +1,10 @@
+% Research background
+\section{研究背景}
+\begin{frame} % Single slide; each item passes a Font Awesome icon as its \item label
+\frametitle{研究背景}
+\begin{itemize}
+\item[\faCheckCircle] 研究领域现状：大语言模型在公开数据集与开源项目中表现优异，但在处理企业私有库时存在局限性
+\item[\faExclamationTriangle] 存在的问题：缺乏对私有库专有函数、类及其交互细节的深度理解，无法精准引用库中的类、方法或属性
+\item[\faFlask] 研究意义：通过深度解析私有库文档，对大语言模型进行微调与优化，提升生成代码的准确性和实用性
+\end{itemize}
+\end{frame}
--- a/report/chapters/conclusion.tex
+++ b/report/chapters/conclusion.tex
@@ -0,0 +1,26 @@
+% Summary and outlook
+\section{总结与展望}
+\begin{frame} % Three icon-labelled items, each with a nested list, followed by the repository link
+\frametitle{总结与展望}
+\begin{itemize}
+\item[\faBook] 主要工作总结
+  \begin{itemize}
+    \item 提出了文档驱动的自适应编码大模型微调框架
+    \item 实现了三层架构设计（表现层、业务逻辑层和数据访问层）
+    \item 实现了大语言模型的微调
+  \end{itemize}
+\item[\faLightbulb] 创新点总结
+  \begin{itemize}
+    \item 开发了基于栈结构的Markdown文档解析器
+    \item 采用了QLoRA参数高效微调方法
+    \item 构建了基于提示工程的数据集生成工具
+  \end{itemize}
+\item[\faRoad] 未来工作展望
+  \begin{itemize}
+    \item 边缘计算部署
+    \item 更多的格式输入支持
+    \item CICD集成
+  \end{itemize}
+\item[\faGithub] 软件仓库地址：\href{https://gitea.carry.fit/carry/gzhu-biyesheji}{https://gitea.carry.fit/carry/gzhu-biyesheji}
+\end{itemize}
+\end{frame}
--- a/report/chapters/design.tex
+++ b/report/chapters/design.tex
@@ -0,0 +1,6 @@
+% System design
+\section{系统设计}
+\begin{frame} % Shows the architecture diagram, bounded by \textwidth and 0.7\textheight with the aspect ratio kept
+\frametitle{系统架构}
+\begin{center}\includegraphics[width=\textwidth,height=0.7\textheight,keepaspectratio]{./pic/system_architecture.png}\end{center}
+\end{frame}
--- a/report/chapters/results.tex
+++ b/report/chapters/results.tex
@@ -0,0 +1,85 @@
+% Experimental results
+\section{实验结果}
+
+\begin{frame} % Experimental setup: hardware, software environment, base model and documentation source
+\frametitle{实验条件介绍}
+\begin{itemize}
+\item[\faLaptop] 硬件配置
+    \begin{itemize}
+        \item 笔记本型号：Lenovo Legion R7000P 2021H
+        \item CPU：AMD Ryzen 7 5800H
+        \item GPU：NVIDIA GeForce RTX 3060 Laptop GPU（6GB显存）
+        \item 内存：16GB DDR4
+    \end{itemize}
+\item[\faUbuntu] 软件环境
+    \begin{itemize}
+        \item 操作系统：Ubuntu 22.04 LTS（通过WSL2运行）
+        \item Python版本：3.11.6
+        \item 深度学习框架：PyTorch 2.1.0+cu121
+    \end{itemize}
+\item[\faRobot] 微调模型：unsloth官方qwen-2.5-3b-4bit量化模型（\href{https://huggingface.co/unsloth/Qwen2.5-3B-Instruct-bnb-4bit}{https://huggingface.co/unsloth/Qwen2.5-3B-Instruct-bnb-4bit}）
+\item[\faBook] 仓库文档：unsloth官方仓库文档（\href{https://docs.unsloth.ai/}{https://docs.unsloth.ai/}）
+\end{itemize}
+\end{frame}
+
+\begin{frame} % Line plot of the "loss" column against "Step", read from ./figures/training_data.csv
+\frametitle{训练指标变化}
+\begin{figure}[h]
+\centering
+\begin{tikzpicture}
+\begin{axis}[
+    xlabel=Step,
+    ylabel=Loss,
+    width=\textwidth,
+    height=0.8\textheight]
+\addplot[mark=none] table[x=Step,y=loss,col sep=comma] {./figures/training_data.csv};
+\end{axis}
+\end{tikzpicture}
+\caption{训练损失变化曲线}
+\end{figure}
+\end{frame}
+
+\begin{frame} % Line plot of the "grad_norm" column against "Step", read from ./figures/training_data.csv
+\frametitle{训练指标变化}
+\begin{figure}[h]
+\centering
+\begin{tikzpicture}
+\begin{axis}[
+    xlabel=Step,
+    ylabel=Grad Norm,
+    width=\textwidth,
+    height=0.8\textheight]
+\addplot[mark=none] table[x=Step,y=grad_norm,col sep=comma] {./figures/training_data.csv};
+\end{axis}
+\end{tikzpicture}
+\caption{梯度范数变化曲线}
+\end{figure}
+\end{frame}
+
+\begin{frame} % Line plot of the "learning_rate" column against "Step", read from ./figures/training_data.csv
+\frametitle{训练指标变化}
+\begin{figure}[h]
+\centering
+\begin{tikzpicture}
+\begin{axis}[
+    xlabel=Step,
+    ylabel=Learning Rate,
+    width=\textwidth,
+    height=0.8\textheight]
+\addplot[mark=none] table[x=Step,y=learning_rate,col sep=comma] {./figures/training_data.csv};
+\end{axis}
+\end{tikzpicture}
+\caption{学习率变化曲线}
+\end{figure}
+\end{frame}
+
+\begin{frame} % Two images side by side under one caption: before_train.png first, then after_train.png
+\frametitle{微调效果验证}
+\begin{figure}[htbp]
+\centering
+\includegraphics[width=0.45\textwidth]{./pic/before_train.png}
+\hspace{0.05\textwidth}
+\includegraphics[width=0.45\textwidth]{./pic/after_train.png}
+\caption{训练前后对比}
+\end{figure}
+\end{frame}
--- a/report/chapters/technology.tex
+++ b/report/chapters/technology.tex
@@ -0,0 +1,24 @@
+% Technology overview
+\section{技术介绍}
+\begin{frame} % Top-level items pass the icon as the \item label so it replaces the default bullet
+\frametitle{技术介绍}
+\begin{itemize}
+\item[\faDesktop] 前端技术栈
+  \begin{itemize}
+  \item Gradio - Python 机器学习前端框架
+  \end{itemize}
+\item[\faServer] 后端技术栈
+  \begin{itemize}
+  \item Python - 主要开发语言
+  \item SQLite - 轻量级关系型数据库
+  \item TinyDB - 轻量级非关系型数据库
+  \item LangChain - 大语言模型应用框架
+  \end{itemize}
+\item[\faBrain] 人工智能技术
+  \begin{itemize}
+  \item Qwen2.5 - 开源大语言模型
+  \item Unsloth - 开源微调工具
+  \item QLoRA - 低秩适应微调技术
+  \end{itemize}
+\end{itemize}
+\end{frame}
--- a/report/figures/training_data.csv
+++ b/report/figures/training_data.csv
--- a/report/main.tex
+++ b/report/main.tex
@@ -0,0 +1,46 @@
+\documentclass[aspectratio=43]{beamer}
+\usepackage{ctex}
+\usepackage{graphicx}
+\usepackage{amsmath}
+\usepackage{hyperref}
+\usepackage{listings}
+\usepackage{xcolor}
+\usepackage{fontawesome5}
+\usepackage{subcaption}
+\usepackage{booktabs}
+\usepackage{tikz}
+\usepackage{pgfplots}
+\pgfplotsset{compat=1.18}
+
+% Theme setup
+\usetheme{Madrid}
+\usecolortheme{default}
+\setbeamertemplate{navigation symbols}{}
+
+% Title information
+\title{基于文档驱动的自适应编码大模型微调框架}
+\subtitle{毕业设计答辩}
+\author[XXX]{姓名：XXX \\ 学号：XXX \\ 指导教师：XXX}
+\institute{广州大学}
+\date{\today}
+
+\begin{document}
+
+% Title page
+\frame{\titlepage}
+
+% Table of contents
+\begin{frame}
+\frametitle{目录}
+\tableofcontents
+\end{frame}
+
+% Pull in the chapter files, in presentation order
+\input{chapters/background}
+\input{chapters/technology}
+\input{chapters/design}
+\input{chapters/results}
+\input{chapters/conclusion}
+\input{chapters/acknowledgements}
+
+\end{document}
--- a/report/pic/after_train.png
+++ b/report/pic/after_train.png
--- a/report/pic/before_train.png
+++ b/report/pic/before_train.png
--- a/report/pic/system_architecture.png
+++ b/report/pic/system_architecture.png