Compare commits
17 Commits
aa4758af4e
...
67c5ed310a
Author | SHA1 | Date | |
---|---|---|---|
![]() |
67c5ed310a | ||
![]() |
4785985b6f | ||
![]() |
47fd47dab1 | ||
![]() |
eaf3d5f4ca | ||
![]() |
d5652beb01 | ||
![]() |
fa0f95f21d | ||
![]() |
b6cba12aa0 | ||
![]() |
76c494d086 | ||
![]() |
122cdcfd9c | ||
![]() |
0f338db935 | ||
![]() |
ce8e0b9b6a | ||
![]() |
8219b0f68f | ||
![]() |
5fb6faa59a | ||
![]() |
e3843a1853 | ||
![]() |
1955a4d76b | ||
![]() |
3898290e16 | ||
![]() |
228b3b9e64 |
@ -2,13 +2,17 @@
|
|||||||
## 项目背景
|
## 项目背景
|
||||||
本项目是广州大学计算机科学与网络工程学院计算机科学与技术专业某2025届本科生的毕业设计。
|
本项目是广州大学计算机科学与网络工程学院计算机科学与技术专业某2025届本科生的毕业设计。
|
||||||
|
|
||||||
## 论文编译
|
## LaTeX编译要求
|
||||||
论文LaTeX源码位于`paper/`目录,编译环境要求:
|
编译环境要求:
|
||||||
- 操作系统:Windows 11
|
- 操作系统:Windows 11
|
||||||
- TeX发行版:TeXLive 2023+
|
- TeX发行版:TeXLive 2023+
|
||||||
- 编译引擎:XeLaTeX
|
- 编译引擎:XeLaTeX
|
||||||
- 字体要求:方正楷体简体(需预先安装)
|
- 字体要求:方正楷体简体(需预先安装)
|
||||||
|
|
||||||
|
## 相关LaTeX资源
|
||||||
|
- 论文LaTeX源码位于`paper/`目录
|
||||||
|
- 答辩PPT相关资源位于`report/`目录
|
||||||
|
|
||||||
## 项目简介
|
## 项目简介
|
||||||
|
|
||||||
### 项目概述
|
### 项目概述
|
||||||
|
@ -16,7 +16,7 @@
|
|||||||
\node[title, above=0.1cm of business.north west, anchor=west] {业务逻辑层};
|
\node[title, above=0.1cm of business.north west, anchor=west] {业务逻辑层};
|
||||||
|
|
||||||
\node[layer, fill=orange!10, below=0.5cm of business] (data) {};
|
\node[layer, fill=orange!10, below=0.5cm of business] (data) {};
|
||||||
\node[title, above=0.1cm of data.north west, anchor=west] {数据访问层模块};
|
\node[title, above=0.1cm of data.north west, anchor=west] {数据访问层};
|
||||||
|
|
||||||
% 表现层模块
|
% 表现层模块
|
||||||
\node[module, fill=blue!20] (ui1) at (-3.5, 1.5) {模型管理};
|
\node[module, fill=blue!20] (ui1) at (-3.5, 1.5) {模型管理};
|
||||||
|
Binary file not shown.
Before Width: | Height: | Size: 151 KiB |
21
report/.gitignore
vendored
Normal file
21
report/.gitignore
vendored
Normal file
@ -0,0 +1,21 @@
|
|||||||
|
# LaTeX临时文件
|
||||||
|
*.aux
|
||||||
|
*.log
|
||||||
|
*.out
|
||||||
|
*.toc
|
||||||
|
*.synctex.gz
|
||||||
|
*.bbl
|
||||||
|
*.blg
|
||||||
|
*.dvi
|
||||||
|
*.fdb_latexmk
|
||||||
|
*.fls
|
||||||
|
*.lof
|
||||||
|
*.lot
|
||||||
|
*.idx
|
||||||
|
*.ilg
|
||||||
|
*.ind
|
||||||
|
*.nav
|
||||||
|
*.snm
|
||||||
|
*.vrb
|
||||||
|
*.xdv
|
||||||
|
*.pdf
|
7
report/chapters/acknowledgements.tex
Normal file
7
report/chapters/acknowledgements.tex
Normal file
@ -0,0 +1,7 @@
|
|||||||
|
\begin{frame}
|
||||||
|
\frametitle{致谢}
|
||||||
|
\begin{center}
|
||||||
|
\LARGE{\textbf{感谢聆听!}}\\[0.5cm]
|
||||||
|
\large{欢迎各位老师提出宝贵意见}
|
||||||
|
\end{center}
|
||||||
|
\end{frame}
|
10
report/chapters/background.tex
Normal file
10
report/chapters/background.tex
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
% 研究背景
|
||||||
|
\section{研究背景}
|
||||||
|
\begin{frame}
|
||||||
|
\frametitle{研究背景}
|
||||||
|
\begin{itemize}
|
||||||
|
\item[\faCheckCircle] 研究领域现状:大语言模型在公开数据集与开源项目中表现优异,但在处理企业私有库时存在局限性
|
||||||
|
\item[\faExclamationTriangle] 存在的问题:缺乏对私有库专有函数、类及其交互细节的深度理解,无法精准引用库中的类、方法或属性
|
||||||
|
\item[\faFlask] 研究意义:通过深度解析私有库文档,对大语言模型进行微调与优化,提升生成代码的准确性和实用性
|
||||||
|
\end{itemize}
|
||||||
|
\end{frame}
|
26
report/chapters/conclusion.tex
Normal file
26
report/chapters/conclusion.tex
Normal file
@ -0,0 +1,26 @@
|
|||||||
|
% 总结与展望
|
||||||
|
\section{总结与展望}
|
||||||
|
\begin{frame}
|
||||||
|
\frametitle{总结与展望}
|
||||||
|
\begin{itemize}
|
||||||
|
\item[\faBook] 主要工作总结
|
||||||
|
\begin{itemize}
|
||||||
|
\item 提出了文档驱动的自适应编码大模型微调框架
|
||||||
|
\item 实现了三层架构设计(表现层、业务逻辑层和数据访问层)
|
||||||
|
\item 实现了大语言模型的微调
|
||||||
|
\end{itemize}
|
||||||
|
\item[\faLightbulb] 创新点总结
|
||||||
|
\begin{itemize}
|
||||||
|
\item 开发了基于栈结构的Markdown文档解析器
|
||||||
|
\item 采用了QLoRA参数高效微调方法
|
||||||
|
\item 构建了基于提示工程的数据集生成工具
|
||||||
|
\end{itemize}
|
||||||
|
\item[\faRoad] 未来工作展望
|
||||||
|
\begin{itemize}
|
||||||
|
\item 边缘计算部署
|
||||||
|
\item 更多的格式输入支持
|
||||||
|
\item CI/CD集成
|
||||||
|
\end{itemize}
|
||||||
|
\item[\faGithub] 软件仓库地址:\href{https://gitea.carry.fit/carry/gzhu-biyesheji}{https://gitea.carry.fit/carry/gzhu-biyesheji}
|
||||||
|
\end{itemize}
|
||||||
|
\end{frame}
|
6
report/chapters/design.tex
Normal file
6
report/chapters/design.tex
Normal file
@ -0,0 +1,6 @@
|
|||||||
|
% 系统设计
|
||||||
|
\section{系统设计}
|
||||||
|
\begin{frame}
|
||||||
|
\frametitle{系统架构}
|
||||||
|
\begin{center}\includegraphics[width=\textwidth,height=0.7\textheight,keepaspectratio]{./pic/system_architecture.png}\end{center}
|
||||||
|
\end{frame}
|
85
report/chapters/results.tex
Normal file
85
report/chapters/results.tex
Normal file
@ -0,0 +1,85 @@
|
|||||||
|
% 实验结果
|
||||||
|
\section{实验结果}
|
||||||
|
|
||||||
|
\begin{frame}
|
||||||
|
\frametitle{实验条件介绍}
|
||||||
|
\begin{itemize}
|
||||||
|
\item[\faLaptop] 硬件配置
|
||||||
|
\begin{itemize}
|
||||||
|
\item 笔记本型号:Lenovo Legion R7000P 2021H
|
||||||
|
\item CPU:AMD Ryzen 7 5800H
|
||||||
|
\item GPU:NVIDIA GeForce RTX 3060 Laptop GPU(6GB显存)
|
||||||
|
\item 内存:16GB DDR4
|
||||||
|
\end{itemize}
|
||||||
|
\item[\faUbuntu] 软件环境
|
||||||
|
\begin{itemize}
|
||||||
|
\item 操作系统:Ubuntu 22.04 LTS(通过WSL2运行)
|
||||||
|
\item Python版本:3.11.6
|
||||||
|
\item 深度学习框架:PyTorch 2.1.0+cu121
|
||||||
|
\end{itemize}
|
||||||
|
\item[\faRobot] 微调模型:unsloth官方qwen-2.5-3b-4bit量化模型(\href{https://huggingface.co/unsloth/Qwen2.5-3B-Instruct-bnb-4bit}{https://huggingface.co/unsloth/Qwen2.5-3B-Instruct-bnb-4bit})
|
||||||
|
\item[\faBook] 仓库文档:unsloth官方仓库文档(\href{https://docs.unsloth.ai/}{https://docs.unsloth.ai/})
|
||||||
|
\end{itemize}
|
||||||
|
\end{frame}
|
||||||
|
|
||||||
|
\begin{frame}
|
||||||
|
\frametitle{训练指标变化}
|
||||||
|
\begin{figure}[h]
|
||||||
|
\centering
|
||||||
|
\begin{tikzpicture}
|
||||||
|
\begin{axis}[
|
||||||
|
xlabel=Step,
|
||||||
|
ylabel=Loss,
|
||||||
|
width=\textwidth,
|
||||||
|
height=0.8\textheight]
|
||||||
|
\addplot[mark=none] table[x=Step,y=loss,col sep=comma] {./figures/training_data.csv};
|
||||||
|
\end{axis}
|
||||||
|
\end{tikzpicture}
|
||||||
|
\caption{训练损失变化曲线}
|
||||||
|
\end{figure}
|
||||||
|
\end{frame}
|
||||||
|
|
||||||
|
\begin{frame}
|
||||||
|
\frametitle{训练指标变化}
|
||||||
|
\begin{figure}[h]
|
||||||
|
\centering
|
||||||
|
\begin{tikzpicture}
|
||||||
|
\begin{axis}[
|
||||||
|
xlabel=Step,
|
||||||
|
ylabel=Grad Norm,
|
||||||
|
width=\textwidth,
|
||||||
|
height=0.8\textheight]
|
||||||
|
\addplot[mark=none] table[x=Step,y=grad_norm,col sep=comma] {./figures/training_data.csv};
|
||||||
|
\end{axis}
|
||||||
|
\end{tikzpicture}
|
||||||
|
\caption{梯度范数变化曲线}
|
||||||
|
\end{figure}
|
||||||
|
\end{frame}
|
||||||
|
|
||||||
|
\begin{frame}
|
||||||
|
\frametitle{训练指标变化}
|
||||||
|
\begin{figure}[h]
|
||||||
|
\centering
|
||||||
|
\begin{tikzpicture}
|
||||||
|
\begin{axis}[
|
||||||
|
xlabel=Step,
|
||||||
|
ylabel=Learning Rate,
|
||||||
|
width=\textwidth,
|
||||||
|
height=0.8\textheight]
|
||||||
|
\addplot[mark=none] table[x=Step,y=learning_rate,col sep=comma] {./figures/training_data.csv};
|
||||||
|
\end{axis}
|
||||||
|
\end{tikzpicture}
|
||||||
|
\caption{学习率变化曲线}
|
||||||
|
\end{figure}
|
||||||
|
\end{frame}
|
||||||
|
|
||||||
|
\begin{frame}
|
||||||
|
\frametitle{微调效果验证}
|
||||||
|
\begin{figure}[htbp]
|
||||||
|
\centering
|
||||||
|
\includegraphics[width=0.45\textwidth]{./pic/before_train.png}
|
||||||
|
\hspace{0.05\textwidth}
|
||||||
|
\includegraphics[width=0.45\textwidth]{./pic/after_train.png}
|
||||||
|
\caption{训练前后对比}
|
||||||
|
\end{figure}
|
||||||
|
\end{frame}
|
24
report/chapters/technology.tex
Normal file
24
report/chapters/technology.tex
Normal file
@ -0,0 +1,24 @@
|
|||||||
|
% 技术介绍
|
||||||
|
\section{技术介绍}
|
||||||
|
\begin{frame}
|
||||||
|
\frametitle{技术介绍}
|
||||||
|
\begin{itemize}
|
||||||
|
\item[\faDesktop] 前端技术栈
|
||||||
|
\begin{itemize}
|
||||||
|
\item Gradio - Python 机器学习前端框架
|
||||||
|
\end{itemize}
|
||||||
|
\item[\faServer] 后端技术栈
|
||||||
|
\begin{itemize}
|
||||||
|
\item Python - 主要开发语言
|
||||||
|
\item SQLite - 轻量级关系型数据库
|
||||||
|
\item TinyDB - 轻量级非关系型数据库
|
||||||
|
\item LangChain - 大语言模型应用框架
|
||||||
|
\end{itemize}
|
||||||
|
\item[\faBrain] 人工智能技术
|
||||||
|
\begin{itemize}
|
||||||
|
\item Qwen2.5 - 开源大语言模型
|
||||||
|
\item Unsloth - 开源微调工具
|
||||||
|
\item QLoRA - 低秩适应微调技术
|
||||||
|
\end{itemize}
|
||||||
|
\end{itemize}
|
||||||
|
\end{frame}
|
1001
report/figures/training_data.csv
Normal file
1001
report/figures/training_data.csv
Normal file
File diff suppressed because it is too large
Load Diff
46
report/main.tex
Normal file
46
report/main.tex
Normal file
@ -0,0 +1,46 @@
|
|||||||
|
\documentclass[aspectratio=43]{beamer}
|
||||||
|
\usepackage{ctex}
|
||||||
|
\usepackage{graphicx}
|
||||||
|
\usepackage{amsmath}
|
||||||
|
\usepackage{hyperref}
|
||||||
|
\usepackage{listings}
|
||||||
|
\usepackage{xcolor}
|
||||||
|
\usepackage{fontawesome5}
|
||||||
|
\usepackage{subcaption}
|
||||||
|
\usepackage{booktabs}
|
||||||
|
\usepackage{tikz}
|
||||||
|
\usepackage{pgfplots}
|
||||||
|
\pgfplotsset{compat=1.18}
|
||||||
|
|
||||||
|
% 设置主题
|
||||||
|
\usetheme{Madrid}
|
||||||
|
\usecolortheme{default}
|
||||||
|
\setbeamertemplate{navigation symbols}{}
|
||||||
|
|
||||||
|
% 标题信息
|
||||||
|
\title{基于文档驱动的自适应编码大模型微调框架}
|
||||||
|
\subtitle{毕业设计答辩}
|
||||||
|
\author[XXX]{姓名:XXX \\ 学号:XXX \\ 指导教师:XXX}
|
||||||
|
\institute{广州大学}
|
||||||
|
\date{\today}
|
||||||
|
|
||||||
|
\begin{document}
|
||||||
|
|
||||||
|
% 标题页
|
||||||
|
\frame{\titlepage}
|
||||||
|
|
||||||
|
% 目录
|
||||||
|
\begin{frame}
|
||||||
|
\frametitle{目录}
|
||||||
|
\tableofcontents
|
||||||
|
\end{frame}
|
||||||
|
|
||||||
|
% 引入各章节内容
|
||||||
|
\input{chapters/background}
|
||||||
|
\input{chapters/technology}
|
||||||
|
\input{chapters/design}
|
||||||
|
\input{chapters/results}
|
||||||
|
\input{chapters/conclusion}
|
||||||
|
\input{chapters/acknowledgements}
|
||||||
|
|
||||||
|
\end{document}
|
BIN
report/pic/after_train.png
Normal file
BIN
report/pic/after_train.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 56 KiB |
BIN
report/pic/before_train.png
Normal file
BIN
report/pic/before_train.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 73 KiB |
BIN
report/pic/system_architecture.png
Normal file
BIN
report/pic/system_architecture.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 113 KiB |
607
毕业论文.pdf
607
毕业论文.pdf
@ -202,12 +202,10 @@ i
|
|||||||
endstream
|
endstream
|
||||||
endobj
|
endobj
|
||||||
13 0 obj
|
13 0 obj
|
||||||
<</Filter/FlateDecode/Length 682>>
|
<</Filter/FlateDecode/Length 681>>
|
||||||
stream
|
stream
|
||||||
xÚ<EFBFBD>UÉŽÔ0½óþ<>öÔêEõ±n#ú†8$vš#4\ø}ÊYz’ÌPdÅzq•_½Zâž:°]$Cö)»òhÀG[ßÿù6s³¡Ñ…$<24><>!;”à#Dwš<77>æïîÓ#¸w?ÝÃÍòíÅÝ}@GÁ]®ŽÜå—ÏÈÑý6.Ôdð¨ö±~½Ç¢é|â¨÷4\ë¼CÀi,aÚ ó»žUSoH·|+Õ<>rþvùl$ìÎ÷—!<21>ÄØajÄbðÀÙ<C380>0“<30>’G&B,2”:a
|
xÚ<EFBFBD>UÉŽÔ0½óþ<>öÔêEõ±n#ú†8$vš#4\ø}ÊYz’ÌP+ŠõÚU~﹪âž:°ºH.†ìSvåÑ€<C391>ö|ÿçÛÂ-†Æ’<B†ìP‚<50>ÝiBZ¾»O<C2BB>àÞýt·È·w÷w¹:r—_<#cD÷Û¸xPK<50>Á£ÚŸõë=MçG½§áZçN+` ÓJ`˜WØõ¬šzCºå¿R
)ço—ÏFÂÎ|y!i$ÆS#ƒÎî„™|”<2b‘¡Ð 3P³Y¢'çÓì%£¡kÒ—|£Pä p›<06>"Ø÷‡œº©TúŽ<13>N(¥c•BG¸j%ójAÔAŒPÆ!¬CYÈ<¬®ÈKLd®b |