feat(frontend): 优化 TensorBoard 端口占用问题

- 新增端口检测逻辑,动态分配可用端口
- 修改 TensorBoard 启动过程,使用动态分配的端口
- 添加 socket 模块,用于端口检测
This commit is contained in:
carry 2025-04-14 17:06:44 +08:00
parent 9298438f98
commit 664944f0c5
3 changed files with 17 additions and 4 deletions

View File

@ -8,7 +8,7 @@ from transformers import TrainerCallback
sys.path.append(str(Path(__file__).resolve().parent.parent))
from global_var import get_model, get_tokenizer, get_datasets, get_workdir
from tools import train_model
from tools import train_model, find_available_port
def train_page():
with gr.Blocks() as demo:
@ -56,11 +56,13 @@ def train_page():
next_dir_number = max([int(d) for d in existing_dirs], default=0) + 1
new_training_dir = os.path.join(training_dir, str(next_dir_number))
# 启动 TensorBoard 子进程
tensorboard_port = find_available_port(6006) # 从默认端口 6006 开始检测
print(f"TensorBoard 将使用端口: {tensorboard_port}")
tensorboard_logdir = os.path.join(new_training_dir, "logs")
os.makedirs(tensorboard_logdir, exist_ok=True) # 确保日志目录存在
tensorboard_process = subprocess.Popen(
["tensorboard", "--logdir", tensorboard_logdir, "--port", "6006"],
["tensorboard", "--logdir", tensorboard_logdir, "--port", str(tensorboard_port)],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE
)

View File

@ -2,3 +2,4 @@ from .parse_markdown import parse_markdown
from .scan_doc_dir import *
from .json_example import generate_example_json
from .model import *
from .socket import *

10
tools/socket.py Normal file
View File

@ -0,0 +1,10 @@
import socket
# Port probing used before the TensorBoard subprocess is launched.
def find_available_port(start_port, max_port=65535):
    """Return the first TCP port at or above ``start_port`` that can be bound.

    Each candidate is probed by binding a throwaway IPv4 socket on the
    loopback address. Binding (rather than connecting) also rejects ports
    that are reserved, privileged, or held by a socket that is not accepting
    connections, and it never opens a connection to an unrelated service.

    Args:
        start_port: First port number to try. Values below 1 are treated as
            1, because binding port 0 asks the OS for an arbitrary port and
            would always "succeed".
        max_port: Last port number to try (inclusive). Defaults to 65535,
            the highest valid TCP port.

    Returns:
        The lowest port in ``[start_port, max_port]`` that could be bound.

    Raises:
        RuntimeError: If no port in the range could be bound.

    Note:
        The probe socket is closed before returning, so another process can
        still grab the port before the caller uses it; callers that need
        certainty must handle a bind failure themselves.
    """
    first_candidate = max(start_port, 1)
    for port in range(first_candidate, max_port + 1):
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
            try:
                # SO_REUSEADDR is deliberately not set: the probe must fail
                # whenever any other socket already owns this port.
                probe.bind(("127.0.0.1", port))
            except OSError:
                # In use, privileged, or otherwise unbindable: try the next.
                continue
            return port
    raise RuntimeError(
        f"no available TCP port found in range {first_candidate}-{max_port}"
    )