diff --git a/frontend/train_page.py b/frontend/train_page.py
index c2e1d7c..188f901 100644
--- a/frontend/train_page.py
+++ b/frontend/train_page.py
@@ -8,7 +8,7 @@ from transformers import TrainerCallback
 
 sys.path.append(str(Path(__file__).resolve().parent.parent))
 from global_var import get_model, get_tokenizer, get_datasets, get_workdir
-from tools import train_model
+from tools import train_model, find_available_port
 
 def train_page():
     with gr.Blocks() as demo:
@@ -56,11 +56,13 @@ def train_page():
             next_dir_number = max([int(d) for d in existing_dirs], default=0) + 1
             new_training_dir = os.path.join(training_dir, str(next_dir_number))
 
-            # 启动 TensorBoard 子进程
+            tensorboard_port = find_available_port(6006)  # probe upward from TensorBoard's default port 6006
+            print(f"TensorBoard 将使用端口: {tensorboard_port}")
+
             tensorboard_logdir = os.path.join(new_training_dir, "logs")
             os.makedirs(tensorboard_logdir, exist_ok=True)  # 确保日志目录存在
             tensorboard_process = subprocess.Popen(
-                ["tensorboard", "--logdir", tensorboard_logdir, "--port", "6006"],
+                ["tensorboard", "--logdir", tensorboard_logdir, "--port", str(tensorboard_port)],
                 stdout=subprocess.PIPE,
                 stderr=subprocess.PIPE
             )
diff --git a/tools/__init__.py b/tools/__init__.py
index 9fbd1c4..cf8041c 100644
--- a/tools/__init__.py
+++ b/tools/__init__.py
@@ -1,4 +1,5 @@
 from .parse_markdown import parse_markdown
 from .scan_doc_dir import *
 from .json_example import generate_example_json
-from .model import *
\ No newline at end of file
+from .model import *
+from .socket import *
\ No newline at end of file
diff --git a/tools/socket.py b/tools/socket.py
new file mode 100644
index 0000000..91199a9
--- /dev/null
+++ b/tools/socket.py
@@ -0,0 +1,33 @@
+"""Helpers for locating a free local TCP port."""
+
+import socket
+
+# Limit star-imports to the helper; otherwise the stdlib ``socket`` module
+# would be re-exported and shadow this submodule on the package.
+__all__ = ["find_available_port"]
+
+_MAX_PORT = 65535  # highest valid TCP port number
+
+
+def find_available_port(start_port):
+    """Return the first port at or above ``start_port`` with no local listener.
+
+    Ports are probed in ascending order by attempting a TCP connection to
+    ``localhost``; a port counts as available when that attempt fails.
+
+    Args:
+        start_port: First port number to probe.
+
+    Returns:
+        The lowest probed port on which the connection attempt failed.
+
+    Raises:
+        RuntimeError: If no port from ``start_port`` through 65535 is free.
+    """
+    for port in range(start_port, _MAX_PORT + 1):
+        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as probe:
+            # connect_ex returns 0 only when something accepted the connection,
+            # so any non-zero result means the port is treated as free.
+            if probe.connect_ex(("localhost", port)) != 0:
+                return port
+    raise RuntimeError(f"no available port in range {start_port}-{_MAX_PORT}")