feat(frontend): 优化 TensorBoard 端口占用问题

- 新增端口检测逻辑,动态分配可用端口
- 修改 TensorBoard 启动过程,使用动态分配的端口
- 添加 socket 模块,用于端口检测
This commit is contained in:
carry
2025-04-14 17:06:44 +08:00
parent 9298438f98
commit 664944f0c5
3 changed files with 17 additions and 4 deletions

View File

@@ -8,7 +8,7 @@ from transformers import TrainerCallback
sys.path.append(str(Path(__file__).resolve().parent.parent))
from global_var import get_model, get_tokenizer, get_datasets, get_workdir
from tools import train_model
from tools import train_model, find_available_port
def train_page():
with gr.Blocks() as demo:
@@ -56,11 +56,13 @@ def train_page():
next_dir_number = max([int(d) for d in existing_dirs], default=0) + 1
new_training_dir = os.path.join(training_dir, str(next_dir_number))
# 启动 TensorBoard 子进程
tensorboard_port = find_available_port(6006) # 从默认端口 6006 开始检测
print(f"TensorBoard 将使用端口: {tensorboard_port}")
tensorboard_logdir = os.path.join(new_training_dir, "logs")
os.makedirs(tensorboard_logdir, exist_ok=True) # 确保日志目录存在
tensorboard_process = subprocess.Popen(
["tensorboard", "--logdir", tensorboard_logdir, "--port", "6006"],
["tensorboard", "--logdir", tensorboard_logdir, "--port", str(tensorboard_port)],
stdout=subprocess.PIPE,
stderr=subprocess.PIPE
)