feat(frontend): 优化 TensorBoard 端口占用问题
- 新增端口检测逻辑,动态分配可用端口
- 修改 TensorBoard 启动过程,使用动态分配的端口
- 添加 socket 模块,用于端口检测
This commit is contained in:
parent
9298438f98
commit
664944f0c5
@ -8,7 +8,7 @@ from transformers import TrainerCallback
|
|||||||
|
|
||||||
sys.path.append(str(Path(__file__).resolve().parent.parent))
|
sys.path.append(str(Path(__file__).resolve().parent.parent))
|
||||||
from global_var import get_model, get_tokenizer, get_datasets, get_workdir
|
from global_var import get_model, get_tokenizer, get_datasets, get_workdir
|
||||||
from tools import train_model
|
from tools import train_model, find_available_port
|
||||||
|
|
||||||
def train_page():
|
def train_page():
|
||||||
with gr.Blocks() as demo:
|
with gr.Blocks() as demo:
|
||||||
@ -56,11 +56,13 @@ def train_page():
|
|||||||
next_dir_number = max([int(d) for d in existing_dirs], default=0) + 1
|
next_dir_number = max([int(d) for d in existing_dirs], default=0) + 1
|
||||||
new_training_dir = os.path.join(training_dir, str(next_dir_number))
|
new_training_dir = os.path.join(training_dir, str(next_dir_number))
|
||||||
|
|
||||||
# 启动 TensorBoard 子进程
|
tensorboard_port = find_available_port(6006) # 从默认端口 6006 开始检测
|
||||||
|
print(f"TensorBoard 将使用端口: {tensorboard_port}")
|
||||||
|
|
||||||
tensorboard_logdir = os.path.join(new_training_dir, "logs")
|
tensorboard_logdir = os.path.join(new_training_dir, "logs")
|
||||||
os.makedirs(tensorboard_logdir, exist_ok=True) # 确保日志目录存在
|
os.makedirs(tensorboard_logdir, exist_ok=True) # 确保日志目录存在
|
||||||
tensorboard_process = subprocess.Popen(
|
tensorboard_process = subprocess.Popen(
|
||||||
["tensorboard", "--logdir", tensorboard_logdir, "--port", "6006"],
|
["tensorboard", "--logdir", tensorboard_logdir, "--port", str(tensorboard_port)],
|
||||||
stdout=subprocess.PIPE,
|
stdout=subprocess.PIPE,
|
||||||
stderr=subprocess.PIPE
|
stderr=subprocess.PIPE
|
||||||
)
|
)
|
||||||
|
@ -1,4 +1,5 @@
|
|||||||
from .parse_markdown import parse_markdown
|
from .parse_markdown import parse_markdown
|
||||||
from .scan_doc_dir import *
|
from .scan_doc_dir import *
|
||||||
from .json_example import generate_example_json
|
from .json_example import generate_example_json
|
||||||
from .model import *
|
from .model import *
|
||||||
|
from .socket import *
|
10
tools/socket.py
Normal file
10
tools/socket.py
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
import socket
|
||||||
|
|
||||||
|
# 启动 TensorBoard 子进程前添加端口检测逻辑
|
||||||
|
def find_available_port(start_port):
    """Return the first port at or above ``start_port`` with no listener on localhost.

    Ports are probed in ascending order by attempting a TCP connection to
    ``localhost``. A port is treated as available when the connection attempt
    fails, i.e. ``connect_ex`` returns a non-zero error code.

    Args:
        start_port: First port number to probe.

    Returns:
        The lowest probed port on which the connection attempt failed.

    Raises:
        RuntimeError: If no port from ``start_port`` through 65535 is
            available (including when ``start_port`` itself is above 65535).
    """
    max_port = 65535  # highest valid TCP port number

    # Bound the scan to the valid port range: probing past 65535 would make
    # ``connect_ex`` fail with an opaque OverflowError instead of a clear
    # "nothing available" error.
    for port in range(start_port, max_port + 1):
        with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
            # A non-zero result means nothing accepted the connection,
            # so the port is considered unoccupied.
            if s.connect_ex(('localhost', port)) != 0:
                return port
        # Port is occupied; fall through and try the next one.

    # NOTE: the result is only a snapshot; another process may still claim
    # the returned port before the caller binds it.
    raise RuntimeError(
        f"No available port found in range {start_port}-{max_port}"
    )
|
Loading…
x
Reference in New Issue
Block a user