feat(frontend): 添加文档切片和并发数功能
- 新增并发数输入框 - 实现文档切片处理 - 更新生成数据集的逻辑,支持并发处理
This commit is contained in:
parent
868fcd45ba
commit
9236f49b36
@ -6,7 +6,7 @@ from sqlmodel import Session, select
|
||||
|
||||
sys.path.append(str(Path(__file__).resolve().parent.parent))
|
||||
from schema import APIProvider
|
||||
from tools import call_openai_api
|
||||
from tools import call_openai_api,process_markdown_file
|
||||
from global_var import get_docs, get_prompt_store, get_sql_engine
|
||||
|
||||
def dataset_generate_page():
|
||||
@ -67,6 +67,15 @@ def dataset_generate_page():
|
||||
step=1,
|
||||
interactive=True
|
||||
)
|
||||
concurrency_input = gr.Number(
|
||||
value=1,
|
||||
label="并发数",
|
||||
minimum=1,
|
||||
maximum=10,
|
||||
step=1,
|
||||
interactive=True,
|
||||
visible=False
|
||||
)
|
||||
generate_button = gr.Button("生成数据集",variant="primary")
|
||||
|
||||
output_text = gr.Textbox(label="生成结果", interactive=False)
|
||||
@ -100,8 +109,9 @@ def dataset_generate_page():
|
||||
dataframe_value = [[var, ""] for var in input_variables]
|
||||
return selected_prompt, dataframe_value
|
||||
|
||||
def on_generate_click(doc_state, prompt_state, api_state, variables_dataframe, rounds, progress=gr.Progress()):
|
||||
doc = [i for i in get_docs() if i.name == doc_state][0].markdown_files
|
||||
def on_generate_click(doc_state, prompt_state, api_state, variables_dataframe, rounds, concurrency, progress=gr.Progress()):
|
||||
docs = [i for i in get_docs() if i.name == doc_state][0].markdown_files
|
||||
document_slice_list = [process_markdown_file(doc) for doc in docs]
|
||||
prompt = [i for i in get_prompt_store().all() if i["id"] == int(prompt_state.split(" ")[0])][0]
|
||||
prompt = PromptTemplate.from_template(prompt["content"])
|
||||
with Session(get_sql_engine()) as session:
|
||||
@ -117,18 +127,9 @@ def dataset_generate_page():
|
||||
# 注入除document_slice以外的所有参数
|
||||
prompt = prompt.partial(**variables_dict)
|
||||
|
||||
print(doc)
|
||||
print(prompt.format(document_slice="test"))
|
||||
print(variables_dict)
|
||||
|
||||
import time
|
||||
total_steps = rounds
|
||||
for i in range(total_steps):
|
||||
# 模拟每个步骤的工作负载
|
||||
time.sleep(0.5)
|
||||
|
||||
current_progress = (i + 1) / total_steps
|
||||
progress(current_progress, desc=f"处理步骤 {i + 1}/{total_steps}")
|
||||
for document_slice in document_slice_list:
|
||||
print("~"*20)
|
||||
print(prompt.format(document_slice=document_slice))
|
||||
|
||||
return "all done"
|
||||
|
||||
@ -138,7 +139,7 @@ def dataset_generate_page():
|
||||
|
||||
generate_button.click(
|
||||
on_generate_click,
|
||||
inputs=[doc_choice, prompt_choice, api_choice, variables_dataframe, rounds_input],
|
||||
inputs=[doc_choice, prompt_choice, api_choice, variables_dataframe, rounds_input, concurrency_input],
|
||||
outputs=output_text
|
||||
)
|
||||
|
||||
|
Loading…
x
Reference in New Issue
Block a user