import json
import sys
from pathlib import Path

import gradio as gr
from langchain.prompts import PromptTemplate

# Make the project root importable so `global_var` resolves when this file is
# run directly as a script; this must happen before the import below.
sys.path.append(str(Path(__file__).resolve().parent.parent))
from global_var import get_docs, get_prompt_store  # noqa: E402

# Column headers of the variable table. They are shared by the Dataframe
# definition and by the code that reads the rows back, so the two stay in sync.
_VAR_NAME_HEADER = "变量名"
_VAR_VALUE_HEADER = "变量值"


def _prompt_variable_rows(selected_prompt):
    """Build the variable-table rows for a prompt dropdown entry.

    Args:
        selected_prompt: Dropdown entry of the form ``"<id> <name>"``, or a
            falsy value when nothing is selected.

    Returns:
        A list of ``[variable_name, ""]`` rows, one per input variable of the
        stored prompt template. Empty when nothing is selected or when the
        prompt store returns no record for the id.
    """
    if not selected_prompt:
        return []

    # The numeric id is the first space-separated token of the entry.
    prompt_id = int(selected_prompt.split(" ", 1)[0])
    prompt_data = get_prompt_store().get(doc_id=prompt_id)
    if not prompt_data:
        # Guard against a missing record instead of failing on the subscript.
        return []

    template = PromptTemplate.from_template(prompt_data["content"])
    return [[name, ""] for name in template.input_variables]


def _cell_text(value):
    """Return a table cell as stripped text, mapping None/NaN to ``""``."""
    if value is None:
        return ""
    # NaN is the only float that is not equal to itself; empty cells may
    # arrive as NaN and must not be rendered as the text "nan".
    if isinstance(value, float) and value != value:
        return ""
    return str(value).strip()


def _collect_variables(variables_dataframe):
    """Convert the variable table into a ``{name: value}`` dict.

    Args:
        variables_dataframe: Table value handed over by the Dataframe
            component; expected to expose ``iterrows()`` and the two header
            columns (a pandas DataFrame), or ``None``.

    Returns:
        Mapping of non-empty variable names to their (stripped) values.
    """
    if variables_dataframe is None:
        return {}

    collected = {}
    for _, row in variables_dataframe.iterrows():
        name = _cell_text(row[_VAR_NAME_HEADER])
        if name:
            collected[name] = _cell_text(row[_VAR_VALUE_HEADER])
    return collected


def dataset_generate_page():
    """Build the dataset-generation page.

    The page offers a document dropdown, a prompt-template dropdown, an
    editable table of the template's input variables, a generate button and
    a read-only result box.

    Returns:
        The assembled ``gr.Blocks`` instance.
    """
    with gr.Blocks() as demo:
        gr.Markdown("## 数据集生成")

        # NOTE(review): the source this was restored from had lost its
        # indentation, so the exact widget nesting below is a reconstruction;
        # confirm against the intended layout.
        with gr.Row():
            with gr.Column():
                docs_list = [str(doc.name) for doc in get_docs()]
                initial_doc = docs_list[0] if docs_list else None

                doc_dropdown = gr.Dropdown(
                    choices=docs_list,
                    value=initial_doc,
                    label="选择文档",
                    interactive=True,
                )
                doc_state = gr.State(value=initial_doc)

            with gr.Column():
                prompts = get_prompt_store().all()
                prompt_list = [f"{p['id']} {p['name']}" for p in prompts]
                initial_prompt = prompt_list[0] if prompt_list else None

                # Pre-fill the variable table for the initially selected prompt.
                initial_dataframe_value = _prompt_variable_rows(initial_prompt)

                prompt_dropdown = gr.Dropdown(
                    choices=prompt_list,
                    value=initial_prompt,
                    label="选择模板",
                    interactive=True,
                )
                prompt_state = gr.State(value=initial_prompt)

        generate_button = gr.Button("生成数据集")

        variables_dataframe = gr.Dataframe(
            headers=[_VAR_NAME_HEADER, _VAR_VALUE_HEADER],
            datatype=["str", "str"],
            interactive=True,
            label="变量列表",
            value=initial_dataframe_value,
        )

        output_text = gr.Textbox(label="生成结果", interactive=False)

        def on_doc_change(selected_doc):
            """Mirror the selected document into the document state."""
            return selected_doc

        def on_prompt_change(selected_prompt):
            """Update the prompt state and rebuild the variable table."""
            if not selected_prompt:
                return None, []
            return selected_prompt, _prompt_variable_rows(selected_prompt)

        def on_generate_click(selected_doc, selected_prompt, variables_table):
            """Collect the inputs and show them in the result box.

            Returns:
                A JSON string with the selected document, the selected prompt
                and the variable mapping read from the table.
            """
            variables_dict = _collect_variables(variables_table)
            # TODO: run the actual dataset generation. Until then, return the
            # collected inputs so the click produces a visible result instead
            # of silently discarding them.
            return json.dumps(
                {
                    "document": selected_doc,
                    "prompt": selected_prompt,
                    "variables": variables_dict,
                },
                ensure_ascii=False,
                indent=2,
            )

        # Event listeners must be registered inside the Blocks context.
        doc_dropdown.change(on_doc_change, inputs=doc_dropdown, outputs=doc_state)
        prompt_dropdown.change(
            on_prompt_change,
            inputs=prompt_dropdown,
            outputs=[prompt_state, variables_dataframe],
        )
        generate_button.click(
            on_generate_click,
            inputs=[doc_state, prompt_state, variables_dataframe],
            outputs=output_text,
        )

    return demo


if __name__ == "__main__":
    from global_var import init_global_var

    init_global_var("workdir")
    demo = dataset_generate_page()
    demo.launch()