Compare commits
No commits in common. "84fe78243a54ab9a2c2737ff65f128b1c89565cc" and "541d37c674de9d4f30d39e516e74228d6feeab51" have entirely different histories.
84fe78243a
...
541d37c674
@ -1,41 +1,9 @@
|
|||||||
import gradio as gr
|
import gradio as gr
|
||||||
from global_var import docs, scan_docs_directory, prompt_store
|
|
||||||
|
|
||||||
def dataset_generate_page():
|
def dataset_generate_page():
|
||||||
with gr.Blocks() as demo:
|
with gr.Blocks() as demo:
|
||||||
gr.Markdown("## 数据集生成")
|
gr.Markdown("## 数据集生成")
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
with gr.Column():
|
with gr.Column():
|
||||||
# 获取文档列表并设置初始值
|
pass
|
||||||
docs_list = [str(doc.name) for doc in scan_docs_directory("workdir")]
|
|
||||||
initial_doc = docs_list[0] if docs_list else None
|
|
||||||
|
|
||||||
doc_dropdown = gr.Dropdown(
|
|
||||||
choices=docs_list,
|
|
||||||
value=initial_doc, # 设置初始选中项
|
|
||||||
label="选择文档",
|
|
||||||
allow_custom_value=True,
|
|
||||||
interactive=True
|
|
||||||
)
|
|
||||||
doc_state = gr.State(value=initial_doc) # 用文档初始值初始化状态
|
|
||||||
|
|
||||||
with gr.Column():
|
|
||||||
# 获取模板列表并设置初始值
|
|
||||||
prompts = prompt_store.all()
|
|
||||||
prompt_choices = [f"{p['id']} {p['name']}" for p in prompts]
|
|
||||||
initial_prompt = prompt_choices[0] if prompt_choices else None
|
|
||||||
|
|
||||||
prompt_dropdown = gr.Dropdown(
|
|
||||||
choices=prompt_choices,
|
|
||||||
value=initial_prompt, # 设置初始选中项
|
|
||||||
label="选择模板",
|
|
||||||
allow_custom_value=True,
|
|
||||||
interactive=True
|
|
||||||
)
|
|
||||||
prompt_state = gr.State(value=initial_prompt) # 用模板初始值初始化状态
|
|
||||||
|
|
||||||
# 绑定事件(保留原有逻辑,确保交互时更新)
|
|
||||||
doc_dropdown.change(lambda x: x, inputs=doc_dropdown, outputs=doc_state)
|
|
||||||
prompt_dropdown.change(lambda x: x, inputs=prompt_dropdown, outputs=prompt_state)
|
|
||||||
|
|
||||||
return demo
|
return demo
|
@ -1,6 +1,4 @@
|
|||||||
from db import get_sqlite_engine,get_prompt_tinydb
|
from db import get_sqlite_engine,get_prompt_tinydb
|
||||||
from tools import scan_docs_directory
|
|
||||||
|
|
||||||
prompt_store = get_prompt_tinydb("workdir")
|
prompt_store = get_prompt_tinydb("workdir")
|
||||||
sql_engine = get_sqlite_engine("workdir")
|
sql_engine = get_sqlite_engine("workdir")
|
||||||
docs = scan_docs_directory("workdir")
|
|
@ -1,35 +0,0 @@
|
|||||||
from typing import List
|
|
||||||
from schema.dataset import dataset, dataset_item, Q_A
|
|
||||||
import json
|
|
||||||
|
|
||||||
def convert_json_to_dataset(json_data: List[dict]) -> dataset:
|
|
||||||
# 将JSON数据转换为dataset格式
|
|
||||||
dataset_items = []
|
|
||||||
item_id = 1 # 自增ID计数器
|
|
||||||
for item in json_data:
|
|
||||||
qa = Q_A(question=item["question"], answer=item["answer"])
|
|
||||||
dataset_item_obj = dataset_item(id=item_id, message=[qa])
|
|
||||||
dataset_items.append(dataset_item_obj)
|
|
||||||
item_id += 1 # ID自增
|
|
||||||
|
|
||||||
# 创建dataset对象
|
|
||||||
result_dataset = dataset(
|
|
||||||
name="Converted Dataset",
|
|
||||||
model_id=None,
|
|
||||||
description="Dataset converted from JSON",
|
|
||||||
dataset_items=dataset_items
|
|
||||||
)
|
|
||||||
return result_dataset
|
|
||||||
|
|
||||||
# 示例:从文件读取JSON并转换
|
|
||||||
if __name__ == "__main__":
|
|
||||||
# 假设JSON数据存储在文件中
|
|
||||||
with open(r"workdir\dataset_old\llamafactory.json", "r", encoding="utf-8") as file:
|
|
||||||
json_data = json.load(file)
|
|
||||||
|
|
||||||
# 转换为dataset格式
|
|
||||||
converted_dataset = convert_json_to_dataset(json_data)
|
|
||||||
|
|
||||||
# 输出结果到文件
|
|
||||||
with open("output.json", "w", encoding="utf-8") as file:
|
|
||||||
file.write(converted_dataset.model_dump_json(indent=4))
|
|
Loading…
x
Reference in New Issue
Block a user