上传文件至「Chat_history」

洋葱聊天记录
This commit is contained in:
2026-04-02 10:40:41 +08:00
commit 8419ed0ffb
13 changed files with 35004 additions and 0 deletions

74
Chat_history/json_data.py Normal file
View File

@@ -0,0 +1,74 @@
import pandas as pd
import json
import os
from typing import Dict, List, Optional
# Directory that batch_json_to_csv() scans for "*.json" input files.
# Relative path: resolved against the current working directory.
JSON_INPUT_DIR = "./output/json"
# Path of the merged CSV table that batch_json_to_csv() writes.
CSV_OUTPUT_PATH = "./家庭教育档案总表.csv"
def read_single_json(json_file_path: str) -> Optional[Dict]:
    """Load one JSON file and return its content as a dict.

    Every failure is reported on stdout and signalled by returning ``None``
    so that a batch run can skip the file and carry on.

    :param json_file_path: Path of the JSON file to read (opened as UTF-8).
    :return: The parsed dict, or ``None`` when the file cannot be read,
        is not valid JSON, is not a JSON object, or lacks the mandatory
        "文件名称" key.
    """
    file_name = os.path.basename(json_file_path)
    try:
        with open(json_file_path, "r", encoding="utf-8") as f:
            json_data = json.load(f)
    except json.JSONDecodeError:
        print(f"解析失败:{file_name}(JSON文件损坏/格式错误)")
        return None
    except Exception as e:
        # Deliberately broad: any unreadable file (missing, permissions,
        # bad encoding, ...) is skipped instead of aborting the batch.
        print(f"读取失败:{file_name} - 错误:{str(e)[:50]}")
        return None

    # The top level must be a JSON object. Without the isinstance check a
    # list containing "文件名称" (or a string with it as a substring) would
    # pass the membership test and be returned as if it were a record.
    if not isinstance(json_data, dict) or "文件名称" not in json_data:
        print(f"跳过无效JSON:{file_name}(缺失核心字段「文件名称」)")
        return None
    return json_data
def batch_json_to_csv(
    json_input_dir: Optional[str] = None,
    csv_output_path: Optional[str] = None,
) -> None:
    """Merge every JSON file of a directory into a single CSV table.

    Each ``*.json`` file is loaded with ``read_single_json``; files it
    rejects are counted as failures and skipped. The remaining records
    become one row each. The CSV always has "文件名称" as its first column
    followed by a fixed list of fields; a field that a record (or every
    record) lacks is written as an empty cell.

    Problems are reported on stdout and the function returns without
    raising: unreadable input directory, no valid record, or a failed save.

    :param json_input_dir: Directory to scan; defaults to ``JSON_INPUT_DIR``.
    :param csv_output_path: CSV file to write; defaults to
        ``CSV_OUTPUT_PATH``.
    :return: ``None``.
    """
    input_dir = JSON_INPUT_DIR if json_input_dir is None else json_input_dir
    output_path = CSV_OUTPUT_PATH if csv_output_path is None else csv_output_path

    try:
        # Sorted so the row order is reproducible; os.listdir() returns
        # entries in arbitrary order.
        json_files = sorted(
            name for name in os.listdir(input_dir)
            if name.lower().endswith(".json")
        )
    except OSError as e:
        print(f"读取目录失败:{input_dir} - 错误:{str(e)[:60]}")
        return

    all_json_data: List[Dict] = []
    fail_count = 0
    for json_file in json_files:
        json_data = read_single_json(os.path.join(input_dir, json_file))
        # Only JSON objects can become table rows.
        if isinstance(json_data, dict):
            all_json_data.append(json_data)
        else:
            fail_count += 1
    success_count = len(all_json_data)

    if not all_json_data:
        print(f"没有可导出的有效JSON数据(失败:{fail_count} 个)")
        return

    extract_fields = [
        "监护人1姓名", "家庭角色", "文化程度", "职业", "年龄", "性格特征", "联系方式",
        "监护人2姓名", "家庭角色_2", "文化程度_2", "职业_2", "年龄_2", "性格特征_2", "联系方式_2",
        "孩子姓名", "性别", "孩子年龄", "年级", "孩子性格特征", "学习成绩",
        "家庭地址", "家庭基本情况", "家庭氛围", "亲子关系", "家长有无教育分歧",
        "是否经常否定孩子", "有无打骂教育", "孩子是否在父母身边长大",
        "还有谁参与孩子的养育", "孩子成长过程中有何重大影响事件", "既往病史",
        "孩子的优点", "孩子的缺点", "孩子目前情况的描述", "参加指导最想解决", "问卷评估",
    ]
    # Fixed column order: file name first, then the extracted fields.
    # reindex() creates absent columns filled with NaN, whereas plain
    # df[[...]] selection raises KeyError as soon as one column is missing.
    df = pd.DataFrame(all_json_data).reindex(
        columns=["文件名称"] + extract_fields
    )

    try:
        df.to_csv(
            output_path,
            index=False,           # no row-index column
            encoding="utf-8-sig",  # BOM so Excel/Notepad detect UTF-8
            na_rep="",             # missing fields / nulls become empty cells
        )
    except Exception as e:
        print(f"保存CSV失败 - 错误:{str(e)[:60]}")
        return

    print(f"转换完成:成功 {success_count} 个,失败 {fail_count} 个,已保存至 {output_path}")
# Script entry point: run the conversion only when this file is executed
# directly, not when it is imported as a module.
if __name__ == "__main__":
    batch_json_to_csv()