Files
yangcong_data/Chat_history/json_data.py
2026-04-02 10:40:41 +08:00

74 lines
3.1 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import pandas as pd
import json
import os
from typing import Dict, List, Optional
# Directory that batch_json_to_csv() lists to find its *.json input files.
JSON_INPUT_DIR = "./output/json"
# Path of the single CSV file that batch_json_to_csv() writes.
CSV_OUTPUT_PATH = "./家庭教育档案总表.csv"
def read_single_json(json_file_path: str) -> Optional[Dict]:
    """Load one JSON file and return its top-level object.

    A file is accepted only when it parses as JSON, its top-level value is
    a JSON object (``dict``), and that object contains the key "文件名称".
    Each rejection is reported with ``print`` and signalled by returning
    ``None``.

    :param json_file_path: Path of the JSON file to read (opened as UTF-8).
    :return: The parsed dict, or ``None`` when the file cannot be read,
        is not valid JSON, or lacks the required key.
    """
    file_name = os.path.basename(json_file_path)

    # Keep the try body limited to the I/O + parse step that can fail.
    try:
        with open(json_file_path, "r", encoding="utf-8") as f:
            json_data = json.load(f)
    except json.JSONDecodeError:
        print(f"解析失败:{file_name}JSON文件损坏/格式错误)")
        return None
    except Exception as e:
        # Best-effort boundary: one unreadable file (missing, permission,
        # bad encoding, ...) must not abort the caller's batch run.
        print(f"读取失败:{file_name} - 错误:{str(e)[:50]}")
        return None

    # The isinstance guard matters: `in` also works on lists and strings,
    # so a top-level ["文件名称"] or "...文件名称..." would otherwise be
    # accepted and returned even though it is not a record dict.
    if not isinstance(json_data, dict) or "文件名称" not in json_data:
        print(f"跳过无效JSON{file_name}(缺失核心字段「文件名称」)")
        return None
    return json_data
def batch_json_to_csv():
    """Merge every valid JSON file in ``JSON_INPUT_DIR`` into one CSV table.

    Files whose name ends in ``.json`` (case-insensitive) are read with
    ``read_single_json``; files it rejects (returns ``None`` for) are left
    out. The remaining records become one row each. The output has a fixed
    column layout: "文件名称" first, followed by the extracted fields in the
    order listed below. A column that no record provides is still emitted,
    with blank cells. The table is written to ``CSV_OUTPUT_PATH``; a failure
    while saving is printed and the function returns without raising.

    :return: None.
    """
    # Sort so the row order of the CSV is reproducible; os.listdir() makes
    # no ordering guarantee.
    json_files = sorted(
        f for f in os.listdir(JSON_INPUT_DIR)
        if f.lower().endswith(".json")
    )

    all_json_data: List[Dict] = []
    for json_file in json_files:
        json_data = read_single_json(os.path.join(JSON_INPUT_DIR, json_file))
        if json_data is not None:
            all_json_data.append(json_data)

    extract_fields = [
        "监护人1姓名", "家庭角色", "文化程度", "职业", "年龄", "性格特征", "联系方式",
        "监护人2姓名", "家庭角色_2", "文化程度_2", "职业_2", "年龄_2", "性格特征_2", "联系方式_2",
        "孩子姓名", "性别", "孩子年龄", "年级", "孩子性格特征", "学习成绩",
        "家庭地址", "家庭基本情况", "家庭氛围", "亲子关系", "家长有无教育分歧",
        "是否经常否定孩子", "有无打骂教育", "孩子是否在父母身边长大",
        "还有谁参与孩子的养育", "孩子成长过程中有何重大影响事件", "既往病史",
        "孩子的优点", "孩子的缺点", "孩子目前情况的描述", "参加指导最想解决", "问卷评估",
    ]

    # Fixed column order: file name first, then the extracted fields.
    # reindex() (rather than df[[...]]) creates any column that no input
    # file supplied instead of raising KeyError, so an empty input folder
    # or a field absent from every record still yields a well-formed CSV.
    df = pd.DataFrame(all_json_data).reindex(
        columns=["文件名称"] + extract_fields
    )

    try:
        df.to_csv(
            CSV_OUTPUT_PATH,
            index=False,           # no row-index column
            encoding="utf-8-sig",  # BOM so Excel/Notepad detect UTF-8
            na_rep="",             # missing values become blank cells
        )
    except Exception as e:
        print(f"保存CSV失败 - 错误:{str(e)[:60]}")
        return
# Run the conversion only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    batch_json_to_csv()