"""Merge a folder of per-record JSON files into one CSV summary table."""

import json
import os
from typing import Dict, List, Optional

import pandas as pd

JSON_INPUT_DIR = "./output/json"
CSV_OUTPUT_PATH = "./家庭教育档案总表.csv"

# Key that every usable record must carry; it is also the first CSV column.
KEY_FIELD = "文件名称"

# Columns exported after KEY_FIELD, in output order.
EXTRACT_FIELDS: List[str] = [
    "监护人1姓名", "家庭角色", "文化程度", "职业", "年龄", "性格特征", "联系方式",
    "监护人2姓名", "家庭角色_2", "文化程度_2", "职业_2", "年龄_2", "性格特征_2",
    "联系方式_2",
    "孩子姓名", "性别", "孩子年龄", "年级", "孩子性格特征", "学习成绩", "家庭地址",
    "家庭基本情况", "家庭氛围", "亲子关系", "家长有无教育分歧", "是否经常否定孩子",
    "有无打骂教育",
    "孩子是否在父母身边长大", "还有谁参与孩子的养育", "孩子成长过程中有何重大影响事件",
    "既往病史", "孩子的优点", "孩子的缺点", "孩子目前情况的描述", "参加指导最想解决",
    "问卷评估",
]


def read_single_json(json_file_path: str) -> Optional[Dict]:
    """Load one JSON file and return it if it is a usable record.

    A usable record is a JSON object (dict) that contains the key
    ``文件名称``. Every failure is reported with ``print`` and turned into a
    ``None`` return so that one bad file never aborts a batch run.

    :param json_file_path: Path of the JSON file to read.
    :return: The parsed dict, or ``None`` when the file cannot be read,
        cannot be parsed, is not a JSON object, or lacks the key field.
    """
    file_name = os.path.basename(json_file_path)
    try:
        with open(json_file_path, "r", encoding="utf-8") as f:
            json_data = json.load(f)
    except json.JSONDecodeError:
        print(f"解析失败:{file_name}(JSON文件损坏/格式错误)")
        return None
    except Exception as e:
        # Best-effort boundary: I/O and decoding errors are reported, not raised.
        print(f"读取失败:{file_name} - 错误:{str(e)[:50]}")
        return None

    # A top-level list such as ["文件名称"] would satisfy a bare `in` test,
    # so the container type has to be checked as well.
    if not isinstance(json_data, dict) or KEY_FIELD not in json_data:
        print(f"跳过无效JSON:{file_name}(缺失核心字段「文件名称」)")
        return None
    return json_data


def batch_json_to_csv():
    """Convert every ``*.json`` file in ``JSON_INPUT_DIR`` into one CSV.

    Files are processed in sorted name order so the row order is
    reproducible. Unusable files are skipped (see ``read_single_json``).
    The CSV always has ``文件名称`` followed by ``EXTRACT_FIELDS`` as its
    columns; fields missing from the records are written as empty cells.
    Nothing is written when the input directory cannot be listed or when
    no usable record is found. The function returns ``None`` in all cases.
    """
    try:
        dir_entries = os.listdir(JSON_INPUT_DIR)
    except OSError as e:
        print(f"读取目录失败:{JSON_INPUT_DIR} - 错误:{str(e)[:60]}")
        return

    json_files = sorted(f for f in dir_entries if f.lower().endswith(".json"))

    all_json_data: List[Dict] = []
    for json_file in json_files:
        json_data = read_single_json(os.path.join(JSON_INPUT_DIR, json_file))
        if json_data is not None:
            all_json_data.append(json_data)

    if not all_json_data:
        # Avoid overwriting an earlier export with an empty table.
        print(f"未找到有效JSON数据:{JSON_INPUT_DIR}(未生成CSV)")
        return

    # reindex (unlike df[[...]]) creates absent columns filled with NaN
    # instead of raising KeyError, and drops any keys not listed.
    df = pd.DataFrame(all_json_data).reindex(columns=[KEY_FIELD] + EXTRACT_FIELDS)

    try:
        df.to_csv(
            CSV_OUTPUT_PATH,
            index=False,           # no row-index column
            encoding="utf-8-sig",  # BOM so Excel/Notepad detect UTF-8
            na_rep="",             # missing fields / nulls become empty cells
        )
    except Exception as e:
        print(f"保存CSV失败 - 错误:{str(e)[:60]}")
        return


if __name__ == "__main__":
    batch_json_to_csv()