# Listing metadata: 74 lines, 3.1 KiB, Python.
import pandas as pd
|
||
import json
|
||
import os
|
||
from typing import Dict, List, Optional
|
||
|
||
|
||
# Directory scanned by batch_json_to_csv for the *.json record files.
# Relative path: resolved against the current working directory.
JSON_INPUT_DIR = "./output/json"
# Destination of the merged CSV table written by batch_json_to_csv.
# Relative path: resolved against the current working directory.
CSV_OUTPUT_PATH = "./家庭教育档案总表.csv"
|
||
|
||
|
||
def read_single_json(json_file_path: str) -> Optional[Dict]:
    """
    Load one JSON record file and check that it is usable.

    A file is accepted only when its top-level value is a JSON object that
    contains the key "文件名称". Every failure is reported on stdout and
    signalled by returning None, so a batch caller can simply skip the file.

    :param json_file_path: Path of the JSON file to read (opened as UTF-8).
    :return: The parsed dict, or None when the file cannot be read, is not
        valid JSON, is not a JSON object, or lacks the "文件名称" key.
    """
    file_name = os.path.basename(json_file_path)

    # Keep the try body limited to the I/O and parsing that can actually fail.
    try:
        with open(json_file_path, "r", encoding="utf-8") as f:
            json_data = json.load(f)
    except json.JSONDecodeError:
        print(f"解析失败:{file_name}(JSON文件损坏/格式错误)")
        return None
    except Exception as e:
        # Best-effort boundary: any other failure (missing file, permission,
        # bad encoding, ...) skips this file instead of aborting the batch.
        print(f"读取失败:{file_name} - 错误:{str(e)[:50]}")
        return None

    # json.load may return a list, string, number, bool or None. A bare
    # membership test would accept e.g. the list ["文件名称"] or the string
    # "文件名称", so require a JSON object before looking for the key.
    if not isinstance(json_data, dict):
        print(f"跳过无效JSON:{file_name}(顶层结构不是JSON对象)")
        return None

    # Minimal validation: the record must carry the core field "文件名称".
    if "文件名称" not in json_data:
        print(f"跳过无效JSON:{file_name}(缺失核心字段「文件名称」)")
        return None

    return json_data
|
||
|
||
def batch_json_to_csv():
    """
    Merge every JSON record file in JSON_INPUT_DIR into one CSV table.

    Each ``*.json`` file (extension matched case-insensitively) is loaded with
    read_single_json; files it rejects are counted as failures and skipped.
    The accepted records become rows of a table whose columns are
    "文件名称" followed by the fixed field list below. Keys not in that list
    are dropped, and listed fields a record does not have are written as
    empty cells. The table is saved to CSV_OUTPUT_PATH as UTF-8 with BOM.

    Failures to list the input directory or to write the CSV are reported on
    stdout and end the function early; nothing is raised to the caller.

    :return: None.
    """
    # Fields exported after the leading "文件名称" column, in output order.
    extract_fields = [
        "监护人1姓名", "家庭角色", "文化程度", "职业", "年龄", "性格特征", "联系方式",
        "监护人2姓名", "家庭角色_2", "文化程度_2", "职业_2", "年龄_2", "性格特征_2", "联系方式_2",
        "孩子姓名", "性别", "孩子年龄", "年级", "孩子性格特征", "学习成绩",
        "家庭地址", "家庭基本情况", "家庭氛围", "亲子关系", "家长有无教育分歧",
        "是否经常否定孩子", "有无打骂教育", "孩子是否在父母身边长大",
        "还有谁参与孩子的养育", "孩子成长过程中有何重大影响事件", "既往病史",
        "孩子的优点", "孩子的缺点", "孩子目前情况的描述", "参加指导最想解决", "问卷评估",
    ]
    columns = ["文件名称"] + extract_fields

    try:
        # os.listdir order is arbitrary; sort so the row order is reproducible.
        json_files = sorted(
            f for f in os.listdir(JSON_INPUT_DIR)
            if f.lower().endswith(".json")
        )
    except OSError as e:
        # Missing or unreadable input directory: report it instead of crashing.
        print(f"读取目录失败:{JSON_INPUT_DIR} - 错误:{str(e)[:60]}")
        return

    all_json_data: List[Dict] = []
    fail_count = 0
    for json_file in json_files:
        json_data = read_single_json(os.path.join(JSON_INPUT_DIR, json_file))
        if json_data is None:
            fail_count += 1
            continue
        all_json_data.append(json_data)
    success_count = len(all_json_data)

    # reindex (rather than df[columns]) keeps the fixed column order without
    # raising KeyError when a field is absent from every record or when no
    # record was loaded at all; such columns are simply left empty.
    df = pd.DataFrame(all_json_data).reindex(columns=columns)

    try:
        df.to_csv(
            CSV_OUTPUT_PATH,
            index=False,           # no row-index column
            encoding="utf-8-sig",  # BOM so Excel/Notepad detect UTF-8
            na_rep="",             # missing fields / nulls become empty cells
        )
    except Exception as e:
        print(f"保存CSV失败 - 错误:{str(e)[:60]}")
        return

    print(f"转换完成:成功 {success_count} 个,失败 {fail_count} 个,输出文件:{CSV_OUTPUT_PATH}")
|
||
|
||
if __name__ == "__main__":
    # Run the conversion only when executed as a script, not on import.
    batch_json_to_csv()