# onion-dmp/analyze_excel.py

#!/usr/bin/env python3
from collections import Counter

import openpyxl

# Open the workbook, take its active sheet, and snapshot every row as a
# list of plain cell values; data[0] is later read as the header row.
wb = openpyxl.load_workbook('家庭教育档案-天数.xlsx')
ws = wb.active
data = list(map(list, ws.iter_rows(values_only=True)))

# Key columns to inspect: 0-based position of the column within a sheet row
# mapped to the field name used to label that column in the printed output.
_IMPORTANT_COL_PAIRS = (
    (2, 'family_role1'),
    (3, 'education1'),
    (16, 'child_sex'),
    (20, 'learning_score'),
    (22, 'family_status'),
    (23, 'family_atmosphere'),
    (24, 'parent_child_relation'),
    (25, 'parent_education_diff'),
    (26, 'deny_often'),
    (27, 'hitting_education'),
    (28, 'child_with_parents'),
    (37, 'duration'),
)
important_cols = dict(_IMPORTANT_COL_PAIRS)

# Print, for every column listed in important_cols, its header cell, the
# number of distinct values, and a frequency-ordered listing of those values.
print('标签分类方案\n')
print('=' * 100)

# An empty sheet has no header row; fall back to an empty one so the header
# lookup below cannot raise IndexError.
header_row = data[0] if data else []

for col_idx, col_key in important_cols.items():
    # The header row may be narrower than the column being inspected.
    header = header_row[col_idx] if col_idx < len(header_row) else None

    # Tally each distinct cell value of this column as stripped text.
    values = Counter()
    for row in data[1:]:
        if col_idx >= len(row):
            continue
        cell = row[col_idx]
        # Skip only genuinely empty cells: a plain truthiness test would also
        # discard legitimate values such as 0 or False.
        if cell is None:
            continue
        v = str(cell).strip()
        # Blank / whitespace-only text is treated as empty as well.
        if v:
            values[v] += 1

    print(f'\n{col_key}:')
    print(f'  header: {header}')
    print(f'  unique_values: {len(values)}')

    # List everything for small value sets, otherwise only the 10 most
    # frequent values followed by a count of how many were left out.
    # most_common() orders by descending count and keeps first-seen order
    # among ties.
    truncated = len(values) > 15
    shown = values.most_common(10) if truncated else values.most_common()
    for v, count in shown:
        print(f'    - "{v}" ({count}人)')
    if truncated:
        print(f'    ... 还有 {len(values) - 10} 个值')