#!/usr/bin/env python3 import openpyxl wb = openpyxl.load_workbook('家庭教育档案-天数.xlsx') ws = wb.active data = [list(row) for row in ws.iter_rows(values_only=True)] # 检查关键字段 important_cols = { 2: 'family_role1', 3: 'education1', 16: 'child_sex', 20: 'learning_score', 22: 'family_status', 23: 'family_atmosphere', 24: 'parent_child_relation', 25: 'parent_education_diff', 26: 'deny_often', 27: 'hitting_education', 28: 'child_with_parents', 37: 'duration' } print('标签分类方案\n') print('=' * 100) for col_idx, col_key in important_cols.items(): header = data[0][col_idx] values = {} for row in data[1:]: if col_idx < len(row) and row[col_idx]: v = str(row[col_idx]).strip() values[v] = values.get(v, 0) + 1 print(f'\n{col_key}:') print(f' header: {header}') print(f' unique_values: {len(values)}') if len(values) <= 15: for v, count in sorted(values.items(), key=lambda x: -x[1]): print(f' - "{v}" ({count}人)') else: for v, count in sorted(values.items(), key=lambda x: -x[1])[:10]: print(f' - "{v}" ({count}人)') print(f' ... 还有 {len(values) - 10} 个值')