45 lines
1.2 KiB
Python
45 lines
1.2 KiB
Python
#!/usr/bin/env python3
|
|
from collections import Counter

import openpyxl
|
|
|
|
# Open the workbook and snapshot its active sheet as a list of row lists.
# values_only=True makes iter_rows yield plain cell values instead of cell
# objects; each yielded row is converted to a mutable list.
wb = openpyxl.load_workbook('家庭教育档案-天数.xlsx')
ws = wb.active
data = list(map(list, ws.iter_rows(values_only=True)))
|
|
|
|
# Key fields to check, as (zero-based column index, report label) pairs.
# The order here is the order in which the fields are reported.
_KEY_FIELDS = (
    (2, 'family_role1'),
    (3, 'education1'),
    (16, 'child_sex'),
    (20, 'learning_score'),
    (22, 'family_status'),
    (23, 'family_atmosphere'),
    (24, 'parent_child_relation'),
    (25, 'parent_education_diff'),
    (26, 'deny_often'),
    (27, 'hitting_education'),
    (28, 'child_with_parents'),
    (37, 'duration'),
)
important_cols = dict(_KEY_FIELDS)
|
|
|
|
# Frequency report: for every key field, print its header cell, the number of
# distinct non-blank values, and how many rows carry each value.

# A field with at most _FULL_LISTING_LIMIT distinct values is listed in full;
# otherwise only the _TOP_VALUES most frequent values are shown.
_FULL_LISTING_LIMIT = 15
_TOP_VALUES = 10


def _count_column_values(rows, col_idx):
    """Count the occurrences of each non-blank value in one column.

    Args:
        rows: Data rows (sequences of cell values), header row excluded.
        col_idx: Zero-based index of the column to tally.

    Returns:
        A ``Counter`` mapping each value (converted to ``str`` and stripped)
        to the number of rows holding it. Rows too short to contain
        ``col_idx`` and blank cells (``None`` or empty after stripping) are
        skipped.
    """
    counts = Counter()
    for row in rows:
        if col_idx >= len(row):
            continue
        cell = row[col_idx]
        # Test for None explicitly: a bare truthiness check would also drop
        # legitimate falsy answers such as a numeric 0.
        if cell is None:
            continue
        text = str(cell).strip()
        # Whitespace-only cells are blanks, not a distinct "" value.
        if text:
            counts[text] += 1
    return counts


print('标签分类方案\n')
print('=' * 100)

# Slice once instead of once per reported column.
header_row = data[0] if data else []
body_rows = data[1:]

for col_idx, col_key in important_cols.items():
    # Fall back to None when the sheet is narrower than the configured index
    # rather than raising IndexError (the data rows are bounds-checked too).
    header = header_row[col_idx] if col_idx < len(header_row) else None
    values = _count_column_values(body_rows, col_idx)

    print(f'\n{col_key}:')
    print(f' header: {header}')
    print(f' unique_values: {len(values)}')

    truncated = len(values) > _FULL_LISTING_LIMIT
    # most_common() orders by descending count; equal counts keep the order
    # in which the values were first encountered.
    for v, count in values.most_common(_TOP_VALUES if truncated else None):
        print(f' - "{v}" ({count}人)')
    if truncated:
        print(f' ... 还有 {len(values) - _TOP_VALUES} 个值')
|