Update README and project cleanup
This commit is contained in:
44
analyze_excel.py
Normal file
44
analyze_excel.py
Normal file
@@ -0,0 +1,44 @@
|
||||
#!/usr/bin/env python3
"""Print a value-frequency summary for selected columns of an Excel sheet.

The script loads the active worksheet of the workbook, treats the first row
as the header row, and for every column listed in ``important_cols`` prints
the header text, the number of distinct non-blank values, and the values
ordered by descending frequency.
"""
from collections import Counter

import openpyxl

# Columns with at most this many distinct values are listed in full.
MAX_FULL_LISTING = 15
# Number of most frequent values shown when a column exceeds MAX_FULL_LISTING.
TOP_N = 10

wb = openpyxl.load_workbook('家庭教育档案-天数.xlsx')
ws = wb.active
data = [list(row) for row in ws.iter_rows(values_only=True)]

# Key fields to inspect: zero-based column index -> label printed in the report.
important_cols = {
    2: 'family_role1',
    3: 'education1',
    16: 'child_sex',
    20: 'learning_score',
    22: 'family_status',
    23: 'family_atmosphere',
    24: 'parent_child_relation',
    25: 'parent_education_diff',
    26: 'deny_often',
    27: 'hitting_education',
    28: 'child_with_parents',
    37: 'duration',
}

print('标签分类方案\n')
print('=' * 100)

header_row = data[0] if data else []

for col_idx, col_key in important_cols.items():
    # Guard the header lookup the same way the data rows are guarded, so a
    # sheet narrower than expected reports a missing header instead of
    # raising IndexError.
    header = header_row[col_idx] if col_idx < len(header_row) else None

    values = Counter()
    for row in data[1:]:
        if col_idx >= len(row):
            continue
        cell = row[col_idx]
        # Only empty cells are skipped. A plain truthiness test would also
        # discard legitimate values such as a numeric 0.
        if cell is None:
            continue
        text = str(cell).strip()
        if text:
            values[text] += 1

    print(f'\n{col_key}:')
    print(f' header: {header}')
    print(f' unique_values: {len(values)}')

    # most_common() sorts by descending count and keeps first-seen order
    # for ties.
    ranked = values.most_common()
    if len(ranked) <= MAX_FULL_LISTING:
        for v, count in ranked:
            print(f' - "{v}" ({count}人)')
    else:
        for v, count in ranked[:TOP_N]:
            print(f' - "{v}" ({count}人)')
        print(f' ... 还有 {len(ranked) - TOP_N} 个值')
|
||||
Reference in New Issue
Block a user