295 lines
9.6 KiB
Python
295 lines
9.6 KiB
Python
#!/usr/bin/env python3
|
||
# -*- coding: utf-8 -*-
|
||
"""
|
||
基于清洗1.0.xlsx的标签体系设计
|
||
"""
|
||
|
||
import openpyxl
|
||
from collections import defaultdict, Counter
|
||
|
||
# Workbook to analyse; the active sheet is the one every column scan reads.
file_path = '/Users/inkling/Desktop/dmp/清洗1.0.xlsx'
wb = openpyxl.load_workbook(file_path)
ws = wb.active

# Report title framed by two 100-character rules.
print(f"\n{'=' * 100}")
print("标签体系设计方案 v3.0")
print('=' * 100)
|
||
|
||
# Layer 1: guardian information.
print("\n【第一层:监护人信息维度】")
print("-" * 100)

# Frequency of each raw family-role answer in column A, scanning from row 2
# (row 1 is presumably the header row -- confirm against the workbook).
role1 = defaultdict(int)
for row in range(2, ws.max_row + 1):
    val = ws[f'A{row}'].value
    if not val:
        continue  # empty cell: nothing to count
    role1[str(val).strip()] += 1

print("\n1.1 监护人主要身份(第A列)")
print(" 标准化后的分类方案:")

# Standard role -> raw spellings that are folded into it (exact match only).
role_mapping = {
    '母亲': ['母亲', '妈妈', '母'],
    '父亲': ['父亲', '爸爸'],
    '祖母': ['奶奶', '祖母'],
    '祖父': ['爷爷'],
    '外祖母': ['外婆', '姥姥'],
    '外祖父': ['外公', '姥爷'],
    '其他亲属': ['舅舅', '妻子', '大姐'],
}
for std_role in role_mapping:
    variants = role_mapping[std_role]
    # Membership test instead of .get(): same total, no key is inserted.
    count = sum(role1[v] for v in variants if v in role1)
    print(f" • {std_role:15s}: {count:3d} 人 (包含: {', '.join(variants)})")
|
||
|
||
# Guardian education level.
print("\n1.2 监护人文化程度(第B列)")

# Frequency of each raw answer in column B, scanning from row 2.
education = defaultdict(int)
for row in range(2, ws.max_row + 1):
    val = ws[f'B{row}'].value
    if not val:
        continue  # empty cell: nothing to count
    education[str(val).strip()] += 1

# Standard education level -> raw spellings folded into it (exact match only).
edu_mapping = {
    '小学或以下': ['小学', '初小'],
    '初中': ['初中'],
    '中专/中师': ['中专', '中师'],
    '高中': ['高中'],
    '大专': ['大专'],
    '本科': ['本科', '大学', '大学本科'],
    '硕士及以上': ['硕士', '研究生', '在职研究生'],
}
for std_edu, variants in edu_mapping.items():
    count = 0
    for v in variants:
        count += education.get(v, 0)
    print(f" • {std_edu:15s}: {count:3d} 人")
|
||
|
||
# Guardian occupation.
print("\n1.3 监护人职业(第C列)")

# Frequency of each raw answer in column C, scanning from row 2.
# NOTE(review): nothing in this section reads `job` after it is built.
job = defaultdict(int)
for row in range(2, ws.max_row + 1):
    val = ws[f'C{row}'].value
    if not val:
        continue  # empty cell: nothing to count
    job[str(val).strip()] += 1

# Hand-entered headcount per occupation group.
# NOTE(review): these figures, and the identical ones printed below, are
# literals; they are not derived from `job` and will not follow the workbook.
job_mapping = {
    '退休': 33,
    '医生/教师/公务员': 22,  # 9+8+5
    '务农/工人/农民': 20,  # 8+6+6
    '个体/自由/自营': 15,  # 7+4+4+custom
    '其他': 93,  # remainder
}
for line in (
    " • 退休:33人 (最常见)",
    " • 医疗/教育/公务:22人(社会中流)",
    " • 农业/工业:20人(生产者)",
    " • 自营/个体:15人(创业者)",
    " • 其他手工业/服务业:93人(多元职业)",
):
    print(line)
|
||
|
||
# Layer 2: child information.
print("\n【第二层:孩子信息维度】")
print("-" * 100)

# Child gender distribution.
print("\n2.1 孩子性别(第F列)")

# Frequency of each raw answer in column F, scanning from row 2.
gender = defaultdict(int)
for row in range(2, ws.max_row + 1):
    val = ws[f'F{row}'].value
    if not val:
        continue  # empty cell: nothing to count
    gender[str(val).strip()] += 1

# Only the two exact answers are reported; a missing key reads as 0.
print(" • 男孩:{} 人".format(gender['男']))
print(" • 女孩:{} 人".format(gender['女']))
|
||
|
||
# Child school grade.
print("\n2.2 孩子年级(第G列)")

# Frequency of each raw answer in column G, scanning from row 2.
grade = defaultdict(int)
for row in range(2, ws.max_row + 1):
    val = ws[f'G{row}'].value
    if val:
        grade[str(val).strip()] += 1

# School stage -> raw grade spellings folded into it (exact match only).
# '九年级' is the junior-high graduating year, so it belongs with '初三'
# rather than with the early junior-high bucket; '七年级' / '八年级' are the
# numbered spellings of '初一' / '初二' and are accepted alongside them.
grade_groups = {
    '小学低段(1-3年级)': ['一年级', '二年级', '三年级', '1年级', '2年级', '3年级'],
    '小学高段(4-6年级)': ['四年级', '五年级', '六年级', '4年级', '5年级', '6年级'],
    '初中前期(初一初二)': ['初一', '初二', '准初二', '开学初二', '七年级', '八年级'],
    '初中毕业班(初三)': ['初三', '九年级'],
    '高中前期(高一高二)': ['高一', '高二'],
    '高中毕业班(高三)': ['高三'],
    '学段待确认': ['其他'],
}
for group_name, grades in grade_groups.items():
    count = sum(grade.get(g, 0) for g in grades)
    # Stages with no matching record are left out of the report.
    if count > 0:
        print(f" • {group_name:20s}: {count:3d} 人")
|
||
|
||
# Child academic performance.
print("\n2.3 孩子学习成绩(第H列)")

# Each non-empty answer in column H is reduced to the first of the four
# level keywords it contains; answers containing none are not counted.
score = defaultdict(int)
for row in range(2, ws.max_row + 1):
    val = ws[f'H{row}'].value
    if not val:
        continue  # empty cell: nothing to count
    val_str = str(val).strip()
    for keyword in ('优秀', '良好', '一般', '差'):
        if keyword in val_str:
            score[keyword] += 1
            break

# Most frequent level first; ties keep their first-seen order.
for level, count in sorted(score.items(), key=lambda kv: kv[1], reverse=True):
    print(f" • {level:8s}: {count:3d} 人")
|
||
|
||
# Layer 3: family environment.
print("\n【第三层:家庭环境维度】")
print("-" * 100)

# Family structure.
print("\n3.1 家庭结构(第I列)")

# Each non-empty answer in column I goes to the first bucket (in the order
# listed) whose keyword it contains; answers matching no bucket are skipped.
fam_struct = defaultdict(int)
for row in range(2, ws.max_row + 1):
    val = ws[f'I{row}'].value
    if not val:
        continue  # empty cell: nothing to count
    val_str = str(val).strip()
    for label, keywords in (
        ('三代同堂', ('三代同堂',)),
        ('隔代抚养', ('隔代抚养',)),
        ('离异', ('离异',)),
        ('单亲', ('单亲',)),
        ('核心家庭', ('三口之家', '四口之家')),
    ):
        if any(k in val_str for k in keywords):
            fam_struct[label] += 1
            break

# Most frequent structure first; ties keep their first-seen order.
for struct, count in sorted(fam_struct.items(), key=lambda kv: kv[1], reverse=True):
    print(f" • {struct:20s}: {count:3d} 人")
|
||
|
||
# Parent-child relationship quality.
print("\n3.2 亲子关系质量(第J列)")

# Keywords are matched as substrings, so the buckets are tested from the most
# specific to the most general: '不好' and '时好时坏' both contain '好' and
# would be counted as 良好 if the positive keywords were tried first, leaving
# their own branches unreachable for those answers.
relation = defaultdict(int)
for row in range(2, ws.max_row + 1):
    val = ws[f'J{row}'].value
    if val:
        val_str = str(val).strip()
        if any(w in val_str for w in ['不好', '差', '紧张']):
            relation['较差'] += 1
        elif any(w in val_str for w in ['一般', '还行', '正常', '时好时坏']):
            relation['一般'] += 1
        elif any(w in val_str for w in ['良好', '好', '和谐', '可以', '还好', '较好', '还可以']):
            relation['良好'] += 1
        else:
            # Non-empty answer that matches none of the keyword lists.
            relation['未知'] += 1

# Most frequent bucket first.
for quality, count in sorted(relation.items(), key=lambda x: -x[1]):
    print(f" • {quality:8s}: {count:3d} 人")
|
||
|
||
# Layer 4: parenting risk.
print("\n【第四层:教育风险维度】")
print("-" * 100)

# Whether the parents disagree on how to raise the child.
print("\n4.1 家长教育理念一致性(第K列)")

# Keywords are matched as substrings, so the negative answers must be tested
# first: '没有' contains '有' and '无分歧' contains '分歧', which would
# otherwise put every "no disagreement" answer into 有分歧.
conflict = defaultdict(int)
for row in range(2, ws.max_row + 1):
    val = ws[f'K{row}'].value
    if val:
        val_str = str(val).strip().lower()
        if any(w in val_str for w in ['无', '没有', '否']):
            conflict['无分歧'] += 1
        elif any(w in val_str for w in ['有', '是', '经常', '分歧']):
            conflict['有分歧'] += 1
        else:
            # Non-empty answer that matches none of the keyword lists.
            conflict['未知'] += 1

# Most frequent bucket first.
for status, count in sorted(conflict.items(), key=lambda x: -x[1]):
    print(f" • {status:8s}: {count:3d} 人")
|
||
|
||
# Whether the guardian frequently negates (puts down) the child.
print("\n4.2 是否经常否定孩子(第L列)")

# Keywords are matched as substrings, so the negative / "only occasionally"
# answers must be tested first: '没有' contains '有' and would otherwise be
# counted as 经常否定.
negation = defaultdict(int)
for row in range(2, ws.max_row + 1):
    val = ws[f'L{row}'].value
    if val:
        val_str = str(val).strip().lower()
        if any(w in val_str for w in ['否', '无', '没有', '偶尔']):
            negation['不否定或少否定'] += 1
        elif any(w in val_str for w in ['是', '有', '经常', '是的']):
            negation['经常否定'] += 1
        else:
            # Non-empty answer that matches none of the keyword lists.
            negation['未知'] += 1

# Most frequent bucket first.
for status, count in sorted(negation.items(), key=lambda x: -x[1]):
    print(f" • {status:12s}: {count:3d} 人")
|
||
|
||
# Whether scolding / physical punishment is used.
print("\n4.3 是否有打骂教育(第M列)")

# Keywords are matched as substrings, so the negative answers must be tested
# first: '没有' contains '有' and '没打过' contains '过', which would otherwise
# put those answers into 有打骂.
punishment = defaultdict(int)
for row in range(2, ws.max_row + 1):
    val = ws[f'M{row}'].value
    if val:
        val_str = str(val).strip().lower()
        if any(w in val_str for w in ['无', '没有', '没', '否']):
            punishment['无打骂'] += 1
        elif any(w in val_str for w in ['有', '是', '过', '经常', '常']):
            punishment['有打骂'] += 1
        else:
            # Non-empty answer that matches none of the keyword lists.
            punishment['未知'] += 1

# Most frequent bucket first.
for status, count in sorted(punishment.items(), key=lambda x: -x[1]):
    print(f" • {status:8s}: {count:3d} 人")
|
||
|
||
# Layer 5: service characteristics.
print("\n【第五层:服务特征维度】")
print("-" * 100)

# Purchased service period.
print("\n5.1 购买周期(第Q列)")

# Frequency of each raw answer in column Q, scanning from row 2.
duration = defaultdict(int)
for row in range(2, ws.max_row + 1):
    val = ws[f'Q{row}'].value
    if not val:
        continue  # empty cell: nothing to count
    duration[str(val).strip()] += 1

# Most frequent period first; ties keep their first-seen order.
for period, count in sorted(duration.items(), key=lambda kv: kv[1], reverse=True):
    print(f" • {period:10s}: {count:3d} 人")
|
||
|
||
print("\n" + "=" * 100)
print("【推荐的标签体系】")
print("=" * 100)

# Proposed taxonomy: level -> category -> number of tags in that category.
tagcat_design = {
    '第一级-监护人维度': {
        # mother, father, paternal grandmother, paternal grandfather,
        # maternal grandmother, maternal grandfather, other
        '监护人身份': 7,
        # primary, junior high, secondary vocational, senior high,
        # junior college, bachelor, master or above
        '文化程度': 7,
        # retired, medical/education/civil service, agriculture/industry,
        # self-employed, other
        '职业社会经济地位': 5,
    },
    '第二级-孩子维度': {
        '性别': 2,  # male, female
        # lower/upper primary, early junior high, junior-high graduating
        # class, early senior high, senior-high graduating class, other
        '学段': 7,
        '学习成绩': 4,  # excellent, good, average, poor
    },
    '第三级-家庭维度': {
        # nuclear, three generations, raised by grandparents, divorced,
        # single parent
        '家庭结构': 5,
        '亲子关系': 3,  # good, average, poor
    },
    '第四级-教育风险维度': {
        '教育理念一致性': 2,  # consistent, divergent
        '是否否定孩子': 2,  # yes, no
        '是否打骂': 2,  # yes, no
    },
    '第五级-服务特征维度': {
        '指导周期': 3,  # 60 days, 90 days, 180 days
    },
}

# Print every level with its categories and subtotal, accumulating the total.
total_tags = 0
for level, categories in tagcat_design.items():
    print(f"\n{level}")
    for cat_name, tag_count in categories.items():
        print(f" • {cat_name:20s}: {tag_count:2d} 个标签")
    level_total = sum(categories.values())
    print(f" ─ 小计:{level_total} 个标签")
    total_tags += level_total

# Closing rule and total; the label column is padded to 20 characters.
print("\n" + " " * 20 + "─" * 50)
print("总计".ljust(20) + f"{total_tags:2d} 个标签")
print("\n" + "=" * 100)
|