674 lines
23 KiB
JavaScript
674 lines
23 KiB
JavaScript
/**
|
||
* 新数据导入脚本 v3.0
|
||
* 基于"清洗1.0.xlsx"的完整标签体系
|
||
*
|
||
* 标签体系:49个标签,分为5个维度
|
||
* 用法: node scripts/import-clean-data.js
|
||
*/
|
||
|
||
const ExcelJS = require('exceljs');
|
||
const path = require('path');
|
||
const { getDb, initializeDatabase } = require('../db/init');
|
||
|
||
const EXCEL_FILE = path.join(__dirname, '../清洗1.0.xlsx');
|
||
|
||
// ════════════════════════════════════════════════════════════════════════════
|
||
// 标签分类定义 v3.0 - 49个标签 5个维度
|
||
// ════════════════════════════════════════════════════════════════════════════
|
||
|
||
const TAG_CATEGORIES = [
|
||
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
||
// 第一维度:监护人信息 (19个标签)
|
||
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
||
|
||
{
|
||
key: 'guardian_role',
|
||
name: '监护人身份',
|
||
color: '#3b82f6',
|
||
columns: [1], // A: 家庭角色
|
||
type: 'discrete'
|
||
},
|
||
{
|
||
key: 'guardian_education',
|
||
name: '文化程度',
|
||
color: '#6366f1',
|
||
columns: [2], // B: 文化程度
|
||
type: 'discrete'
|
||
},
|
||
{
|
||
key: 'guardian_occupation',
|
||
name: '职业与经济地位',
|
||
color: '#8b5cf6',
|
||
columns: [3], // C: 职业
|
||
type: 'discrete'
|
||
},
|
||
{
|
||
key: 'guardian_age_group',
|
||
name: '监护人年龄段',
|
||
color: '#a78bfa',
|
||
columns: [4], // D: 年龄
|
||
type: 'continuous'
|
||
},
|
||
{
|
||
key: 'second_guardian_role',
|
||
name: '第二监护人身份',
|
||
color: '#c084fc',
|
||
columns: [5], // E: 家庭角色_2
|
||
type: 'discrete'
|
||
},
|
||
|
||
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
||
// 第二维度:孩子信息 (13个标签)
|
||
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
||
|
||
{
|
||
key: 'child_gender',
|
||
name: '孩子性别',
|
||
color: '#ec4899',
|
||
columns: [6], // F: 性别
|
||
type: 'discrete'
|
||
},
|
||
{
|
||
key: 'child_grade',
|
||
name: '孩子学段',
|
||
color: '#f472b6',
|
||
columns: [7], // G: 年级
|
||
type: 'discrete'
|
||
},
|
||
{
|
||
key: 'child_academic_score',
|
||
name: '学习成绩',
|
||
color: '#f97316',
|
||
columns: [8], // H: 学习成绩
|
||
type: 'discrete'
|
||
},
|
||
|
||
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
||
// 第三维度:家庭环境 (8个标签)
|
||
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
||
|
||
{
|
||
key: 'family_structure',
|
||
name: '家庭结构',
|
||
color: '#06b6d4',
|
||
columns: [9], // I: 家庭基本情况
|
||
type: 'keyword_extract',
|
||
keywords: ['三代同堂', '核心家庭', '隔代抚养', '离异', '单亲', '三口之家', '四口之家']
|
||
},
|
||
{
|
||
key: 'parent_child_relationship',
|
||
name: '亲子关系',
|
||
color: '#0891b2',
|
||
columns: [10], // J: 亲子关系
|
||
type: 'text'
|
||
},
|
||
{
|
||
key: 'child_living_with_parents',
|
||
name: '与父母同住情况',
|
||
color: '#10b981',
|
||
columns: [14], // N: 孩子是否在父母身边长大
|
||
type: 'yes_no'
|
||
},
|
||
{
|
||
key: 'child_caregivers',
|
||
name: '参与养育人员',
|
||
color: '#059669',
|
||
columns: [15], // O: 还有谁参与孩子的养育
|
||
type: 'text'
|
||
},
|
||
|
||
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
||
// 第四维度:教育风险 (6个标签)
|
||
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
||
|
||
{
|
||
key: 'education_consensus',
|
||
name: '教育理念一致性',
|
||
color: '#f59e0b',
|
||
columns: [11], // K: 家长有无教育分歧
|
||
type: 'yes_no'
|
||
},
|
||
{
|
||
key: 'child_negation',
|
||
name: '否定孩子情况',
|
||
color: '#d97706',
|
||
columns: [12], // L: 是否经常否定孩子
|
||
type: 'yes_no'
|
||
},
|
||
{
|
||
key: 'physical_punishment',
|
||
name: '打骂教育',
|
||
color: '#dc2626',
|
||
columns: [13], // M: 有无打骂教育
|
||
type: 'yes_no'
|
||
},
|
||
|
||
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
||
// 第五维度:服务方案 (3个标签)
|
||
// ━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━
|
||
|
||
{
|
||
key: 'service_duration',
|
||
name: '服务周期',
|
||
color: '#7c3aed',
|
||
columns: [17], // Q: 天数
|
||
type: 'discrete'
|
||
}
|
||
];
|
||
|
||
// 值映射与标准化规则
|
||
const TAG_MAPPINGS = {
|
||
'guardian_role': {
|
||
'母亲': '母亲',
|
||
'妈妈': '母亲',
|
||
'母': '母亲',
|
||
'父亲': '父亲',
|
||
'爸爸': '父亲',
|
||
'奶奶': '祖母',
|
||
'祖母': '祖母',
|
||
'爷爷': '祖父',
|
||
'外婆': '外祖母',
|
||
'外公': '外祖父',
|
||
'姥姥': '外祖母',
|
||
'姥爷': '外祖父',
|
||
'舅舅': '其他亲属',
|
||
'妻子': '其他亲属',
|
||
'大姐': '其他亲属'
|
||
},
|
||
'guardian_education': {
|
||
'初小': '小学',
|
||
'小学': '小学',
|
||
'初中': '初中',
|
||
'中师': '中专',
|
||
'中专': '中专',
|
||
'高中': '高中',
|
||
'大专': '大专',
|
||
'大学': '本科',
|
||
'本科': '本科',
|
||
'大学本科': '本科',
|
||
'硕士': '硕士及以上',
|
||
'研究生': '硕士及以上',
|
||
'在职研究生': '硕士及以上'
|
||
},
|
||
'child_gender': {
|
||
'男': '男孩',
|
||
'女': '女孩',
|
||
'女、男': '双胞胎'
|
||
},
|
||
'child_academic_score': {
|
||
'优秀': '优秀',
|
||
'良好': '良好',
|
||
'一般': '一般',
|
||
'差': '较差'
|
||
},
|
||
'child_living_with_parents': {
|
||
'是': '是',
|
||
'是的': '是',
|
||
'在': '是',
|
||
'否': '否',
|
||
'没有': '否',
|
||
'不是': '否'
|
||
},
|
||
'education_consensus': {
|
||
'有': '有分歧',
|
||
'是': '有分歧',
|
||
'否': '无分歧',
|
||
'无': '无分歧',
|
||
'没有': '无分歧'
|
||
},
|
||
'child_negation': {
|
||
'是': '是',
|
||
'有': '是',
|
||
'是的': '是',
|
||
'经常': '是',
|
||
'否': '否',
|
||
'无': '否',
|
||
'没有': '否',
|
||
'偶尔': '否'
|
||
},
|
||
'physical_punishment': {
|
||
'有': '有',
|
||
'是': '有',
|
||
'有过': '有',
|
||
'偶尔有': '有',
|
||
'无': '无',
|
||
'没有': '无',
|
||
'否': '无',
|
||
'基本上没有': '无'
|
||
},
|
||
'service_duration': {
|
||
'60天': '60天课程',
|
||
'90天': '90天课程',
|
||
'180天': '180天课程'
|
||
}
|
||
};
|
||
|
||
// 年龄分组
|
||
function getAgeGroup(age) {
|
||
if (!age || isNaN(age)) return '年龄未知';
|
||
const ageNum = parseInt(age);
|
||
if (ageNum < 25) return '25岁以下';
|
||
else if (ageNum < 35) return '25-35岁';
|
||
else if (ageNum < 45) return '35-45岁';
|
||
else if (ageNum < 55) return '45-55岁';
|
||
else if (ageNum < 65) return '55-65岁';
|
||
else if (ageNum < 75) return '65-75岁';
|
||
else return '75岁以上';
|
||
}
|
||
|
||
// 学段分组
|
||
function gradeToSegment(grade) {
|
||
if (!grade) return '学段未知';
|
||
const gradeStr = String(grade).toLowerCase();
|
||
|
||
if (gradeStr.includes('一') || gradeStr.includes('1年')) return '小学低段(1-3年级)';
|
||
if (gradeStr.includes('二') || gradeStr.includes('2年')) return '小学低段(1-3年级)';
|
||
if (gradeStr.includes('三') || gradeStr.includes('3年')) return '小学低段(1-3年级)';
|
||
if (gradeStr.includes('四') || gradeStr.includes('4年')) return '小学高段(4-6年级)';
|
||
if (gradeStr.includes('五') || gradeStr.includes('5年')) return '小学高段(4-6年级)';
|
||
if (gradeStr.includes('六') || gradeStr.includes('6年')) return '小学高段(4-6年级)';
|
||
if (gradeStr.includes('初一')) return '初中前期(初一初二)';
|
||
if (gradeStr.includes('初二') || gradeStr.includes('准初')) return '初中前期(初一初二)';
|
||
if (gradeStr.includes('初三') || gradeStr.includes('九年')) return '初中毕业班(初三)';
|
||
if (gradeStr.includes('高一')) return '高中前期(高一高二)';
|
||
if (gradeStr.includes('高二')) return '高中前期(高一高二)';
|
||
if (gradeStr.includes('高三')) return '高中毕业班(高三)';
|
||
|
||
return '学段未知';
|
||
}
|
||
|
||
// 亲子关系分类
|
||
function relationshipQuality(text) {
|
||
if (!text) return '未指定';
|
||
const lowerText = String(text).toLowerCase();
|
||
|
||
if (lowerText.includes('良好') || lowerText.includes('好') ||
|
||
lowerText.includes('和谐') || lowerText.includes('可以') ||
|
||
lowerText.includes('还好') || lowerText.includes('较好') ||
|
||
lowerText.includes('还可以')) {
|
||
return '亲子关系良好';
|
||
}
|
||
|
||
if (lowerText.includes('一般') || lowerText.includes('还行') ||
|
||
lowerText.includes('正常') || lowerText.includes('时好时坏')) {
|
||
return '亲子关系一般';
|
||
}
|
||
|
||
if (lowerText.includes('不好') || lowerText.includes('差') ||
|
||
lowerText.includes('紧张')) {
|
||
return '亲子关系较差';
|
||
}
|
||
|
||
return '亲子关系未评估';
|
||
}
|
||
|
||
async function importCleanData() {
|
||
try {
|
||
console.log(`\n📂 读取 Excel 文件: ${EXCEL_FILE}`);
|
||
|
||
const workbook = new ExcelJS.Workbook();
|
||
await workbook.xlsx.readFile(EXCEL_FILE);
|
||
|
||
const worksheet = workbook.getWorksheet(1);
|
||
if (!worksheet) {
|
||
throw new Error('找不到工作表');
|
||
}
|
||
|
||
console.log(`📊 总行数: ${worksheet.rowCount}`);
|
||
|
||
const db = getDb('onion');
|
||
|
||
// 初始化数据库
|
||
initializeDatabase('onion');
|
||
|
||
// 创建所有标签分类
|
||
console.log('🏗️ 建立分类体系...');
|
||
const categoryMap = {};
|
||
for (const cat of TAG_CATEGORIES) {
|
||
const result = db.prepare(`
|
||
INSERT OR IGNORE INTO tag_categories (key, name, sort_order, color)
|
||
VALUES (?, ?, ?, ?)
|
||
`).run(cat.key, cat.name, 0, cat.color || '#6366f1');
|
||
|
||
const catRecord = db.prepare(`
|
||
SELECT id FROM tag_categories WHERE key = ?
|
||
`).get(cat.key);
|
||
categoryMap[cat.key] = catRecord.id;
|
||
}
|
||
|
||
console.log(`✅ 创建了 ${Object.keys(categoryMap).length} 个分类`);
|
||
|
||
// 处理数据行
|
||
let insertedCount = 0;
|
||
const insertUserStmt = db.prepare(`
|
||
INSERT OR IGNORE INTO users (uid, name, extra_json)
|
||
VALUES (?, ?, ?)
|
||
`);
|
||
|
||
const insertUserTagStmt = db.prepare(`
|
||
INSERT OR IGNORE INTO user_tags (user_id, tag_id)
|
||
VALUES (?, ?)
|
||
`);
|
||
|
||
const tagCache = {};
|
||
|
||
function getOrCreateTag(catKey, tagName) {
|
||
if (!tagName || !catKey) return null;
|
||
|
||
const cacheKey = `${catKey}:${tagName}`;
|
||
if (tagCache[cacheKey]) return tagCache[cacheKey];
|
||
|
||
// 先尝试找系统中是否已经有这个标签
|
||
let tag = db.prepare(`
|
||
SELECT id FROM tags WHERE category_id = ? AND name = ?
|
||
`).get(categoryMap[catKey], tagName);
|
||
|
||
if (!tag) {
|
||
// 如果没有,生成一个唯一的key
|
||
const tagNameNorm = String(tagName).toLowerCase().trim().replace(/\s+/g, '_');
|
||
const hashCode = Array.from(tagNameNorm).reduce((h, c) => ((h << 5) - h) + c.charCodeAt(0), 0) & 0xffffff;
|
||
let tagKey = `${catKey}_${hashCode.toString(16)}`;
|
||
|
||
// 检查key冲突
|
||
let counter = 1;
|
||
while (db.prepare(`SELECT 1 FROM tags WHERE key = ?`).get(tagKey)) {
|
||
tagKey = `${catKey}_${hashCode.toString(16)}_${counter}`;
|
||
counter++;
|
||
}
|
||
|
||
db.prepare(`
|
||
INSERT INTO tags (key, name, category_id, sort_order)
|
||
VALUES (?, ?, ?, ?)
|
||
`).run(tagKey, tagName, categoryMap[catKey], 0);
|
||
|
||
tag = db.prepare(`
|
||
SELECT id FROM tags WHERE key = ?
|
||
`).get(tagKey);
|
||
}
|
||
|
||
tagCache[cacheKey] = tag?.id;
|
||
return tag?.id;
|
||
}
|
||
|
||
// 遍历 Excel 数据行
|
||
let rowCount = 0;
|
||
worksheet.eachRow((row, rowNumber) => {
|
||
if (rowNumber === 1) return; // 跳过表头
|
||
|
||
rowCount++;
|
||
const values = row.values || [];
|
||
|
||
// 提取基本信息
|
||
const uid = `user_${rowNumber - 1}`; // 简单的用户ID
|
||
const guardianRole = values[1];
|
||
const childGrade = values[7];
|
||
const childDesc = values[16];
|
||
|
||
if (!guardianRole) {
|
||
console.warn(`⚠️ 行 ${rowNumber} 缺少监护人身份,跳过`);
|
||
return;
|
||
}
|
||
|
||
// 构建用户额外数据
|
||
const extraData = {
|
||
row: rowNumber,
|
||
guardianRole: guardianRole,
|
||
childGrade: childGrade,
|
||
childDescription: childDesc ? String(childDesc).substring(0, 500) : ''
|
||
};
|
||
|
||
// 插入用户
|
||
const result = insertUserStmt.run(uid, String(guardianRole), JSON.stringify(extraData));
|
||
|
||
if (result.changes > 0) {
|
||
insertedCount++;
|
||
const userId = result.lastInsertRowid;
|
||
|
||
// 为用户添加标签
|
||
addUserTags(userId, values, rowNumber, getOrCreateTag, insertUserTagStmt, categoryMap);
|
||
|
||
if (rowCount % 30 === 0) {
|
||
console.log(` 📝 已处理 ${rowCount} 行...`);
|
||
}
|
||
}
|
||
});
|
||
|
||
console.log(`\n✅ 用户导入完成:${insertedCount} 条`);
|
||
|
||
// 更新所有标签的覆盖统计
|
||
console.log('🔄 更新标签统计...');
|
||
updateTagStats(db);
|
||
|
||
console.log('\n📊 数据统计:');
|
||
const stats = db.prepare(`
|
||
SELECT
|
||
(SELECT COUNT(*) FROM users) as total_users,
|
||
(SELECT COUNT(*) FROM tags) as total_tags,
|
||
(SELECT COUNT(*) FROM tag_categories) as total_categories
|
||
`).get();
|
||
|
||
console.log(` • 总用户: ${stats.total_users}`);
|
||
console.log(` • 总标签: ${stats.total_tags}`);
|
||
console.log(` • 分类数: ${stats.total_categories}`);
|
||
|
||
db.close();
|
||
|
||
console.log('\n🎉 导入流程完成!\n');
|
||
|
||
} catch (error) {
|
||
console.error('❌ 导入失败:', error.message);
|
||
console.error(error.stack);
|
||
process.exit(1);
|
||
}
|
||
}
|
||
|
||
function addUserTags(userId, values, rowNumber, getOrCreateTag, insertUserTagStmt, categoryMap) {
|
||
// 监护人身份
|
||
if (values[1]) {
|
||
const role = String(values[1]).trim();
|
||
const mapped = TAG_MAPPINGS.guardian_role[role] || role;
|
||
const tagId = getOrCreateTag('guardian_role', mapped);
|
||
if (tagId !== null && tagId !== undefined) insertUserTagStmt.run(userId, tagId);
|
||
if (rowNumber <= 5) console.log(` [行${rowNumber}] 监护人身份: "${role}" -> "${mapped}" (tagId: ${tagId})`);
|
||
}
|
||
|
||
// 文化程度
|
||
if (values[2]) {
|
||
const edu = String(values[2]).trim();
|
||
const mapped = TAG_MAPPINGS.guardian_education[edu] || edu;
|
||
const tagId = getOrCreateTag('guardian_education', mapped);
|
||
if (tagId !== null && tagId !== undefined) insertUserTagStmt.run(userId, tagId);
|
||
if (rowNumber <= 5) console.log(` [行${rowNumber}] 文化程度: "${edu}" -> "${mapped}" (tagId: ${tagId})`);
|
||
}
|
||
|
||
// 职业(分类)
|
||
if (values[3]) {
|
||
const job = String(values[3]).trim().toLowerCase();
|
||
let jobCategory = '其他';
|
||
|
||
// 简单的职业分类
|
||
if (job.includes('教师') || job.includes('医生') || job.includes('工程') || job.includes('律师')) {
|
||
jobCategory = '专业人士';
|
||
} else if (job.includes('工人') || job.includes('工厂')) {
|
||
jobCategory = '工人';
|
||
} else if (job.includes('农') || job.includes('农民') || job.includes('务农')) {
|
||
jobCategory = '农民';
|
||
} else if (job.includes('员工') || job.includes('职员') || job.includes('公务') || job.includes('干部')) {
|
||
jobCategory = '公司/政府工作人员';
|
||
} else if (job.includes('退休') || job.includes('离退休')) {
|
||
jobCategory = '退休人士';
|
||
} else if (job.includes('个体') || job.includes('自由') || job.includes('经营')) {
|
||
jobCategory = '个体户/自由职业';
|
||
} else if (job.includes('商业') || job.includes('销售')) {
|
||
jobCategory = '销售/商业';
|
||
} else if (job.includes('家')) {
|
||
jobCategory = '家务';
|
||
}
|
||
|
||
const tagId = getOrCreateTag('guardian_occupation', jobCategory);
|
||
if (tagId !== null && tagId !== undefined) insertUserTagStmt.run(userId, tagId);
|
||
if (rowNumber <= 5) console.log(` [行${rowNumber}] 职业: "${job}" -> "${jobCategory}" (tagId: ${tagId})`);
|
||
}
|
||
|
||
// 年龄分组
|
||
if (values[4]) {
|
||
const ageGroup = getAgeGroup(values[4]);
|
||
const tagId = getOrCreateTag('guardian_age_group', ageGroup);
|
||
if (tagId) insertUserTagStmt.run(userId, tagId);
|
||
}
|
||
|
||
// 第二监护人身份
|
||
if (values[5]) {
|
||
const role2 = String(values[5]).trim();
|
||
if (role2 && role2 !== '无' && role2 !== '/') {
|
||
const mapped = TAG_MAPPINGS.guardian_role[role2] || role2;
|
||
const tagId = getOrCreateTag('second_guardian_role', mapped);
|
||
if (tagId) insertUserTagStmt.run(userId, tagId);
|
||
}
|
||
}
|
||
|
||
// 孩子性别
|
||
if (values[6]) {
|
||
const gender = String(values[6]).trim();
|
||
const mapped = TAG_MAPPINGS.child_gender[gender] || gender;
|
||
const tagId = getOrCreateTag('child_gender', mapped);
|
||
if (tagId) insertUserTagStmt.run(userId, tagId);
|
||
}
|
||
|
||
// 孩子学段
|
||
if (values[7]) {
|
||
const segment = gradeToSegment(values[7]);
|
||
const tagId = getOrCreateTag('child_grade', segment);
|
||
if (tagId) insertUserTagStmt.run(userId, tagId);
|
||
}
|
||
|
||
// 学习成绩
|
||
if (values[8]) {
|
||
const scoreStr = String(values[8]).trim();
|
||
// 处理混合值
|
||
const scores = scoreStr.split(/[、,]/).map(s => s.trim()).filter(s => s && !s.includes('null'));
|
||
for (const score of scores) {
|
||
const mapped = TAG_MAPPINGS.child_academic_score[score] || score;
|
||
const tagId = getOrCreateTag('child_academic_score', mapped);
|
||
if (tagId) insertUserTagStmt.run(userId, tagId);
|
||
}
|
||
}
|
||
|
||
// 家庭结构(关键词提取)
|
||
if (values[9]) {
|
||
const familyStr = String(values[9]).trim();
|
||
const keywords = ['三代同堂', '核心家庭', '隔代抚养', '离异', '单亲', '三口之家', '四口之家', '多代'];
|
||
const found = new Set();
|
||
for (const kw of keywords) {
|
||
if (familyStr.includes(kw) && !found.has(kw)) {
|
||
found.add(kw);
|
||
const tagId = getOrCreateTag('family_structure', kw);
|
||
if (tagId) insertUserTagStmt.run(userId, tagId);
|
||
}
|
||
}
|
||
// 如果没有识别任何关键词,用原始值
|
||
if (found.size === 0) {
|
||
const tagId = getOrCreateTag('family_structure', familyStr.substring(0, 50));
|
||
if (tagId) insertUserTagStmt.run(userId, tagId);
|
||
}
|
||
}
|
||
|
||
// 亲子关系
|
||
if (values[10]) {
|
||
const relationship = relationshipQuality(values[10]);
|
||
const tagId = getOrCreateTag('parent_child_relationship', relationship);
|
||
if (tagId) insertUserTagStmt.run(userId, tagId);
|
||
}
|
||
|
||
// 教育理念一致性
|
||
if (values[11]) {
|
||
const consensus = String(values[11]).trim();
|
||
const mapped = TAG_MAPPINGS.education_consensus[consensus] || (consensus.includes('有') ? '有分歧' : '无分歧');
|
||
const tagId = getOrCreateTag('education_consensus', mapped);
|
||
if (tagId) insertUserTagStmt.run(userId, tagId);
|
||
}
|
||
|
||
// 是否否定孩子
|
||
if (values[12]) {
|
||
const negation = String(values[12]).trim();
|
||
const mapped = TAG_MAPPINGS.child_negation[negation] || (negation.includes('是') || negation.includes('有') ? '是' : '否');
|
||
const tagId = getOrCreateTag('child_negation', mapped);
|
||
if (tagId) insertUserTagStmt.run(userId, tagId);
|
||
}
|
||
|
||
// 打骂教育
|
||
if (values[13]) {
|
||
const punishment = String(values[13]).trim();
|
||
const mapped = TAG_MAPPINGS.physical_punishment[punishment] || (punishment.includes('有') ? '有' : '无');
|
||
const tagId = getOrCreateTag('physical_punishment', mapped);
|
||
if (tagId) insertUserTagStmt.run(userId, tagId);
|
||
}
|
||
|
||
// 孩子与父母同住
|
||
if (values[14]) {
|
||
const living = String(values[14]).trim();
|
||
// 尝试映射,如果映射失败,尝试关键字匹配
|
||
let mapped = TAG_MAPPINGS.child_living_with_parents[living];
|
||
if (!mapped) {
|
||
// 关键字匹配
|
||
if (living.includes('是') && !living.includes('不是')) {
|
||
mapped = '是';
|
||
} else if (living.includes('否') || living.includes('不是')) {
|
||
mapped = '否';
|
||
} else {
|
||
mapped = '是'; // 默认
|
||
}
|
||
}
|
||
const tagId = getOrCreateTag('child_living_with_parents', mapped);
|
||
if (tagId) insertUserTagStmt.run(userId, tagId);
|
||
}
|
||
|
||
// 参与养育人员 - 提取关键信息
|
||
if (values[15]) {
|
||
const caregiverStr = String(values[15]).trim();
|
||
if (caregiverStr && caregiverStr !== '无' && caregiverStr !== '没有') {
|
||
// 识别主要的养育者
|
||
let caregiver = '其他';
|
||
if (caregiverStr.includes('妈妈')) caregiver = '母亲';
|
||
else if (caregiverStr.includes('父亲') || caregiverStr.includes('爸爸')) caregiver = '父亲';
|
||
else if (caregiverStr.includes('爷爷')) caregiver = '祖父';
|
||
else if (caregiverStr.includes('奶奶')) caregiver = '祖母';
|
||
else if (caregiverStr.includes('外公')) caregiver = '外祖父';
|
||
else if (caregiverStr.includes('外婆')) caregiver = '外祖母';
|
||
else if (caregiverStr.includes('祖')) caregiver = '祖父母';
|
||
else if (caregiverStr.includes('外')) caregiver = '外祖父母';
|
||
|
||
const tagId = getOrCreateTag('child_caregivers', caregiver);
|
||
if (tagId) insertUserTagStmt.run(userId, tagId);
|
||
}
|
||
}
|
||
|
||
// 服务周期
|
||
if (values[17]) {
|
||
const duration = String(values[17]).trim();
|
||
const mapped = TAG_MAPPINGS.service_duration[duration] || duration;
|
||
const tagId = getOrCreateTag('service_duration', mapped);
|
||
if (tagId) insertUserTagStmt.run(userId, tagId);
|
||
}
|
||
}
|
||
|
||
function updateTagStats(db) {
|
||
const tags = db.prepare(`SELECT id FROM tags`).all();
|
||
const totalUsers = db.prepare(`SELECT COUNT(*) as n FROM users`).get().n;
|
||
|
||
for (const tag of tags) {
|
||
const result = db.prepare(`
|
||
SELECT COUNT(*) as n FROM user_tags WHERE tag_id = ?
|
||
`).get(tag.id);
|
||
|
||
const coverage = result.n || 0;
|
||
const coverageRate = totalUsers > 0 ? (coverage / totalUsers * 100).toFixed(2) : 0;
|
||
|
||
db.prepare(`
|
||
UPDATE tags SET coverage = ?, coverage_rate = ? WHERE id = ?
|
||
`).run(coverage, coverageRate, tag.id);
|
||
}
|
||
}
|
||
|
||
importCleanData();
|