415 lines
11 KiB
JavaScript
415 lines
11 KiB
JavaScript
/**
|
||
* Excel 数据导入脚本 v2
|
||
* 将"家庭教育档案-天数.xlsx"中的完整数据导入到数据库
|
||
* 支持多维度标签分类
|
||
*
|
||
* 用法: node scripts/import-excel.js [path/to/file.xlsx]
|
||
*/
|
||
|
||
const ExcelJS = require('exceljs');
|
||
const path = require('path');
|
||
const { getDb, initializeDatabase } = require('../db/init');
|
||
|
||
const EXCEL_FILE = process.argv[2] || path.join(__dirname, '../家庭教育档案-天数.xlsx');
|
||
|
||
// ────────────────────────────────────
|
||
// 标签分类定义
|
||
// ────────────────────────────────────
|
||
const TAG_CATEGORIES = [
|
||
// 1. 监护人信息
|
||
{
|
||
key: 'guardian_role',
|
||
name: '监护人身份',
|
||
color: '#3b82f6',
|
||
column: 3 // C: 家庭角色
|
||
},
|
||
{
|
||
key: 'guardian_education',
|
||
name: '监护人文化程度',
|
||
color: '#8b5cf6',
|
||
column: 4 // D: 文化程度
|
||
},
|
||
{
|
||
key: 'guardian1_personality',
|
||
name: '监护人1性格特征',
|
||
color: '#a78bfa',
|
||
column: 7 // G: 性格特征
|
||
},
|
||
{
|
||
key: 'guardian2_personality',
|
||
name: '监护人2性格特征',
|
||
color: '#c084fc',
|
||
column: 14 // N: 性格特征_2
|
||
},
|
||
|
||
// 2. 孩子信息
|
||
{
|
||
key: 'child_gender',
|
||
name: '孩子性别',
|
||
color: '#ec4899',
|
||
column: 17 // Q: 性别
|
||
},
|
||
{
|
||
key: 'child_personality',
|
||
name: '孩子性格特征',
|
||
color: '#f472b6',
|
||
column: 20 // T: 孩子性格特征
|
||
},
|
||
{
|
||
key: 'child_score',
|
||
name: '孩子学习成绩',
|
||
color: '#f59e0b',
|
||
column: 21 // U: 学习成绩
|
||
},
|
||
|
||
// 3. 家庭情况
|
||
{
|
||
key: 'family_structure',
|
||
name: '家庭基本情况',
|
||
color: '#06b6d4',
|
||
column: 23 // W: 家庭基本情况(含"三代同堂"等)
|
||
},
|
||
{
|
||
key: 'family_atmosphere',
|
||
name: '家庭氛围',
|
||
color: '#10b981',
|
||
column: 24 // X: 家庭氛围
|
||
},
|
||
{
|
||
key: 'parent_child_relation',
|
||
name: '亲子关系',
|
||
color: '#6366f1',
|
||
column: 25 // Y: 亲子关系
|
||
},
|
||
|
||
// 4. 教育行为
|
||
{
|
||
key: 'education_conflict',
|
||
name: '教育理念一致性',
|
||
column: 26 // Z: 家长有无教育分歧
|
||
},
|
||
{
|
||
key: 'child_negation',
|
||
name: '否定现象',
|
||
column: 27 // AA: 是否经常否定孩子
|
||
},
|
||
{
|
||
key: 'physical_punishment',
|
||
name: '纪律方式',
|
||
column: 28 // AB: 有无打骂教育
|
||
},
|
||
{
|
||
key: 'child_with_parents',
|
||
name: '亲子陪伴',
|
||
column: 29 // AC: 孩子是否在父母身边长大
|
||
},
|
||
|
||
// 5. 指导周期
|
||
{
|
||
key: 'duration',
|
||
name: '指导周期',
|
||
color: '#ef4444',
|
||
column: 38 // AL: 天数
|
||
}
|
||
];
|
||
|
||
// 标签值映射(将Excel值转化为标签)
|
||
const TAG_VALUE_MAP = {
|
||
'guardian_role': {
|
||
'母亲': '母亲',
|
||
'妈妈': '母亲',
|
||
'母': '母亲',
|
||
'父亲': '父亲',
|
||
'爸爸': '父亲',
|
||
'奶奶': '奶奶',
|
||
'爷爷': '爷爷',
|
||
'外婆': '外婆',
|
||
'外公': '外公',
|
||
'姥姥': '外婆',
|
||
'姥爷': '外公',
|
||
'祖母': '奶奶',
|
||
'大姐': '成年子女',
|
||
'舅舅': '其他亲属',
|
||
'妻子': '配偶'
|
||
},
|
||
'guardian_education': {
|
||
'初中': '初中',
|
||
'初小': '小学',
|
||
'小学': '小学',
|
||
'中师': '中专',
|
||
'中专': '中专',
|
||
'高中': '高中',
|
||
'大专': '大专',
|
||
'大学': '本科',
|
||
'本科': '本科',
|
||
'大学本科': '本科',
|
||
'硕士': '硕士',
|
||
'研究生': '硕士',
|
||
'在职研究生': '硕士'
|
||
},
|
||
'child_gender': {
|
||
'女': '女孩',
|
||
'男': '男孩',
|
||
'女、男': '双胞胎'
|
||
},
|
||
'child_score': {
|
||
'优秀': '优秀',
|
||
'良好': '良好',
|
||
'一般': '一般',
|
||
'差': '较差',
|
||
'较差': '较差',
|
||
'A': '优秀',
|
||
'B': '良好',
|
||
'C': '一般',
|
||
'D': '较差'
|
||
},
|
||
'duration': {
|
||
'60天': '60天课程',
|
||
'180天': '180天课程',
|
||
'90天': '90天课程',
|
||
'365天': '365天课程'
|
||
}
|
||
};
|
||
|
||
// 需要进行关键词提取的字段
|
||
const KEYWORD_EXTRACTION_FIELDS = {
|
||
'family_structure': {
|
||
column: 22,
|
||
keywords: ['三代同堂', '四口之家', '三口之家', '单亲', '离异', '隔代抚养', '二代', '三代']
|
||
}
|
||
};
|
||
|
||
async function importExcelData() {
|
||
try {
|
||
console.log(`\n📂 读取 Excel 文件: ${EXCEL_FILE}`);
|
||
|
||
const workbook = new ExcelJS.Workbook();
|
||
await workbook.xlsx.readFile(EXCEL_FILE);
|
||
|
||
const worksheet = workbook.getWorksheet(1);
|
||
if (!worksheet) {
|
||
throw new Error('找不到工作表');
|
||
}
|
||
|
||
console.log(`📊 总行数: ${worksheet.rowCount}`);
|
||
|
||
const db = getDb('onion');
|
||
|
||
// 初始化数据库
|
||
initializeDatabase('onion');
|
||
|
||
// 创建所有标签分类
|
||
console.log('🏗️ 建立分类体系...');
|
||
const categoryMap = {};
|
||
for (const cat of TAG_CATEGORIES) {
|
||
const result = db.prepare(`
|
||
INSERT OR IGNORE INTO tag_categories (key, name, sort_order, color)
|
||
VALUES (?, ?, ?, ?)
|
||
`).run(cat.key, cat.name, 0, cat.color || '#6366f1');
|
||
|
||
const catRecord = db.prepare(`
|
||
SELECT id FROM tag_categories WHERE key = ?
|
||
`).get(cat.key);
|
||
categoryMap[cat.key] = catRecord.id;
|
||
}
|
||
|
||
console.log(`✅ 创建了 ${Object.keys(categoryMap).length} 个分类`);
|
||
|
||
// 处理数据行
|
||
let insertedCount = 0;
|
||
const insertUserStmt = db.prepare(`
|
||
INSERT OR IGNORE INTO users (uid, name, extra_json)
|
||
VALUES (?, ?, ?)
|
||
`);
|
||
|
||
const insertUserTagStmt = db.prepare(`
|
||
INSERT OR IGNORE INTO user_tags (user_id, tag_id)
|
||
VALUES (?, ?)
|
||
`);
|
||
|
||
// 获取事先创建的标签ID映射
|
||
const tagCache = {};
|
||
|
||
function getOrCreateTag(catKey, tagName) {
|
||
if (!tagName || !catKey) return null;
|
||
|
||
const cacheKey = `${catKey}:${tagName}`;
|
||
if (tagCache[cacheKey]) return tagCache[cacheKey];
|
||
|
||
// 生成唯一的key - 对于长文本(性格特征)使用简化版本
|
||
let tagKey;
|
||
const isPersonality = catKey.includes('personality');
|
||
|
||
if (isPersonality && tagName.length > 30) {
|
||
// 对于长的性格特征,使用简化的标识符
|
||
// 使用前20个字符 + 长度id
|
||
const simplified = tagName.substring(0, 20).toLowerCase().replace(/\s+/g, '_').replace(/[^\w]/g, '');
|
||
const hash = require('crypto').createHash('md5').update(tagName).digest('hex').substring(0, 8);
|
||
tagKey = `${catKey}_${simplified}_${hash}`;
|
||
} else {
|
||
// 对于其他标签,使用原有方法
|
||
tagKey = `${catKey}_${tagName.toLowerCase().replace(/\s+/g, '_').replace(/[^\w]/g, '')}`;
|
||
}
|
||
|
||
const stmt = db.prepare(`
|
||
SELECT id FROM tags WHERE key = ?
|
||
`);
|
||
let tag = stmt.get(tagKey);
|
||
|
||
if (!tag) {
|
||
// 创建新标签
|
||
db.prepare(`
|
||
INSERT INTO tags (key, name, category_id, sort_order)
|
||
VALUES (?, ?, ?, ?)
|
||
`).run(tagKey, tagName, categoryMap[catKey], 0);
|
||
|
||
tag = stmt.get(tagKey);
|
||
}
|
||
|
||
tagCache[cacheKey] = tag?.id;
|
||
return tag?.id;
|
||
}
|
||
|
||
// 遍历Excel数据行
|
||
let rowCount = 0;
|
||
worksheet.eachRow((row, rowNumber) => {
|
||
if (rowNumber === 1) return; // 跳过表头
|
||
|
||
rowCount++;
|
||
const values = row.values || [];
|
||
|
||
// 提取基本信息
|
||
const fileName = values[1]; // 文件名称
|
||
const childName = values[16]; // 孩子姓名
|
||
|
||
if (!fileName) {
|
||
console.warn(`⚠️ 行 ${rowNumber} 缺少文件名,跳过`);
|
||
return;
|
||
}
|
||
|
||
// 构建用户额外数据
|
||
const extraData = {
|
||
fileName: fileName,
|
||
childName: childName || '',
|
||
guardian1Name: values[2],
|
||
childAge: values[17],
|
||
grade: values[19],
|
||
learningScore: values[21],
|
||
familyAddress: values[23],
|
||
questionnaireSummary: values[37],
|
||
};
|
||
|
||
// 插入用户
|
||
const result = insertUserStmt.run(fileName, childName || fileName, JSON.stringify(extraData));
|
||
|
||
if (result.changes > 0) {
|
||
insertedCount++;
|
||
const userId = result.lastInsertRowid;
|
||
|
||
// 为用户添加标签
|
||
addUserTags(userId, values, rowNumber, getOrCreateTag, insertUserTagStmt);
|
||
|
||
if (rowCount % 30 === 0) {
|
||
console.log(` 📝 已处理 ${rowCount} 行...`);
|
||
}
|
||
}
|
||
});
|
||
|
||
console.log(`\n✅ 用户导入完成:${insertedCount} 条`);
|
||
|
||
// 更新所有标签的覆盖统计
|
||
console.log('🔄 更新标签统计...');
|
||
updateTagStats(db);
|
||
|
||
console.log('\n📊 数据统计:');
|
||
const stats = db.prepare(`
|
||
SELECT
|
||
(SELECT COUNT(*) FROM users) as total_users,
|
||
(SELECT COUNT(*) FROM tags) as total_tags,
|
||
(SELECT COUNT(*) FROM tag_categories) as total_categories
|
||
`).get();
|
||
|
||
console.log(` • 总用户: ${stats.total_users}`);
|
||
console.log(` • 总标签: ${stats.total_tags}`);
|
||
console.log(` • 分类数: ${stats.total_categories}`);
|
||
|
||
db.close();
|
||
|
||
console.log('\n🎉 导入流程完成!\n');
|
||
|
||
} catch (error) {
|
||
console.error('❌ 导入失败:', error.message);
|
||
console.error(error.stack);
|
||
process.exit(1);
|
||
}
|
||
}
|
||
|
||
function addUserTags(userId, values, rowNumber, getOrCreateTag, insertUserTagStmt) {
|
||
for (const cat of TAG_CATEGORIES) {
|
||
const colIdx = cat.column;
|
||
if (colIdx >= values.length) continue;
|
||
|
||
let value = values[colIdx];
|
||
if (!value) continue;
|
||
|
||
value = String(value).trim();
|
||
|
||
// 特殊处理学习成绩的混合值(分解"优秀、良好"为两个标签)
|
||
if (cat.key === 'child_score' && value.includes('、')) {
|
||
const scores = value.split('、').map(s => s.trim());
|
||
for (const score of scores) {
|
||
const mapped = TAG_VALUE_MAP[cat.key]?.[score] || score;
|
||
const tagId = getOrCreateTag(cat.key, mapped);
|
||
if (tagId) {
|
||
insertUserTagStmt.run(userId, tagId);
|
||
}
|
||
}
|
||
continue;
|
||
}
|
||
|
||
// 处理值映射
|
||
if (TAG_VALUE_MAP[cat.key] && TAG_VALUE_MAP[cat.key][value]) {
|
||
value = TAG_VALUE_MAP[cat.key][value];
|
||
}
|
||
|
||
// 获取或创建标签
|
||
const tagId = getOrCreateTag(cat.key, value);
|
||
if (tagId) {
|
||
insertUserTagStmt.run(userId, tagId);
|
||
}
|
||
|
||
// 处理关键词提取
|
||
if (KEYWORD_EXTRACTION_FIELDS[cat.key]) {
|
||
const keywords = KEYWORD_EXTRACTION_FIELDS[cat.key].keywords;
|
||
for (const keyword of keywords) {
|
||
if (value.includes(keyword)) {
|
||
const kwTagId = getOrCreateTag(cat.key, keyword);
|
||
if (kwTagId) {
|
||
insertUserTagStmt.run(userId, kwTagId);
|
||
}
|
||
}
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
function updateTagStats(db) {
|
||
const tags = db.prepare(`SELECT id FROM tags`).all();
|
||
const totalUsers = db.prepare(`SELECT COUNT(*) as n FROM users`).get().n;
|
||
|
||
for (const tag of tags) {
|
||
const result = db.prepare(`
|
||
SELECT COUNT(*) as n FROM user_tags WHERE tag_id = ?
|
||
`).get(tag.id);
|
||
|
||
const coverage = result.n || 0;
|
||
const coverageRate = totalUsers > 0 ? (coverage / totalUsers * 100).toFixed(2) : 0;
|
||
|
||
db.prepare(`
|
||
UPDATE tags SET coverage = ?, coverage_rate = ? WHERE id = ?
|
||
`).run(coverage, coverageRate, tag.id);
|
||
}
|
||
}
|
||
|
||
importExcelData();
|