/**
 * Data seed — Onion customer big-data tagging system (K12 education / parent profiles).
 *
 * Target audience: parents of junior-high and senior-high students; the tags describe
 * family structure, academic situation, spending power, and similar attributes.
 *
 * Running this file populates the 'onion' database with the tag taxonomy below and
 * a set of randomly generated users with correlated tag assignments.
 */

const { getDb, initializeDatabase } = require('./init');

// =============================================
// Onion customer tag taxonomy
// =============================================

// Each category has a key, display name, UI color and an ordered list of tags.
// Array order is persisted as `sort_order` by seedData().
const TAG_SYSTEM = [
  {
    key: 'parent_identity', name: '家庭角色', color: '#6366f1',
    tags: [
      { key: 'pi_mom', name: '母亲主导', desc: '主要由母亲参与管理' },
      { key: 'pi_dad', name: '父亲主导', desc: '主要由父亲参与管理' },
      { key: 'pi_both', name: '双亲共育', desc: '父母共同活跃参与' },
      { key: 'pi_single', name: '单亲家庭', desc: '系统标识为单亲状态' },
      { key: 'pi_grand', name: '隔代参与', desc: '祖辈有关联操作或代付' },
    ],
  },
  {
    key: 'city_level', name: '所在城市', color: '#8b5cf6',
    tags: [
      { key: 'ct_tier1', name: '一线城市', desc: '北上广深' },
      { key: 'ct_new_tier1', name: '新一线', desc: '杭州/成都/武汉等15城' },
      { key: 'ct_tier2', name: '二线城市', desc: '省会及副省级城市' },
      { key: 'ct_tier3', name: '三线及以下', desc: '地级市及县城下沉市场' },
      { key: 'ct_overseas', name: '海外及港澳台', desc: '非大陆地区访问' },
    ],
  },
  {
    key: 'income', name: '家庭月收入', color: '#a855f7',
    tags: [
      { key: 'inc_high', name: '高收入 (>5w)', desc: '家庭月收入5万以上' },
      { key: 'inc_mid_high', name: '中高 (2w-5w)', desc: '家庭月收入2万至5万' },
      { key: 'inc_mid', name: '中等 (1w-2w)', desc: '家庭月收入1万至2万' },
      { key: 'inc_low', name: '偏低 (<1w)', desc: '家庭月收入1万以下' },
    ],
  },
  {
    key: 'child_count', name: '子女数量', color: '#ec4899',
    tags: [
      { key: 'cc_one', name: '独生子女', desc: '仅有一个孩子注册' },
      { key: 'cc_two', name: '二胎家庭', desc: '绑定两个孩子' },
      { key: 'cc_multi', name: '三胎及以上', desc: '绑定三个及以上孩子' },
      { key: 'cc_cross', name: '跨学段多孩', desc: '多个孩子处于不同学段' },
    ],
  },
  {
    key: 'child_stage', name: '孩子学段', color: '#f59e0b',
    tags: [
      { key: 'cs_mid', name: '初中阶段', desc: '处于初一至初三年级' },
      { key: 'cs_high', name: '高中阶段', desc: '处于高一至高三年级' },
      { key: 'cs_transition', name: '小升初/初升高', desc: '处于升学接轨期' },
    ],
  },
  {
    key: 'child_grade', name: '具体年级', color: '#f97316',
    tags: [
      { key: 'cg_mid1', name: '初一 (7年级)', desc: '初一年级' },
      { key: 'cg_mid2', name: '初二 (8年级)', desc: '初二年级' },
      { key: 'cg_mid3', name: '初三 (9年级)', desc: '中考备战期' },
      { key: 'cg_high1', name: '高一 (10年级)', desc: '高一年级' },
      { key: 'cg_high2', name: '高二 (11年级)', desc: '高二分班/学考期' },
      { key: 'cg_high3', name: '高三 (12年级)', desc: '高考冲刺期' },
    ],
  },
  {
    key: 'study_pref', name: '学习偏好', color: '#ef4444',
    tags: [
      { key: 'sp_top', name: '培优拔高', desc: '注重竞赛、难题突破' },
      { key: 'sp_base', name: '基础巩固', desc: '注重课内知识达标' },
      { key: 'sp_art', name: '艺体生', desc: '艺术/体育专业方向考学' },
      { key: 'sp_abroad', name: '出国留学', desc: '有国际路线意向' },
      { key: 'sp_self', name: '自主探究', desc: '孩子主动学习能力强' },
    ],
  },
  {
    key: 'subject_weak', name: '薄弱学科', color: '#14b8a6',
    tags: [
      { key: 'sw_math', name: '数学薄弱', desc: '数学经常低于平均分' },
      { key: 'sw_english', name: '英语薄弱', desc: '英语单词/听力为短板' },
      { key: 'sw_science', name: '理综薄弱', desc: '物理/化学跨学科困难' },
      { key: 'sw_arts', name: '文综薄弱', desc: '政史地背诵/理解困难' },
      { key: 'sw_chinese', name: '语文薄弱', desc: '阅读理解/作文得分低' },
    ],
  },
  {
    key: 'school_type', name: '学校类型', color: '#22c55e',
    tags: [
      { key: 'st_key', name: '重点/示范校', desc: '省/市级重点中学' },
      { key: 'st_normal', name: '普通公办', desc: '常规公理中学' },
      { key: 'st_private', name: '私立/民办', desc: '高收费民办学校' },
      { key: 'st_intl', name: '国际学校', desc: '双语或国际课程学校' },
      { key: 'st_town', name: '乡镇/县域', desc: '非市区下沉学校' },
    ],
  },
  {
    key: 'parent_job', name: '家长职业', color: '#3b82f6',
    tags: [
      { key: 'pj_gov', name: '体制内/国企', desc: '公务员、教师、医生等' },
      { key: 'pj_corp', name: '企业白领/高管', desc: '外企、大厂、管理层' },
      { key: 'pj_biz', name: '个体/私营', desc: '企业主、商户' },
      { key: 'pj_free', name: '自由职业', desc: '弹性工作制' },
      { key: 'pj_fulltime', name: '全职妈妈', desc: '脱产带娃' },
      { key: 'pj_worker', name: '蓝领/基层', desc: '制造业、服务业基层' },
    ],
  },
  {
    key: 'engagement', name: '活跃特征', color: '#06b6d4',
    tags: [
      { key: 'eng_active_daily', name: '日活用户', desc: '每日登录做题/检查' },
      { key: 'eng_weekend', name: '周末活跃', desc: '集中在周末使用' },
      { key: 'eng_exam', name: '考前突击', desc: '期中/期末活跃度飙升' },
      { key: 'eng_dormant', name: '沉默用户', desc: '超过30天未登录' },
      { key: 'eng_paid', name: '付费会员', desc: '购买了长期课程/资料' },
    ],
  },
  {
    key: 'device', name: '设备信息', color: '#64748b',
    tags: [
      { key: 'dv_ios', name: 'iOS 主导', desc: '主要用 iPhone/iPad' },
      { key: 'dv_android', name: 'Android 主导', desc: '主要用安卓设备' },
      { key: 'dv_pad', name: '平板活跃', desc: '大量时间在Pad上学习' },
      { key: 'dv_pc', name: 'PC/网页端', desc: '常用电脑宽屏上课' },
    ],
  },
];

// =============================================
// Data generation helpers
// =============================================

/**
 * Returns a random integer in the inclusive range [min, max].
 *
 * @param {number} min - Lower bound (inclusive).
 * @param {number} max - Upper bound (inclusive).
 * @returns {number} A uniformly distributed integer between min and max.
 */
function random(min, max) {
  return Math.floor(Math.random() * (max - min + 1)) + min;
}

/**
 * Returns a uniformly random element of `arr`.
 *
 * @param {Array<*>} arr - Candidate values.
 * @returns {*} One element of `arr`, or `undefined` when `arr` is empty.
 */
function pick(arr) {
  return arr[Math.floor(Math.random() * arr.length)];
}

/**
 * Picks one option's `value` with probability proportional to its `weight`.
 *
 * @param {Array<{value: *, weight: number}>} options - Candidates with numeric weights.
 * @returns {*} The `value` of the selected option.
 * @throws {TypeError} If `options` is empty or the weights do not sum to a positive
 *   finite number (e.g. a misspelled `weight` key). Without this check a NaN total
 *   makes every comparison fail and the last option is silently returned every time.
 */
function weightedPick(options) {
  const total = options.reduce((sum, option) => sum + option.weight, 0);
  if (!(total > 0) || !Number.isFinite(total)) {
    throw new TypeError(
      'weightedPick: options must be a non-empty list of { value, weight } with a positive total weight',
    );
  }

  let remaining = Math.random() * total;
  for (const option of options) {
    remaining -= option.weight;
    if (remaining <= 0) return option.value;
  }
  // Floating-point safety net: fall back to the last option.
  return options[options.length - 1].value;
}

/**
 * Initializes the 'onion' database and fills it with the tag taxonomy, generated
 * users, user-tag links and per-tag statistics. Progress is logged to stdout.
 * The database handle is closed when the function exits, including on failure.
 */
function seedData() {
  initializeDatabase('onion');
  const db = getDb('onion');

  try {
    console.log('🏗️ 开始生成 洋葱客户大数据 模拟数据...\n');

    const USER_COUNT = 50_000; // requested data volume: 50k users

    // === Step 1: create tag categories and tags ===
    console.log('📌 Step 1: 创建标签体系...');

    const insertCat = db.prepare(
      'INSERT INTO tag_categories (key, name, sort_order, color) VALUES (?, ?, ?, ?)'
    );
    const insertTag = db.prepare(
      'INSERT INTO tags (key, name, category_id, description, sort_order) VALUES (?, ?, ?, ?, ?)'
    );

    // tag key -> { id: inserted row id, catKey: owning category key }
    const tagMap = {};
    let totalTags = 0;

    const txTags = db.transaction(() => {
      TAG_SYSTEM.forEach((cat, ci) => {
        const catRes = insertCat.run(cat.key, cat.name, ci, cat.color);
        const catId = catRes.lastInsertRowid;
        cat.tags.forEach((tag, ti) => {
          const tagRes = insertTag.run(tag.key, tag.name, catId, tag.desc, ti);
          tagMap[tag.key] = { id: Number(tagRes.lastInsertRowid), catKey: cat.key };
          totalTags++;
        });
      });
    });
    txTags();
    console.log(` ✅ ${TAG_SYSTEM.length} 个分类,${totalTags} 个标签\n`);

    // === Step 2: generate users ===
    console.log(`👥 Step 2: 生成 ${USER_COUNT.toLocaleString()} 个家长/学生用户...`);

    const insertUser = db.prepare('INSERT INTO users (uid, name, email) VALUES (?, ?, ?)');
    const insertUserTag = db.prepare('INSERT OR IGNORE INTO user_tags (user_id, tag_id) VALUES (?, ?)');

    const BATCH = 5000;
    // Round up and clamp the last batch so USER_COUNT need not be a multiple of BATCH.
    const batchCount = Math.ceil(USER_COUNT / BATCH);
    let tagAssignments = 0;

    for (let batch = 0; batch < batchCount; batch++) {
      const firstIdx = batch * BATCH + 1;
      const lastIdx = Math.min((batch + 1) * BATCH, USER_COUNT);

      // One transaction per batch.
      const tx = db.transaction(() => {
        for (let idx = firstIdx; idx <= lastIdx; idx++) {
          const uid = `u_${String(idx).padStart(7, '0')}`;
          const userRes = insertUser.run(uid, `家长 ${idx}`, `parent${idx}@onion.example.com`);
          const userId = userRes.lastInsertRowid;

          const userTags = generateUserTags();
          for (const tagKey of userTags) {
            if (tagMap[tagKey]) {
              insertUserTag.run(userId, tagMap[tagKey].id);
              tagAssignments++;
            }
          }
        }
      });
      tx();

      // Report after the first batch and then after every second batch.
      if ((batch + 1) % 2 === 0 || batch === 0) {
        process.stdout.write(` 进度: ${lastIdx.toLocaleString()}/${USER_COUNT.toLocaleString()}\n`);
      }
    }
    console.log(`\n ✅ ${USER_COUNT.toLocaleString()} 个用户,${tagAssignments.toLocaleString()} 个标签关联\n`);

    // === Step 3: update tag statistics ===
    console.log('📈 Step 3: 统计标签覆盖...');
    db.exec(`
      UPDATE tags SET
        coverage = (SELECT COUNT(*) FROM user_tags WHERE tag_id = tags.id),
        coverage_rate = ROUND((SELECT COUNT(*) FROM user_tags WHERE tag_id = tags.id) * 100.0 / ${USER_COUNT}, 2)
    `);

    // Simulated trend value per tag, rounded to 2 decimals, drawn from [-3, 7).
    // NOTE(review): the original comment said "within ±5", which does not match the
    // `* 10 - 3` formula; behaviour is kept as-is — confirm which range is intended.
    const allTags = db.prepare('SELECT id FROM tags').all();
    const updateTrend = db.prepare('UPDATE tags SET trend = ? WHERE id = ?');
    const txTrend = db.transaction(() => {
      for (const t of allTags) {
        updateTrend.run(Number((Math.random() * 10 - 3).toFixed(2)), t.id);
      }
    });
    txTrend();

    console.log(' ✅ 统计完成\n');
    console.log('🎉 数据生成完毕!可启动 server.js');
  } finally {
    // Always release the handle, even if seeding fails part-way through.
    db.close();
  }
}

// =============================================
// Tag assignment logic (builds correlations between tags)
// =============================================

/**
 * Generates the tag keys for one simulated user. Later choices are conditioned on
 * earlier ones (city -> income -> study preference / school type / engagement ...)
 * so that the resulting data set contains correlations.
 *
 * @returns {string[]} De-duplicated list of tag keys from TAG_SYSTEM.
 */
function generateUserTags() {
  const tags = [];

  // Family role
  const role = weightedPick([
    { value: 'pi_mom', weight: 60 },
    { value: 'pi_dad', weight: 20 },
    { value: 'pi_both', weight: 12 },
    { value: 'pi_single', weight: 5 },
    { value: 'pi_grand', weight: 3 },
  ]);
  tags.push(role);

  // City tier
  const city = weightedPick([
    { value: 'ct_tier1', weight: 15 },
    { value: 'ct_new_tier1', weight: 25 },
    { value: 'ct_tier2', weight: 30 },
    { value: 'ct_tier3', weight: 28 },
    { value: 'ct_overseas', weight: 2 },
  ]);
  tags.push(city);

  // Income distribution depends on the city tier. Exactly one income tag is
  // assigned per user.
  let incomeWeights;
  if (city === 'ct_tier1' || city === 'ct_overseas') {
    incomeWeights = [
      { value: 'inc_high', weight: 30 },
      { value: 'inc_mid_high', weight: 40 },
      { value: 'inc_mid', weight: 20 },
      { value: 'inc_low', weight: 10 },
    ];
  } else if (city === 'ct_tier3') {
    incomeWeights = [
      { value: 'inc_high', weight: 5 },
      { value: 'inc_mid_high', weight: 15 },
      { value: 'inc_mid', weight: 40 },
      { value: 'inc_low', weight: 40 },
    ];
  } else {
    incomeWeights = [
      { value: 'inc_high', weight: 10 },
      { value: 'inc_mid_high', weight: 30 },
      { value: 'inc_mid', weight: 40 },
      { value: 'inc_low', weight: 20 },
    ];
  }
  const actualIncome = weightedPick(incomeWeights);
  tags.push(actualIncome);

  // Number of children
  const childCount = weightedPick([
    { value: 'cc_one', weight: 55 },
    { value: 'cc_two', weight: 40 },
    { value: 'cc_multi', weight: 5 },
  ]);
  tags.push(childCount);
  const hasMultipleChildren = childCount === 'cc_two' || childCount === 'cc_multi';
  if (hasMultipleChildren && Math.random() < 0.3) {
    tags.push('cc_cross');
  }

  // School stage and grade
  const stage = weightedPick([
    { value: 'cs_mid', weight: 60 },
    { value: 'cs_high', weight: 40 },
  ]);
  tags.push(stage);

  if (stage === 'cs_mid') {
    tags.push(weightedPick([
      { value: 'cg_mid1', weight: 35 },
      { value: 'cg_mid2', weight: 35 },
      { value: 'cg_mid3', weight: 30 },
    ]));
  } else {
    tags.push(weightedPick([
      { value: 'cg_high1', weight: 40 },
      { value: 'cg_high2', weight: 35 },
      { value: 'cg_high3', weight: 25 },
    ]));
  }

  // Multi-child families usually get an additional stage/grade tag.
  if (hasMultipleChildren && Math.random() < 0.7) {
    tags.push(pick(['cs_mid', 'cs_high']));
    tags.push(pick(['cg_mid1', 'cg_mid2', 'cg_mid3', 'cg_high1', 'cg_high2', 'cg_high3']));
  }
  if (Math.random() < 0.25) tags.push('cs_transition');

  // Study preference: high income or tier-1 city shifts weight to advanced/abroad tracks.
  const prefWeights = (actualIncome === 'inc_high' || city === 'ct_tier1')
    ? [
      { value: 'sp_top', weight: 30 },
      { value: 'sp_abroad', weight: 20 },
      { value: 'sp_base', weight: 30 },
      { value: 'sp_self', weight: 15 },
      { value: 'sp_art', weight: 5 },
    ]
    : [
      { value: 'sp_base', weight: 60 },
      { value: 'sp_top', weight: 15 },
      { value: 'sp_self', weight: 15 },
      { value: 'sp_art', weight: 8 },
      { value: 'sp_abroad', weight: 2 },
    ];
  tags.push(weightedPick(prefWeights));
  if (Math.random() < 0.6) tags.push(pick(['sp_base', 'sp_self', 'sp_top']));

  // Weak subjects (up to three picks so that overlaps occur naturally)
  tags.push(weightedPick([
    { value: 'sw_math', weight: 35 },
    { value: 'sw_english', weight: 25 },
    { value: 'sw_science', weight: 20 },
    { value: 'sw_arts', weight: 10 },
    { value: 'sw_chinese', weight: 10 },
  ]));
  if (Math.random() < 0.7) tags.push(pick(['sw_math', 'sw_english', 'sw_science', 'sw_arts']));
  if (Math.random() < 0.3) tags.push(pick(['sw_chinese', 'sw_arts', 'sw_science']));

  // School type
  if (city === 'ct_tier3') {
    tags.push(weightedPick([
      { value: 'st_normal', weight: 50 },
      { value: 'st_town', weight: 40 },
      { value: 'st_key', weight: 10 },
    ]));
  } else {
    const stWeights = [
      { value: 'st_key', weight: 30 },
      { value: 'st_normal', weight: 50 },
      { value: 'st_private', weight: 15 },
    ];
    // International schools are only an option for high-income families.
    if (actualIncome === 'inc_high') stWeights.push({ value: 'st_intl', weight: 15 });
    tags.push(weightedPick(stWeights));
  }

  // Parent occupation (parents may have different jobs, so a second one is sometimes added)
  if (role === 'pi_mom' && Math.random() < 0.2) {
    tags.push('pj_fulltime');
  } else {
    tags.push(weightedPick([
      { value: 'pj_gov', weight: 25 },
      { value: 'pj_corp', weight: 30 },
      { value: 'pj_biz', weight: 15 },
      { value: 'pj_free', weight: 10 },
      { value: 'pj_worker', weight: 20 },
    ]));
  }
  if (Math.random() < 0.4) {
    tags.push(pick(['pj_gov', 'pj_corp', 'pj_biz', 'pj_free']));
  }

  // Engagement: weights are chosen from the profile built so far; first match wins.
  let engWeights;
  if ((actualIncome === 'inc_high' || actualIncome === 'inc_mid_high') && tags.includes('sp_top')) {
    // Higher income + advanced-track preference -> most likely a daily active user
    engWeights = [
      { value: 'eng_active_daily', weight: 40 },
      { value: 'eng_weekend', weight: 30 },
      { value: 'eng_exam', weight: 20 },
      { value: 'eng_dormant', weight: 10 },
    ];
  } else if (tags.includes('pj_fulltime')) {
    // Full-time mother -> high daily-active probability
    engWeights = [
      { value: 'eng_active_daily', weight: 50 },
      { value: 'eng_weekend', weight: 25 },
      { value: 'eng_exam', weight: 15 },
      { value: 'eng_dormant', weight: 10 },
    ];
  } else if (tags.includes('pj_gov')) {
    // Public sector / state-owned enterprise -> medium daily-active probability
    engWeights = [
      { value: 'eng_active_daily', weight: 30 },
      { value: 'eng_weekend', weight: 35 },
      { value: 'eng_exam', weight: 25 },
      { value: 'eng_dormant', weight: 10 },
    ];
  } else if (actualIncome === 'inc_high') {
    // High-income users are more active overall
    engWeights = [
      { value: 'eng_active_daily', weight: 35 },
      { value: 'eng_weekend', weight: 30 },
      { value: 'eng_exam', weight: 25 },
      { value: 'eng_dormant', weight: 10 },
    ];
  } else {
    // Everyone else
    engWeights = [
      { value: 'eng_active_daily', weight: 20 },
      { value: 'eng_weekend', weight: 35 },
      { value: 'eng_exam', weight: 30 },
      { value: 'eng_dormant', weight: 15 },
    ];
  }
  tags.push(weightedPick(engWeights));
  if (Math.random() < 0.4) tags.push('eng_exam');

  // Paid membership: high income, advanced-track or daily-active users are more likely to pay.
  if (actualIncome === 'inc_high' || tags.includes('sp_top') || tags.includes('eng_active_daily')) {
    if (Math.random() < 0.5) tags.push('eng_paid');
  } else if (Math.random() < 0.15) {
    tags.push('eng_paid');
  }

  // Devices (being active on several devices is very common)
  tags.push(weightedPick([
    { value: 'dv_ios', weight: 40 },
    { value: 'dv_android', weight: 50 },
    { value: 'dv_pc', weight: 10 },
  ]));
  if (Math.random() < 0.5) tags.push('dv_pad');
  if (Math.random() < 0.3) tags.push('dv_pc');
  if (Math.random() < 0.3) tags.push(pick(['dv_ios', 'dv_android']));

  // De-duplicate: the insert uses OR IGNORE, but keeping the list clean in memory
  // avoids redundant statements and keeps the assignment counter accurate.
  return [...new Set(tags)];
}

seedData();