224 lines
8.1 KiB
JavaScript
224 lines
8.1 KiB
JavaScript
const { getDb } = require('../db/init');
|
|
|
|
const db = getDb('onion');
|
|
|
|
const CATEGORY_KEY = 'basic_info_role';
|
|
|
|
const RENAME_MAP = new Map([
|
|
// 妈妈系
|
|
['母', '妈妈'],
|
|
['妈', '妈妈'],
|
|
['母亲', '妈妈'],
|
|
['母 亲', '妈妈'],
|
|
['母親', '妈妈'],
|
|
['毋亲', '妈妈'],
|
|
['妈 妈', '妈妈'],
|
|
['妈吗', '妈妈'],
|
|
['妈好', '妈妈'],
|
|
['妈专', '妈妈'],
|
|
['蚂妈', '妈妈'],
|
|
['宝妈', '妈妈'],
|
|
['全职妈妈', '妈妈'],
|
|
['家庭主妇', '妈妈'],
|
|
['主妇', '妈妈'],
|
|
['家家庭主妇', '妈妈'],
|
|
['女主人', '妈妈'],
|
|
|
|
// 爸爸系
|
|
['父', '爸爸'],
|
|
['爸', '爸爸'],
|
|
['父亲', '爸爸'],
|
|
['父 亲', '爸爸'],
|
|
['孩子爸', '爸爸'],
|
|
['爸专', '爸爸'],
|
|
['爸备', '爸爸'],
|
|
|
|
// 祖辈系
|
|
['祖父', '爷爷'],
|
|
['姥爷', '外公'],
|
|
['外爷', '外公'],
|
|
['祖母', '奶奶'],
|
|
['姥姥', '外婆'],
|
|
['姥姥/外婆', '外婆'],
|
|
['外婆', '外婆'],
|
|
['婆婆', '奶奶'],
|
|
|
|
// 其他明确亲属
|
|
['姑妈', '姑姑'],
|
|
]);
|
|
|
|
// 这些值属于家庭角色中的明确亲属关系,保留即可
|
|
const KEEP_SET = new Set([
|
|
'妈妈', '爸爸', '爷爷', '奶奶', '外公', '外婆',
|
|
'姑姑', '舅舅', '姨妈', '伯娘', '继母', '妻子',
|
|
'女儿', '儿子', '姐姐', '父母', '家长', '其他监护人',
|
|
]);
|
|
|
|
// 明显不是家庭角色的噪声、描述、乱码、占位符
|
|
const DELETE_EXACT = new Set([
|
|
'上班族', '母性', '女', '主', '主妇', '全职', '母中', '母女', '母子',
|
|
'一般', '陪读', '父母', '母家', '高中', '经济', '无', '目前', '内勤',
|
|
'带娃', '白黑', '家长', '全能', '次', '普通', '好人', '主导', '主角',
|
|
'主内', '主&角初中', '初中', '文 化', '/', 'I', '13296773713',
|
|
'盛自根', '经济支柱', '经济、教育、生活是核心', '助推庭教育',
|
|
'呵护,做具体事', '教育陪伴孩子', '照孩子', '家庭主妇', '家家庭主妇',
|
|
'妈专', '妈好', '妈吗', '妈 妈', '父 亲', '妈 亲', '母 亲', '母親',
|
|
'母', '父', '爸', '孩子爸', '爸专', '爸备', '宝妈', '蚂妈', '毋亲',
|
|
'外爷', '姥爷', '祖父', '祖母', '姑妈', '婆婆', '女主人', '母亲',
|
|
]);
|
|
|
|
const DELETE_PATTERNS = [
|
|
/^\d+$/, // 数字
|
|
/^[\s\W_]+$/, // 纯符号/空白
|
|
/联系方式|电话|手机号|微信/, // 联系方式片段
|
|
/上班|内勤|经济|教育|陪伴|助推|呵护|主导|主角|全能|普通|一般|目前|无|好人|次/,
|
|
/家庭主妇|主妇|全职|陪读|带娃/,
|
|
/文化|初中|高中|白黑|盛自根/,
|
|
];
|
|
|
|
function canonicalizeName(rawName) {
|
|
const name = String(rawName || '').trim();
|
|
if (!name) return null;
|
|
if (RENAME_MAP.has(name)) return RENAME_MAP.get(name);
|
|
return name;
|
|
}
|
|
|
|
function shouldDelete(name) {
|
|
if (DELETE_EXACT.has(name)) return true;
|
|
return DELETE_PATTERNS.some((re) => re.test(name));
|
|
}
|
|
|
|
function updateStats(dbConn) {
|
|
const totalUsers = dbConn.prepare('SELECT COUNT(*) AS n FROM users').get().n || 1;
|
|
const tags = dbConn.prepare('SELECT id FROM tags').all();
|
|
const stmt = dbConn.prepare(`
|
|
UPDATE tags
|
|
SET
|
|
coverage = (SELECT COUNT(DISTINCT user_id) FROM user_tags WHERE tag_id = tags.id),
|
|
coverage_rate = ROUND((SELECT COUNT(DISTINCT user_id) FROM user_tags WHERE tag_id = tags.id) * 100.0 / ?, 2)
|
|
WHERE id = ?
|
|
`);
|
|
for (const tag of tags) stmt.run(totalUsers, tag.id);
|
|
}
|
|
|
|
function main() {
|
|
try {
|
|
const category = db.prepare('SELECT id FROM tag_categories WHERE key = ?').get(CATEGORY_KEY);
|
|
if (!category) throw new Error(`找不到分类: ${CATEGORY_KEY}`);
|
|
|
|
const catId = category.id;
|
|
const tags = db.prepare('SELECT id, name FROM tags WHERE category_id = ?').all(catId);
|
|
|
|
console.log('🧹 开始清理家庭角色噪声数据...');
|
|
console.log(`📂 当前标签数: ${tags.length}`);
|
|
|
|
let merged = 0;
|
|
let deleted = 0;
|
|
let kept = 0;
|
|
|
|
const tx = db.transaction(() => {
|
|
const getByName = db.prepare('SELECT id, name FROM tags WHERE category_id = ? AND name = ?');
|
|
const insertRel = db.prepare('INSERT OR IGNORE INTO user_tags (user_id, tag_id) VALUES (?, ?)');
|
|
const deleteRel = db.prepare('DELETE FROM user_tags WHERE tag_id = ?');
|
|
const deleteTag = db.prepare('DELETE FROM tags WHERE id = ?');
|
|
const updateTag = db.prepare('UPDATE tags SET name = ? WHERE id = ?');
|
|
|
|
for (const tag of tags) {
|
|
const originalName = String(tag.name || '').trim();
|
|
const canonicalName = canonicalizeName(originalName);
|
|
|
|
if (KEEP_SET.has(originalName)) {
|
|
kept += 1;
|
|
continue;
|
|
}
|
|
|
|
if (canonicalName && canonicalName !== originalName && KEEP_SET.has(canonicalName)) {
|
|
const target = getByName.get(catId, canonicalName);
|
|
if (target) {
|
|
// 先把关系迁移过去,再删除旧标签
|
|
db.prepare(`INSERT OR IGNORE INTO user_tags (user_id, tag_id)
|
|
SELECT user_id, ? FROM user_tags WHERE tag_id = ?`).run(target.id, tag.id);
|
|
deleteRel.run(tag.id);
|
|
deleteTag.run(tag.id);
|
|
console.log(`✅ 合并: ${originalName} -> ${canonicalName}`);
|
|
merged += 1;
|
|
}
|
|
continue;
|
|
}
|
|
|
|
// 未在保留名单中:如果是明显噪声则删除
|
|
if (shouldDelete(originalName)) {
|
|
deleteRel.run(tag.id);
|
|
deleteTag.run(tag.id);
|
|
console.log(`🗑️ 删除: ${originalName}`);
|
|
deleted += 1;
|
|
continue;
|
|
}
|
|
|
|
// 其他未明确规则的值:保守处理,保留但不改名
|
|
kept += 1;
|
|
}
|
|
|
|
// 额外处理:把一些未能通过 canonicalize 但明显可归类到妈妈/爸爸的值再扫一遍
|
|
const leftovers = db.prepare('SELECT id, name FROM tags WHERE category_id = ?').all(catId);
|
|
for (const tag of leftovers) {
|
|
const name = String(tag.name || '').trim();
|
|
if (KEEP_SET.has(name)) continue;
|
|
if (/妈|母|宝妈/.test(name)) {
|
|
const target = getByName.get(catId, '妈妈');
|
|
if (target && target.id !== tag.id) {
|
|
db.prepare(`INSERT OR IGNORE INTO user_tags (user_id, tag_id)
|
|
SELECT user_id, ? FROM user_tags WHERE tag_id = ?`).run(target.id, tag.id);
|
|
deleteRel.run(tag.id);
|
|
deleteTag.run(tag.id);
|
|
console.log(`✅ 合并(兜底): ${name} -> 妈妈`);
|
|
merged += 1;
|
|
continue;
|
|
}
|
|
}
|
|
if (/爸|父|孩子爸/.test(name)) {
|
|
const target = getByName.get(catId, '爸爸');
|
|
if (target && target.id !== tag.id) {
|
|
db.prepare(`INSERT OR IGNORE INTO user_tags (user_id, tag_id)
|
|
SELECT user_id, ? FROM user_tags WHERE tag_id = ?`).run(target.id, tag.id);
|
|
deleteRel.run(tag.id);
|
|
deleteTag.run(tag.id);
|
|
console.log(`✅ 合并(兜底): ${name} -> 爸爸`);
|
|
merged += 1;
|
|
continue;
|
|
}
|
|
}
|
|
}
|
|
});
|
|
|
|
tx();
|
|
updateStats(db);
|
|
|
|
const stats = db.prepare(`
|
|
SELECT
|
|
(SELECT COUNT(*) FROM tags t JOIN tag_categories c ON c.id=t.category_id WHERE c.key = ?) AS tag_count,
|
|
(SELECT COUNT(*) FROM user_tags) AS rel_count,
|
|
(SELECT COUNT(*) FROM tags WHERE name = '妈妈') AS mom_count,
|
|
(SELECT COUNT(*) FROM tags WHERE name = '爸爸') AS dad_count,
|
|
(SELECT COUNT(*) FROM tags WHERE name = '爷爷') AS grandpa_count,
|
|
(SELECT COUNT(*) FROM tags WHERE name = '外公') AS mgp_count,
|
|
(SELECT COUNT(*) FROM tags WHERE name = '外婆') AS mgm_count
|
|
`).get(CATEGORY_KEY);
|
|
|
|
console.log('\n✨ 清理完成');
|
|
console.log(` • 合并: ${merged}`);
|
|
console.log(` • 删除: ${deleted}`);
|
|
console.log(` • 保留(未改名): ${kept}`);
|
|
console.log(` • 家庭角色标签剩余: ${stats.tag_count}`);
|
|
console.log(` • 妈妈/爸爸/爷爷/外公/外婆 计数: ${stats.mom_count}/${stats.dad_count}/${stats.grandpa_count}/${stats.mgp_count}/${stats.mgm_count}`);
|
|
|
|
db.close();
|
|
} catch (error) {
|
|
console.error('❌ 清理失败:', error);
|
|
try { db.close(); } catch (_) {}
|
|
process.exit(1);
|
|
}
|
|
}
|
|
|
|
main();
|