chore(crawler): 移除 BossCrawler 中调试打印代码
- 删除了打印 geek_data 和 geek_card 属性的调试语句
- 移除了候选人 name 和 source_id 解析时的调试输出

chore(ingestion): 去除数据验证失败时的调试打印
- 删除了验证失败后打印原始数据和标准化数据的调试信息
- 精简验证失败的日志输出，保持代码简洁
This commit is contained in:
@@ -138,13 +138,6 @@ class BossCrawler(BaseCrawler):
|
|||||||
# 获取 geekCard(Boss SDK 的数据通常在 geekCard 中)
|
# 获取 geekCard(Boss SDK 的数据通常在 geekCard 中)
|
||||||
geek_card = getattr(geek_data, 'geekCard', None) or geek_data
|
geek_card = getattr(geek_data, 'geekCard', None) or geek_data
|
||||||
|
|
||||||
# 调试:打印 geek_data 和 geek_card 的所有属性
|
|
||||||
print(f"[DEBUG] geek_data type: {type(geek_data)}")
|
|
||||||
print(f"[DEBUG] geek_data attrs: {dir(geek_data) if hasattr(geek_data, '__dict__') else 'no __dict__'}")
|
|
||||||
if geek_card is not geek_data:
|
|
||||||
print(f"[DEBUG] geek_card type: {type(geek_card)}")
|
|
||||||
print(f"[DEBUG] geek_card attrs: {dir(geek_card) if hasattr(geek_card, '__dict__') else 'no __dict__'}")
|
|
||||||
|
|
||||||
# 从 SDK 返回的数据中提取候选人信息
|
# 从 SDK 返回的数据中提取候选人信息
|
||||||
source_id = (getattr(geek_data, 'geekId', '') or
|
source_id = (getattr(geek_data, 'geekId', '') or
|
||||||
getattr(geek_data, 'encryptGeekId', '') or
|
getattr(geek_data, 'encryptGeekId', '') or
|
||||||
@@ -155,8 +148,6 @@ class BossCrawler(BaseCrawler):
|
|||||||
name = (getattr(geek_card, 'geekName', '') or
|
name = (getattr(geek_card, 'geekName', '') or
|
||||||
getattr(geek_data, 'geekName', ''))
|
getattr(geek_data, 'geekName', ''))
|
||||||
|
|
||||||
print(f"[DEBUG] Parsed name: '{name}', source_id: '{source_id}'")
|
|
||||||
|
|
||||||
# 解析薪资期望(Boss SDK 使用 salary 或 lowSalary/highSalary)
|
# 解析薪资期望(Boss SDK 使用 salary 或 lowSalary/highSalary)
|
||||||
salary_str = (getattr(geek_card, 'salary', '') or
|
salary_str = (getattr(geek_card, 'salary', '') or
|
||||||
getattr(geek_data, 'salary', ''))
|
getattr(geek_data, 'salary', ''))
|
||||||
|
|||||||
@@ -137,12 +137,6 @@ class UnifiedIngestionService:
|
|||||||
# 2. 数据验证
|
# 2. 数据验证
|
||||||
validation_result = self._validate(normalized)
|
validation_result = self._validate(normalized)
|
||||||
if not validation_result.is_valid:
|
if not validation_result.is_valid:
|
||||||
# 打印原始数据和标准化数据,方便排查
|
|
||||||
print(f"[数据验证失败] 错误: {validation_result.error_messages}")
|
|
||||||
print(f"[数据验证失败] 原始数据: {raw_data}")
|
|
||||||
print(f"[数据验证失败] 标准化后候选人: name={normalized.candidate.name}, "
|
|
||||||
f"source={normalized.candidate.source}, source_id={normalized.candidate.source_id}")
|
|
||||||
print(f"[数据验证失败] 标准化后简历: raw_content长度={len(normalized.resume.raw_content) if normalized.resume.raw_content else 0}")
|
|
||||||
return IngestionResult.failed_result(
|
return IngestionResult.failed_result(
|
||||||
errors=validation_result.error_messages,
|
errors=validation_result.error_messages,
|
||||||
message="数据验证失败"
|
message="数据验证失败"
|
||||||
|
|||||||
Reference in New Issue
Block a user