fix(data): 解决数据序列化和定时任务调度问题

- 添加递归转换方法以支持复杂数据类型的JSON序列化
- 优化实体转换逻辑,避免手工字段赋值,提升代码简洁性
- 修正性别字段处理,确保枚举类型能正确转换为值
- 调整爬取任务调度频率,从30秒改为1分钟,提高合理性
This commit is contained in:
2026-03-24 16:26:54 +08:00
parent 9be9d338ae
commit b6afe82d2f
3 changed files with 41 additions and 21 deletions

View File

@@ -37,29 +37,42 @@ class ResumeMapper:
updated_at=model.updated_at
)
def _convert_to_serializable(self, obj):
"""递归转换对象为可JSON序列化的格式"""
if obj is None:
return None
# 处理枚举类型
if hasattr(obj, 'value'):
return obj.value
# 处理 Decimal 类型
from decimal import Decimal
if isinstance(obj, Decimal):
return float(obj)
# 处理日期时间类型
from datetime import datetime, date
if isinstance(obj, (datetime, date)):
return obj.isoformat()
# 处理列表
if isinstance(obj, list):
return [self._convert_to_serializable(item) for item in obj]
# 处理字典
if isinstance(obj, dict):
return {k: self._convert_to_serializable(v) for k, v in obj.items()}
# 处理dataclass
if hasattr(obj, '__dataclass_fields__'):
result = {}
for field_name in obj.__dataclass_fields__:
value = getattr(obj, field_name)
result[field_name] = self._convert_to_serializable(value)
return result
return obj
def _entity_to_model(self, entity: Resume) -> ResumeModel:
"""将实体转换为模型"""
parsed_dict = None
if entity.parsed_content:
parsed_dict = {
'name': entity.parsed_content.name,
'phone': entity.parsed_content.phone,
'email': entity.parsed_content.email,
'gender': entity.parsed_content.gender,
'age': entity.parsed_content.age,
'location': entity.parsed_content.location,
'current_company': entity.parsed_content.current_company,
'current_position': entity.parsed_content.current_position,
'work_years': entity.parsed_content.work_years,
'education': entity.parsed_content.education,
'school': entity.parsed_content.school,
'skills': entity.parsed_content.skills,
'self_evaluation': entity.parsed_content.self_evaluation,
'work_experiences': entity.parsed_content.work_experiences,
'project_experiences': entity.parsed_content.project_experiences,
'education_experiences': entity.parsed_content.education_experiences,
'raw_data': entity.parsed_content.raw_data
}
# 递归转换所有字段为可序列化格式
parsed_dict = self._convert_to_serializable(entity.parsed_content)
return ResumeModel(
id=entity.id,

View File

@@ -119,7 +119,14 @@ class BossNormalizer(SourceNormalizer):
parsed.name = data.get('name')
parsed.phone = data.get('phone')
parsed.email = data.get('email')
parsed.gender = data.get('gender')
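# Normalize gender: when the value exposes `.value` (e.g. an enum), store that underlying value so the field stays serializable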
gender = data.get('gender')
if hasattr(gender, 'value'):
parsed.gender = gender.value
else:
parsed.gender = gender
parsed.age = data.get('age')
parsed.location = data.get('location') or data.get('cityName')
parsed.current_company = data.get('company') or data.get('currentCompany')

View File

@@ -45,7 +45,7 @@ class CrawlScheduler:
# Crawl Boss Zhipin (Boss 直聘) once every minute
self.scheduler.add_job(
self._crawl_boss,
trigger=IntervalTrigger(seconds=30),
trigger=IntervalTrigger(minutes=1),
id="crawl_boss",
name="爬取Boss直聘简历",
replace_existing=True