fix(data): 解决数据序列化和定时任务调度问题
- 添加递归转换方法以支持复杂数据类型的JSON序列化 - 优化实体转换逻辑,避免手工字段赋值,提升代码简洁性 - 修正性别字段处理,确保枚举类型能正确转换为值 - 调整爬取任务调度频率,从30秒改为1分钟,提高合理性
This commit is contained in:
@@ -37,29 +37,42 @@ class ResumeMapper:
|
||||
updated_at=model.updated_at
|
||||
)
|
||||
|
||||
def _convert_to_serializable(self, obj):
|
||||
"""递归转换对象为可JSON序列化的格式"""
|
||||
if obj is None:
|
||||
return None
|
||||
# 处理枚举类型
|
||||
if hasattr(obj, 'value'):
|
||||
return obj.value
|
||||
# 处理 Decimal 类型
|
||||
from decimal import Decimal
|
||||
if isinstance(obj, Decimal):
|
||||
return float(obj)
|
||||
# 处理日期时间类型
|
||||
from datetime import datetime, date
|
||||
if isinstance(obj, (datetime, date)):
|
||||
return obj.isoformat()
|
||||
# 处理列表
|
||||
if isinstance(obj, list):
|
||||
return [self._convert_to_serializable(item) for item in obj]
|
||||
# 处理字典
|
||||
if isinstance(obj, dict):
|
||||
return {k: self._convert_to_serializable(v) for k, v in obj.items()}
|
||||
# 处理dataclass
|
||||
if hasattr(obj, '__dataclass_fields__'):
|
||||
result = {}
|
||||
for field_name in obj.__dataclass_fields__:
|
||||
value = getattr(obj, field_name)
|
||||
result[field_name] = self._convert_to_serializable(value)
|
||||
return result
|
||||
return obj
|
||||
|
||||
def _entity_to_model(self, entity: Resume) -> ResumeModel:
|
||||
"""将实体转换为模型"""
|
||||
parsed_dict = None
|
||||
if entity.parsed_content:
|
||||
parsed_dict = {
|
||||
'name': entity.parsed_content.name,
|
||||
'phone': entity.parsed_content.phone,
|
||||
'email': entity.parsed_content.email,
|
||||
'gender': entity.parsed_content.gender,
|
||||
'age': entity.parsed_content.age,
|
||||
'location': entity.parsed_content.location,
|
||||
'current_company': entity.parsed_content.current_company,
|
||||
'current_position': entity.parsed_content.current_position,
|
||||
'work_years': entity.parsed_content.work_years,
|
||||
'education': entity.parsed_content.education,
|
||||
'school': entity.parsed_content.school,
|
||||
'skills': entity.parsed_content.skills,
|
||||
'self_evaluation': entity.parsed_content.self_evaluation,
|
||||
'work_experiences': entity.parsed_content.work_experiences,
|
||||
'project_experiences': entity.parsed_content.project_experiences,
|
||||
'education_experiences': entity.parsed_content.education_experiences,
|
||||
'raw_data': entity.parsed_content.raw_data
|
||||
}
|
||||
# 递归转换所有字段为可序列化格式
|
||||
parsed_dict = self._convert_to_serializable(entity.parsed_content)
|
||||
|
||||
return ResumeModel(
|
||||
id=entity.id,
|
||||
|
||||
@@ -119,7 +119,14 @@ class BossNormalizer(SourceNormalizer):
|
||||
parsed.name = data.get('name')
|
||||
parsed.phone = data.get('phone')
|
||||
parsed.email = data.get('email')
|
||||
parsed.gender = data.get('gender')
|
||||
|
||||
# 处理 gender,转换为可序列化的值
|
||||
gender = data.get('gender')
|
||||
if hasattr(gender, 'value'):
|
||||
parsed.gender = gender.value
|
||||
else:
|
||||
parsed.gender = gender
|
||||
|
||||
parsed.age = data.get('age')
|
||||
parsed.location = data.get('location') or data.get('cityName')
|
||||
parsed.current_company = data.get('company') or data.get('currentCompany')
|
||||
|
||||
@@ -45,7 +45,7 @@ class CrawlScheduler:
|
||||
# Crawl Boss Zhipin once every minute
|
||||
self.scheduler.add_job(
|
||||
self._crawl_boss,
|
||||
trigger=IntervalTrigger(seconds=30),
|
||||
trigger=IntervalTrigger(minutes=1),
|
||||
id="crawl_boss",
|
||||
name="爬取Boss直聘简历",
|
||||
replace_existing=True
|
||||
|
||||
Reference in New Issue
Block a user