fix(data): 解决数据序列化和定时任务调度问题
- 添加递归转换方法以支持复杂数据类型的JSON序列化
- 优化实体转换逻辑,避免手工字段赋值,提升代码简洁性
- 修正性别字段处理,确保枚举类型能正确转换为值
- 调整爬取任务调度频率,从30秒改为1分钟,提高合理性
This commit is contained in:
@@ -37,29 +37,42 @@ class ResumeMapper:
|
|||||||
updated_at=model.updated_at
|
updated_at=model.updated_at
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def _convert_to_serializable(self, obj):
    """Recursively convert ``obj`` into a JSON-serializable structure.

    Conversion rules, applied in this order:

    * ``None`` is returned unchanged.
    * ``enum.Enum`` members are replaced by their (recursively converted)
      ``value``.
    * ``Decimal`` becomes ``float``.
    * ``datetime`` / ``date`` / ``time`` become ISO-8601 strings.
    * ``list`` / ``tuple`` / ``set`` / ``frozenset`` become a ``list`` of
      converted items.
    * ``dict`` values are converted; keys are kept as they are.
    * dataclass instances become a ``dict`` keyed by field name.
    * any other object exposing a ``value`` attribute is treated as
      enum-like and replaced by its converted ``value``.
    * everything else is returned unchanged.

    Args:
        obj: The value to convert; may be arbitrarily nested.

    Returns:
        A structure made of the converted values described above.
    """
    # Function-scope imports (as in the original) so this method does not
    # depend on what the module imports at file level.
    from dataclasses import fields, is_dataclass
    from datetime import date, datetime, time
    from decimal import Decimal
    from enum import Enum

    if obj is None:
        return None

    convert = self._convert_to_serializable

    # Real enums: recurse so that a Decimal/date/tuple payload is converted too.
    if isinstance(obj, Enum):
        return convert(obj.value)

    if isinstance(obj, Decimal):
        return float(obj)

    if isinstance(obj, (datetime, date, time)):
        return obj.isoformat()

    # Sets are not JSON-serializable and tuples may hold unconverted items,
    # so every plain sequence/set is normalised to a list.
    if isinstance(obj, (list, tuple, set, frozenset)):
        return [convert(item) for item in obj]

    if isinstance(obj, dict):
        return {key: convert(item) for key, item in obj.items()}

    # Dataclass *instances* only: is_dataclass() is also true for the class
    # itself, and fields() skips ClassVar/InitVar pseudo-fields.
    if is_dataclass(obj) and not isinstance(obj, type):
        return {f.name: convert(getattr(obj, f.name)) for f in fields(obj)}

    # Duck-typed enum-like objects. This check runs last so that a dataclass
    # which merely has a field called ``value`` is not collapsed to that field.
    if hasattr(obj, 'value'):
        return convert(obj.value)

    return obj
|
||||||
|
|
||||||
def _entity_to_model(self, entity: Resume) -> ResumeModel:
|
def _entity_to_model(self, entity: Resume) -> ResumeModel:
|
||||||
"""将实体转换为模型"""
|
"""将实体转换为模型"""
|
||||||
parsed_dict = None
|
parsed_dict = None
|
||||||
if entity.parsed_content:
|
if entity.parsed_content:
|
||||||
parsed_dict = {
|
# 递归转换所有字段为可序列化格式
|
||||||
'name': entity.parsed_content.name,
|
parsed_dict = self._convert_to_serializable(entity.parsed_content)
|
||||||
'phone': entity.parsed_content.phone,
|
|
||||||
'email': entity.parsed_content.email,
|
|
||||||
'gender': entity.parsed_content.gender,
|
|
||||||
'age': entity.parsed_content.age,
|
|
||||||
'location': entity.parsed_content.location,
|
|
||||||
'current_company': entity.parsed_content.current_company,
|
|
||||||
'current_position': entity.parsed_content.current_position,
|
|
||||||
'work_years': entity.parsed_content.work_years,
|
|
||||||
'education': entity.parsed_content.education,
|
|
||||||
'school': entity.parsed_content.school,
|
|
||||||
'skills': entity.parsed_content.skills,
|
|
||||||
'self_evaluation': entity.parsed_content.self_evaluation,
|
|
||||||
'work_experiences': entity.parsed_content.work_experiences,
|
|
||||||
'project_experiences': entity.parsed_content.project_experiences,
|
|
||||||
'education_experiences': entity.parsed_content.education_experiences,
|
|
||||||
'raw_data': entity.parsed_content.raw_data
|
|
||||||
}
|
|
||||||
|
|
||||||
return ResumeModel(
|
return ResumeModel(
|
||||||
id=entity.id,
|
id=entity.id,
|
||||||
|
|||||||
@@ -119,7 +119,14 @@ class BossNormalizer(SourceNormalizer):
|
|||||||
parsed.name = data.get('name')
|
parsed.name = data.get('name')
|
||||||
parsed.phone = data.get('phone')
|
parsed.phone = data.get('phone')
|
||||||
parsed.email = data.get('email')
|
parsed.email = data.get('email')
|
||||||
parsed.gender = data.get('gender')
|
|
||||||
|
# 处理 gender,转换为可序列化的值
|
||||||
|
gender = data.get('gender')
|
||||||
|
if hasattr(gender, 'value'):
|
||||||
|
parsed.gender = gender.value
|
||||||
|
else:
|
||||||
|
parsed.gender = gender
|
||||||
|
|
||||||
parsed.age = data.get('age')
|
parsed.age = data.get('age')
|
||||||
parsed.location = data.get('location') or data.get('cityName')
|
parsed.location = data.get('location') or data.get('cityName')
|
||||||
parsed.current_company = data.get('company') or data.get('currentCompany')
|
parsed.current_company = data.get('company') or data.get('currentCompany')
|
||||||
|
|||||||
@@ -45,7 +45,7 @@ class CrawlScheduler:
|
|||||||
# 每30分钟爬取一次 Boss 直聘
|
# 每1分钟爬取一次 Boss 直聘
|
||||||
self.scheduler.add_job(
|
self.scheduler.add_job(
|
||||||
self._crawl_boss,
|
self._crawl_boss,
|
||||||
trigger=IntervalTrigger(seconds=30),
|
trigger=IntervalTrigger(minutes=1),
|
||||||
id="crawl_boss",
|
id="crawl_boss",
|
||||||
name="爬取Boss直聘简历",
|
name="爬取Boss直聘简历",
|
||||||
replace_existing=True
|
replace_existing=True
|
||||||
|
|||||||
Reference in New Issue
Block a user