From b6afe82d2fee1ffed4c6f91f35ce1942d4ed0a5d Mon Sep 17 00:00:00 2001 From: JiaoTianBo Date: Tue, 24 Mar 2026 16:26:54 +0800 Subject: [PATCH] =?UTF-8?q?fix(data):=20=E8=A7=A3=E5=86=B3=E6=95=B0?= =?UTF-8?q?=E6=8D=AE=E5=BA=8F=E5=88=97=E5=8C=96=E5=92=8C=E5=AE=9A=E6=97=B6?= =?UTF-8?q?=E4=BB=BB=E5=8A=A1=E8=B0=83=E5=BA=A6=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - 添加递归转换方法以支持复杂数据类型的JSON序列化 - 优化实体转换逻辑,避免手工字段赋值,提升代码简洁性 - 修正性别字段处理,确保枚举类型能正确转换为值 - 调整爬取任务调度频率,从30秒改为1分钟,提高合理性 --- .../ylhp_hr_2_0/mapper/resume_mapper.py | 51 ++++++++++++------- .../service/ingestion/data_normalizer.py | 9 +++- .../ylhp_hr_2_0/service/scheduler.py | 2 +- 3 files changed, 41 insertions(+), 21 deletions(-) diff --git a/src/main/python/cn/yinlihupo/ylhp_hr_2_0/mapper/resume_mapper.py b/src/main/python/cn/yinlihupo/ylhp_hr_2_0/mapper/resume_mapper.py index e0adc11..8e13da0 100644 --- a/src/main/python/cn/yinlihupo/ylhp_hr_2_0/mapper/resume_mapper.py +++ b/src/main/python/cn/yinlihupo/ylhp_hr_2_0/mapper/resume_mapper.py @@ -37,29 +37,42 @@ class ResumeMapper: updated_at=model.updated_at ) + def _convert_to_serializable(self, obj): + """递归转换对象为可JSON序列化的格式""" + if obj is None: + return None + # 处理枚举类型 + if hasattr(obj, 'value'): + return obj.value + # 处理 Decimal 类型 + from decimal import Decimal + if isinstance(obj, Decimal): + return float(obj) + # 处理日期时间类型 + from datetime import datetime, date + if isinstance(obj, (datetime, date)): + return obj.isoformat() + # 处理列表 + if isinstance(obj, list): + return [self._convert_to_serializable(item) for item in obj] + # 处理字典 + if isinstance(obj, dict): + return {k: self._convert_to_serializable(v) for k, v in obj.items()} + # 处理dataclass + if hasattr(obj, '__dataclass_fields__'): + result = {} + for field_name in obj.__dataclass_fields__: + value = getattr(obj, field_name) + result[field_name] = self._convert_to_serializable(value) + return result + return obj + def _entity_to_model(self, entity: Resume) -> ResumeModel: """将实体转换为模型""" parsed_dict = None if entity.parsed_content: - parsed_dict = { - 'name': entity.parsed_content.name, - 'phone': entity.parsed_content.phone, - 'email': entity.parsed_content.email, - 'gender': entity.parsed_content.gender, - 'age': entity.parsed_content.age, - 'location': entity.parsed_content.location, - 'current_company': entity.parsed_content.current_company, - 'current_position': entity.parsed_content.current_position, - 'work_years': entity.parsed_content.work_years, - 'education': entity.parsed_content.education, - 'school': entity.parsed_content.school, - 'skills': entity.parsed_content.skills, - 'self_evaluation': entity.parsed_content.self_evaluation, - 'work_experiences': entity.parsed_content.work_experiences, - 'project_experiences': entity.parsed_content.project_experiences, - 'education_experiences': entity.parsed_content.education_experiences, - 'raw_data': entity.parsed_content.raw_data - } + # 递归转换所有字段为可序列化格式 + parsed_dict = self._convert_to_serializable(entity.parsed_content) return ResumeModel( id=entity.id, diff --git a/src/main/python/cn/yinlihupo/ylhp_hr_2_0/service/ingestion/data_normalizer.py b/src/main/python/cn/yinlihupo/ylhp_hr_2_0/service/ingestion/data_normalizer.py index 3a7d9d5..233de1d 100644 --- a/src/main/python/cn/yinlihupo/ylhp_hr_2_0/service/ingestion/data_normalizer.py +++ b/src/main/python/cn/yinlihupo/ylhp_hr_2_0/service/ingestion/data_normalizer.py @@ -119,7 +119,14 @@ class BossNormalizer(SourceNormalizer): parsed.name = data.get('name') parsed.phone = data.get('phone') parsed.email = data.get('email') - parsed.gender = data.get('gender') + + # 处理 gender,转换为可序列化的值 + gender = data.get('gender') + if hasattr(gender, 'value'): + parsed.gender = gender.value + else: + parsed.gender = gender + parsed.age = data.get('age') parsed.location = data.get('location') or data.get('cityName') parsed.current_company = data.get('company') or data.get('currentCompany') diff --git a/src/main/python/cn/yinlihupo/ylhp_hr_2_0/service/scheduler.py b/src/main/python/cn/yinlihupo/ylhp_hr_2_0/service/scheduler.py index f52d6c1..a152a60 100644 --- a/src/main/python/cn/yinlihupo/ylhp_hr_2_0/service/scheduler.py +++ b/src/main/python/cn/yinlihupo/ylhp_hr_2_0/service/scheduler.py @@ -45,7 +45,7 @@ class CrawlScheduler: # 每30分钟爬取一次 Boss 直聘 self.scheduler.add_job( self._crawl_boss, - trigger=IntervalTrigger(seconds=30), + trigger=IntervalTrigger(minutes=1), id="crawl_boss", name="爬取Boss直聘简历", replace_existing=True