fix(data): 解决数据序列化和定时任务调度问题

- 添加递归转换方法以支持复杂数据类型的JSON序列化
- 优化实体转换逻辑，避免手工字段赋值，提升代码简洁性
- 修正性别字段处理，确保枚举类型能正确转换为值
- 调整爬取任务调度频率，从30秒改为1分钟，提高合理性
2026-03-24 16:26:54 +08:00
parent 9be9d338ae
commit b6afe82d2f
3 changed files with 41 additions and 21 deletions
--- a/src/main/python/cn/yinlihupo/ylhp_hr_2_0/mapper/resume_mapper.py
+++ b/src/main/python/cn/yinlihupo/ylhp_hr_2_0/mapper/resume_mapper.py
@@ -37,29 +37,42 @@ class ResumeMapper:
            updated_at=model.updated_at
        )
    
+    def _convert_to_serializable(self, obj):
+        """Recursively convert obj toward a JSON-serializable form; unrecognized types are returned unchanged."""
+        if obj is None:
+            return None
+        # Enum-like objects: return .value as-is (no recursion). NOTE(review): duck-typed, so ANY object with a 'value' attribute matches, not only enums.
+        if hasattr(obj, 'value'):
+            return obj.value
+        # Decimal is converted to float
+        from decimal import Decimal
+        if isinstance(obj, Decimal):
+            return float(obj)
+        # date / datetime are converted to their isoformat() string
+        from datetime import datetime, date
+        if isinstance(obj, (datetime, date)):
+            return obj.isoformat()
+        # Lists: convert each element recursively
+        if isinstance(obj, list):
+            return [self._convert_to_serializable(item) for item in obj]
+        # Dicts: convert values recursively; keys are kept unchanged
+        if isinstance(obj, dict):
+            return {k: self._convert_to_serializable(v) for k, v in obj.items()}
+        # Dataclass instances become a dict of field name -> converted value. NOTE(review): a dataclass with a 'value' field is caught by the .value check above and never reaches this branch.
+        if hasattr(obj, '__dataclass_fields__'):
+            result = {}
+            for field_name in obj.__dataclass_fields__:
+                value = getattr(obj, field_name)
+                result[field_name] = self._convert_to_serializable(value)
+            return result
+        return obj  # anything else (str, int, tuple, set, ...) passes through unchanged
+    
    def _entity_to_model(self, entity: Resume) -> ResumeModel:
        """将实体转换为模型"""
        parsed_dict = None
        if entity.parsed_content:
-            parsed_dict = {
-                'name': entity.parsed_content.name,
-                'phone': entity.parsed_content.phone,
-                'email': entity.parsed_content.email,
-                'gender': entity.parsed_content.gender,
-                'age': entity.parsed_content.age,
-                'location': entity.parsed_content.location,
-                'current_company': entity.parsed_content.current_company,
-                'current_position': entity.parsed_content.current_position,
-                'work_years': entity.parsed_content.work_years,
-                'education': entity.parsed_content.education,
-                'school': entity.parsed_content.school,
-                'skills': entity.parsed_content.skills,
-                'self_evaluation': entity.parsed_content.self_evaluation,
-                'work_experiences': entity.parsed_content.work_experiences,
-                'project_experiences': entity.parsed_content.project_experiences,
-                'education_experiences': entity.parsed_content.education_experiences,
-                'raw_data': entity.parsed_content.raw_data
-            }
+            # 递归转换所有字段为可序列化格式
+            parsed_dict = self._convert_to_serializable(entity.parsed_content)
        
        return ResumeModel(
            id=entity.id,
--- a/src/main/python/cn/yinlihupo/ylhp_hr_2_0/service/ingestion/data_normalizer.py
+++ b/src/main/python/cn/yinlihupo/ylhp_hr_2_0/service/ingestion/data_normalizer.py
@@ -119,7 +119,14 @@ class BossNormalizer(SourceNormalizer):
        parsed.name = data.get('name')
        parsed.phone = data.get('phone')
        parsed.email = data.get('email')
-        parsed.gender = data.get('gender')
+        
+        # 处理 gender，转换为可序列化的值
+        gender = data.get('gender')
+        if hasattr(gender, 'value'):
+            parsed.gender = gender.value
+        else:
+            parsed.gender = gender
+        
        parsed.age = data.get('age')
        parsed.location = data.get('location') or data.get('cityName')
        parsed.current_company = data.get('company') or data.get('currentCompany')
--- a/src/main/python/cn/yinlihupo/ylhp_hr_2_0/service/scheduler.py
+++ b/src/main/python/cn/yinlihupo/ylhp_hr_2_0/service/scheduler.py
@@ -45,7 +45,7 @@ class CrawlScheduler:
        # 每30分钟爬取一次 Boss 直聘
        self.scheduler.add_job(
            self._crawl_boss,
-            trigger=IntervalTrigger(seconds=30),
+            trigger=IntervalTrigger(minutes=1),
            id="crawl_boss",
            name="爬取Boss直聘简历",
            replace_existing=True