feat(core): 重构项目为简历智能体系统基础架构
- 重命名项目及包结构为ylhp-hr-2-0,支持多平台简历爬取与AI分析 - 移除旧的main.py,新增统一主应用入口及初始化流程 - 实现配置模块,支持数据库、LLM、通知和爬虫多种配置项及环境变量加载 - 构建领域模型,包括候选人、简历、职位、评价等实体与枚举定义 - 设计评价方案服务,提供默认评价模板及方案管理接口 - 开发分析服务,整合LLM客户端实现基于AI的简历分析功能 - 实现多种通知渠道支持,包括企业微信、钉钉、邮件 - 引入爬虫工厂及Boss爬虫模块支持候选人数据抓取 - 统一入库服务,完成数据归一化、验证及去重功能 - 添加异步任务协调流程,支持爬取后自动分析及通知 - 配置项目依赖管理,支持选装LLM和开发工具插件 - 初步搭建代码目录结构,划分配置、领域、服务、映射、控制器层等模块
This commit is contained in:
1
src/main/python/cn/__init__.py
Normal file
1
src/main/python/cn/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""cn package"""
|
||||
1
src/main/python/cn/yinlihupo/__init__.py
Normal file
1
src/main/python/cn/yinlihupo/__init__.py
Normal file
@@ -0,0 +1 @@
|
||||
"""yinlihupo package"""
|
||||
3
src/main/python/cn/yinlihupo/ylhp_hr_2.0/__init__.py
Normal file
3
src/main/python/cn/yinlihupo/ylhp_hr_2.0/__init__.py
Normal file
@@ -0,0 +1,3 @@
|
||||
"""ylhp_hr_2.0 - Resume Intelligence Agent"""
|
||||
|
||||
# NOTE(review): the commit places this package in a directory literally named
# "ylhp_hr_2.0". A "." cannot appear inside a Python package name: a dotted
# import (or `python -m`) would look for a package "ylhp_hr_2" with a
# submodule "0". Confirm how this package is meant to be imported and consider
# renaming the directory (for example "ylhp_hr_2_0").

# Version string of the package.
__version__ = "0.1.0"
|
||||
@@ -0,0 +1 @@
|
||||
"""Common utilities"""
|
||||
@@ -0,0 +1,5 @@
|
||||
"""Configuration module"""
|
||||
|
||||
from .settings import Settings, get_settings

# Names re-exported as the public API of the configuration package.
__all__ = [
    "Settings",
    "get_settings",
]
|
||||
95
src/main/python/cn/yinlihupo/ylhp_hr_2.0/config/settings.py
Normal file
95
src/main/python/cn/yinlihupo/ylhp_hr_2.0/config/settings.py
Normal file
@@ -0,0 +1,95 @@
|
||||
"""Application settings"""
|
||||
from typing import Optional
|
||||
from pydantic_settings import BaseSettings
|
||||
from pydantic import Field
|
||||
|
||||
|
||||
class DatabaseSettings(BaseSettings):
    """Database configuration.

    Values come from variables prefixed with ``DB_`` (for example ``DB_URL``),
    looked up in the process environment and in the ``.env`` file.
    """

    url: str = Field(default="sqlite:///./hr_agent.db", description="数据库连接URL")
    echo: bool = Field(default=False, description="是否打印SQL语句")

    class Config:
        env_prefix = "DB_"
        # This class is instantiated on its own (as a default_factory), so it
        # needs its own dotenv configuration to see values stored in .env.
        env_file = ".env"
        env_file_encoding = "utf-8"
        # .env is shared with the other settings classes; skip their keys
        # instead of rejecting them as unknown fields.
        extra = "ignore"
|
||||
|
||||
|
||||
class LLMSettings(BaseSettings):
    """LLM configuration.

    Values come from variables prefixed with ``LLM_`` (for example
    ``LLM_API_KEY``), looked up in the process environment and in ``.env``.
    """

    provider: str = Field(default="openai", description="LLM提供商: openai, claude, mock")
    api_key: Optional[str] = Field(default=None, description="API密钥")
    base_url: Optional[str] = Field(default=None, description="自定义API地址")
    model: str = Field(default="gpt-4", description="模型名称")
    temperature: float = Field(default=0.7, description="温度参数")
    max_tokens: int = Field(default=2000, description="最大token数")

    class Config:
        env_prefix = "LLM_"
        # This class is instantiated on its own (as a default_factory), so it
        # needs its own dotenv configuration to see values stored in .env.
        env_file = ".env"
        env_file_encoding = "utf-8"
        # .env is shared with the other settings classes; skip their keys
        # instead of rejecting them as unknown fields.
        extra = "ignore"
|
||||
|
||||
|
||||
class NotificationSettings(BaseSettings):
    """Notification channel configuration.

    Values come from variables prefixed with ``NOTIFY_`` (for example
    ``NOTIFY_DINGTALK_WEBHOOK``), looked up in the process environment and in
    ``.env``. List-like options are stored as comma-separated strings.
    """

    # WeChat Work
    wechat_work_webhook: Optional[str] = Field(default=None, description="企业微信Webhook")
    wechat_work_mentioned: Optional[str] = Field(default=None, description="@提醒列表,逗号分隔")

    # DingTalk
    dingtalk_webhook: Optional[str] = Field(default=None, description="钉钉Webhook")
    dingtalk_secret: Optional[str] = Field(default=None, description="钉钉加签密钥")
    dingtalk_at_mobiles: Optional[str] = Field(default=None, description="@手机号列表,逗号分隔")

    # Email
    email_smtp_host: Optional[str] = Field(default=None, description="SMTP服务器")
    email_smtp_port: int = Field(default=587, description="SMTP端口")
    email_username: Optional[str] = Field(default=None, description="邮箱用户名")
    email_password: Optional[str] = Field(default=None, description="邮箱密码")
    email_from: Optional[str] = Field(default=None, description="发件人地址")
    email_to: Optional[str] = Field(default=None, description="收件人地址,逗号分隔")

    class Config:
        env_prefix = "NOTIFY_"
        # This class is instantiated on its own (as a default_factory), so it
        # needs its own dotenv configuration to see values stored in .env.
        env_file = ".env"
        env_file_encoding = "utf-8"
        # .env is shared with the other settings classes; skip their keys
        # instead of rejecting them as unknown fields.
        extra = "ignore"
|
||||
|
||||
|
||||
class CrawlerSettings(BaseSettings):
    """Crawler configuration.

    Values come from variables prefixed with ``CRAWLER_`` (for example
    ``CRAWLER_BOSS_WT_TOKEN``), looked up in the process environment and in
    ``.env``.
    """

    boss_wt_token: Optional[str] = Field(default=None, description="Boss直聘WT Token")

    class Config:
        env_prefix = "CRAWLER_"
        # This class is instantiated on its own (as a default_factory), so it
        # needs its own dotenv configuration to see values stored in .env.
        env_file = ".env"
        env_file_encoding = "utf-8"
        # .env is shared with the other settings classes; skip their keys
        # instead of rejecting them as unknown fields.
        extra = "ignore"
|
||||
|
||||
|
||||
class Settings(BaseSettings):
    """Top-level application configuration.

    Holds the application metadata plus one sub-settings object per concern.
    Each sub-settings object is created by its own class through
    ``default_factory``.
    """

    # Application information
    app_name: str = Field(default="ylhp_hr_2.0", description="应用名称")
    app_version: str = Field(default="0.1.0", description="应用版本")
    debug: bool = Field(default=False, description="调试模式")

    # Sub-configurations
    database: DatabaseSettings = Field(default_factory=DatabaseSettings)
    llm: LLMSettings = Field(default_factory=LLMSettings)
    notification: NotificationSettings = Field(default_factory=NotificationSettings)
    crawler: CrawlerSettings = Field(default_factory=CrawlerSettings)

    class Config:
        env_file = ".env"
        env_file_encoding = "utf-8"
        # The same .env file carries the prefixed keys of the sub-settings
        # (DB_*, LLM_*, NOTIFY_*, CRAWLER_*). With the default "forbid" policy
        # those keys are reported as unexpected extra inputs and construction
        # fails, so unknown keys are ignored here.
        extra = "ignore"
|
||||
|
||||
|
||||
# Module-level cache for the lazily created Settings instance.
_settings: Optional[Settings] = None


def get_settings() -> Settings:
    """Return the shared Settings instance, building it on first use."""
    global _settings
    if _settings is not None:
        return _settings
    _settings = Settings()
    return _settings
|
||||
|
||||
|
||||
def reload_settings() -> Settings:
    """Build a fresh Settings instance, store it as the shared one, and return it."""
    global _settings
    fresh = Settings()
    _settings = fresh
    return fresh
|
||||
@@ -0,0 +1 @@
|
||||
"""Controller layer - API endpoints"""
|
||||
24
src/main/python/cn/yinlihupo/ylhp_hr_2.0/domain/__init__.py
Normal file
24
src/main/python/cn/yinlihupo/ylhp_hr_2.0/domain/__init__.py
Normal file
@@ -0,0 +1,24 @@
|
||||
"""Domain layer - Entity definitions"""
|
||||
|
||||
from .candidate import Candidate, CandidateSource, CandidateStatus
from .resume import Resume, ResumeParsed
from .job import Job, JobStatus
from .evaluation import Evaluation, EvaluationSchema, Dimension, DimensionScore
from .enums import Gender, Education, Recommendation

# Names re-exported as the public API of the domain package,
# grouped by the module they come from.
__all__ = [
    # candidate
    "Candidate",
    "CandidateSource",
    "CandidateStatus",
    # resume
    "Resume",
    "ResumeParsed",
    # job
    "Job",
    "JobStatus",
    # evaluation
    "Evaluation",
    "EvaluationSchema",
    "Dimension",
    "DimensionScore",
    # enums
    "Gender",
    "Education",
    "Recommendation",
]
|
||||
113
src/main/python/cn/yinlihupo/ylhp_hr_2.0/domain/candidate.py
Normal file
113
src/main/python/cn/yinlihupo/ylhp_hr_2.0/domain/candidate.py
Normal file
@@ -0,0 +1,113 @@
|
||||
"""Candidate entity definitions"""
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from decimal import Decimal
|
||||
from typing import Optional, List
|
||||
from enum import Enum
|
||||
|
||||
from .enums import Gender
|
||||
|
||||
|
||||
class CandidateSource(Enum):
    """Channel a candidate was sourced from."""

    BOSS = "boss"
    LIEPIN = "liepin"
    ZHILIAN = "zhilian"
    OTHER = "other"
|
||||
|
||||
|
||||
class CandidateStatus(Enum):
    """Processing status of a candidate."""

    NEW = "new"                  # newly ingested
    ANALYZED = "analyzed"        # analysis done
    PUSHED = "pushed"            # pushed / notified
    CONTACTED = "contacted"      # contacted
    INTERVIEWED = "interviewed"  # interviewed
    HIRED = "hired"              # hired
    REJECTED = "rejected"        # rejected
|
||||
|
||||
|
||||
@dataclass
class SalaryRange:
    """Salary range with optional lower and upper bounds."""

    min_salary: Optional[int] = None
    max_salary: Optional[int] = None

    def __str__(self) -> str:
        """Render the range as text; a bound that is None or 0 counts as missing."""
        low, high = self.min_salary, self.max_salary
        if low:
            # Lower bound present: closed range when the upper bound exists too.
            return f"{low}-{high}K" if high else f"{low}K+"
        if high:
            return f"0-{high}K"
        # Neither bound is usable.
        return "面议"
|
||||
|
||||
|
||||
@dataclass
class Candidate:
    """Candidate entity.

    Every field has a default, so ``Candidate()`` is valid. Timestamps left as
    ``None`` are filled in by ``__post_init__``.
    """

    # Primary key information
    id: Optional[str] = None
    source: CandidateSource = CandidateSource.BOSS
    source_id: str = ""

    # Basic information
    name: str = ""
    phone: Optional[str] = None
    email: Optional[str] = None
    wechat: Optional[str] = None
    gender: Gender = Gender.UNKNOWN
    age: Optional[int] = None
    location: Optional[str] = None

    # Career information
    current_company: Optional[str] = None
    current_position: Optional[str] = None
    work_years: Optional[Decimal] = None
    education: Optional[str] = None
    school: Optional[str] = None
    salary_expectation: Optional[SalaryRange] = None

    # Status management
    status: CandidateStatus = CandidateStatus.NEW

    # Metadata
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None

    def __post_init__(self):
        """Fill in any missing timestamp."""
        # Read the clock once so a freshly created entity has
        # created_at == updated_at instead of two values microseconds apart.
        now = datetime.now()
        if self.created_at is None:
            self.created_at = now
        if self.updated_at is None:
            self.updated_at = now
|
||||
|
||||
|
||||
@dataclass
class WorkExperience:
    """One work-history entry; dates are kept as plain strings."""

    company: str = ""
    position: str = ""
    start_date: Optional[str] = None
    end_date: Optional[str] = None
    description: Optional[str] = None
    is_current: bool = False
|
||||
|
||||
|
||||
@dataclass
class ProjectExperience:
    """One project-history entry; dates are kept as plain strings."""

    name: str = ""
    role: Optional[str] = None
    start_date: Optional[str] = None
    end_date: Optional[str] = None
    description: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass
class EducationExperience:
    """One education-history entry; dates are kept as plain strings."""

    school: str = ""
    major: Optional[str] = None
    degree: Optional[str] = None
    start_date: Optional[str] = None
    end_date: Optional[str] = None
|
||||
43
src/main/python/cn/yinlihupo/ylhp_hr_2.0/domain/enums.py
Normal file
43
src/main/python/cn/yinlihupo/ylhp_hr_2.0/domain/enums.py
Normal file
@@ -0,0 +1,43 @@
|
||||
"""Enum definitions for domain models"""
|
||||
from enum import Enum, auto
|
||||
|
||||
|
||||
class Gender(Enum):
    """Gender enumeration (integer-valued)."""

    UNKNOWN = 0
    MALE = 1
    FEMALE = 2
|
||||
|
||||
|
||||
class Education(Enum):
    """Education level enumeration."""

    UNKNOWN = "unknown"
    HIGH_SCHOOL = "high_school"
    ASSOCIATE = "associate"
    BACHELOR = "bachelor"
    MASTER = "master"
    PHD = "phd"
    POSTDOC = "postdoc"
|
||||
|
||||
|
||||
class Recommendation(Enum):
    """Hiring recommendation enumeration."""

    STRONG_RECOMMEND = "strong_recommend"
    RECOMMEND = "recommend"
    CONSIDER = "consider"
    NOT_RECOMMEND = "not_recommend"
|
||||
|
||||
|
||||
class ChannelType(Enum):
    """Notification channel type."""

    WECHAT_WORK = "wechat_work"
    DINGTALK = "dingtalk"
    EMAIL = "email"
    WEBHOOK = "webhook"
|
||||
|
||||
|
||||
class NotificationStatus(Enum):
    """Delivery status of a notification."""

    PENDING = "pending"
    SENT = "sent"
    FAILED = "failed"
|
||||
131
src/main/python/cn/yinlihupo/ylhp_hr_2.0/domain/evaluation.py
Normal file
131
src/main/python/cn/yinlihupo/ylhp_hr_2.0/domain/evaluation.py
Normal file
@@ -0,0 +1,131 @@
|
||||
"""Evaluation entity definitions"""
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from typing import Optional, List, Dict, Tuple, Any
|
||||
|
||||
from .enums import Recommendation
|
||||
|
||||
|
||||
@dataclass
class Dimension:
    """Definition of one evaluation dimension."""

    id: str = ""
    # Display name of the dimension, e.g. "技术能力".
    name: str = ""
    # Free-text description of the dimension.
    description: str = ""
    # Evaluation criteria; each instance gets its own list.
    criteria: List[str] = field(default_factory=list)
    # Score range as a (low, high) pair.
    score_range: Tuple[int, int] = (0, 100)
|
||||
|
||||
|
||||
@dataclass
class DimensionScore:
    """Score given to a single evaluation dimension."""

    dimension_id: str = ""
    dimension_name: str = ""
    # Score value.
    score: float = 0.0
    # Weight recorded with the score.
    weight: float = 1.0
    # Optional explanation of the score.
    comment: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass
class EvaluationSchema:
    """Evaluation schema: a configurable multi-dimension evaluation template."""

    id: Optional[str] = None
    # Schema name, e.g. "Java后端评价方案".
    name: str = ""
    description: Optional[str] = None

    # Dimensions that make up this schema.
    dimensions: List[Dimension] = field(default_factory=list)

    # Per-dimension weights, keyed by dimension id.
    weights: Dict[str, float] = field(default_factory=dict)

    # Prompt template for the AI analysis.
    prompt_template: Optional[str] = None

    # Whether this is the default schema.
    is_default: bool = False

    # Metadata
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None

    def __post_init__(self):
        """Fill in any missing timestamp."""
        # Read the clock once so a freshly created schema has
        # created_at == updated_at instead of two values microseconds apart.
        now = datetime.now()
        if self.created_at is None:
            self.created_at = now
        if self.updated_at is None:
            self.updated_at = now

    def get_weight(self, dimension_id: str) -> float:
        """Return the configured weight of a dimension, or 1.0 when none is set."""
        return self.weights.get(dimension_id, 1.0)

    def calculate_overall_score(self, dimension_scores: List[DimensionScore]) -> float:
        """Return the weighted average of the given dimension scores.

        Weights are looked up in this schema by dimension id (1.0 when the id
        has no entry). Returns 0.0 for an empty list or when the weights do
        not sum to a positive number.
        """
        if not dimension_scores:
            return 0.0

        weighted_sum = 0.0
        total_weight = 0.0

        for ds in dimension_scores:
            # NOTE(review): the weight stored on the DimensionScore itself is
            # not consulted here; only the schema's mapping is used.
            weight = self.get_weight(ds.dimension_id)
            weighted_sum += ds.score * weight
            total_weight += weight

        # Guard against division by zero (or a non-positive weight total).
        return weighted_sum / total_weight if total_weight > 0 else 0.0
|
||||
|
||||
|
||||
@dataclass
class Evaluation:
    """Evaluation record produced for a candidate."""

    id: Optional[str] = None
    candidate_id: Optional[str] = None
    schema_id: Optional[str] = None
    job_id: Optional[str] = None

    # Scoring result
    overall_score: float = 0.0  # overall score
    dimension_scores: List[DimensionScore] = field(default_factory=list)

    # AI analysis result
    tags: List[str] = field(default_factory=list)  # AI tags
    summary: Optional[str] = None  # evaluation summary
    strengths: List[str] = field(default_factory=list)  # strengths
    weaknesses: List[str] = field(default_factory=list)  # weaknesses
    recommendation: Optional[Recommendation] = None  # recommendation

    # Raw LLM response
    raw_response: Optional[str] = None

    # Metadata
    created_at: Optional[datetime] = None

    def __post_init__(self):
        """Stamp the creation time when the caller did not supply one."""
        if self.created_at is None:
            self.created_at = datetime.now()

    def to_dict(self) -> Dict[str, Any]:
        """Return a plain-dict view of the evaluation.

        The recommendation is emitted as its enum value and the creation time
        as an ISO-8601 string; ``raw_response`` is not included.
        """
        score_rows = []
        for item in self.dimension_scores:
            score_rows.append({
                "dimension_id": item.dimension_id,
                "dimension_name": item.dimension_name,
                "score": item.score,
                "weight": item.weight,
                "comment": item.comment,
            })

        recommendation_value = None
        if self.recommendation:
            recommendation_value = self.recommendation.value

        created_text = None
        if self.created_at:
            created_text = self.created_at.isoformat()

        return {
            "id": self.id,
            "candidate_id": self.candidate_id,
            "schema_id": self.schema_id,
            "job_id": self.job_id,
            "overall_score": self.overall_score,
            "dimension_scores": score_rows,
            "tags": self.tags,
            "summary": self.summary,
            "strengths": self.strengths,
            "weaknesses": self.weaknesses,
            "recommendation": recommendation_value,
            "created_at": created_text,
        }
|
||||
61
src/main/python/cn/yinlihupo/ylhp_hr_2.0/domain/job.py
Normal file
61
src/main/python/cn/yinlihupo/ylhp_hr_2.0/domain/job.py
Normal file
@@ -0,0 +1,61 @@
|
||||
"""Job entity definitions"""
|
||||
from dataclasses import dataclass
|
||||
from datetime import datetime
|
||||
from typing import Optional, List
|
||||
from enum import Enum
|
||||
|
||||
from .candidate import CandidateSource
|
||||
|
||||
|
||||
class JobStatus(Enum):
    """Status of a job posting."""

    ACTIVE = "active"
    PAUSED = "paused"
    CLOSED = "closed"
    ARCHIVED = "archived"
|
||||
|
||||
|
||||
@dataclass
class JobRequirement:
    """Requirements attached to a job; every field is optional."""

    min_work_years: Optional[int] = None
    max_work_years: Optional[int] = None
    education: Optional[str] = None
    skills: Optional[List[str]] = None
    description: Optional[str] = None
|
||||
|
||||
|
||||
@dataclass
class Job:
    """Job entity.

    Every field has a default. ``__post_init__`` fills in missing timestamps
    and replaces a missing ``requirements`` with an empty ``JobRequirement``.
    """

    id: Optional[str] = None
    source: CandidateSource = CandidateSource.BOSS
    source_id: str = ""

    # Job information
    title: str = ""
    department: Optional[str] = None
    location: Optional[str] = None

    # Salary range
    salary_min: Optional[int] = None
    salary_max: Optional[int] = None

    # Job requirements
    requirements: Optional[JobRequirement] = None
    description: Optional[str] = None

    # Status
    status: JobStatus = JobStatus.ACTIVE

    # Metadata
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None

    def __post_init__(self):
        """Fill in missing timestamps and the default requirements object."""
        # Read the clock once so a freshly created job has
        # created_at == updated_at instead of two values microseconds apart.
        now = datetime.now()
        if self.created_at is None:
            self.created_at = now
        if self.updated_at is None:
            self.updated_at = now
        if self.requirements is None:
            self.requirements = JobRequirement()
|
||||
69
src/main/python/cn/yinlihupo/ylhp_hr_2.0/domain/resume.py
Normal file
69
src/main/python/cn/yinlihupo/ylhp_hr_2.0/domain/resume.py
Normal file
@@ -0,0 +1,69 @@
|
||||
"""Resume entity definitions"""
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
from typing import Optional, List, Dict, Any
|
||||
|
||||
from .candidate import WorkExperience, ProjectExperience, EducationExperience
|
||||
|
||||
|
||||
@dataclass
class ResumeParsed:
    """Structured content extracted from a resume."""

    # Basic information
    name: Optional[str] = None
    phone: Optional[str] = None
    email: Optional[str] = None
    gender: Optional[str] = None
    age: Optional[int] = None
    location: Optional[str] = None

    # Career information
    current_company: Optional[str] = None
    current_position: Optional[str] = None
    work_years: Optional[float] = None
    education: Optional[str] = None
    school: Optional[str] = None

    # Detailed history; each instance gets its own lists
    work_experiences: List[WorkExperience] = field(default_factory=list)
    project_experiences: List[ProjectExperience] = field(default_factory=list)
    education_experiences: List[EducationExperience] = field(default_factory=list)

    # Skill tags
    skills: List[str] = field(default_factory=list)

    # Self evaluation
    self_evaluation: Optional[str] = None

    # Raw parsed data
    raw_data: Optional[Dict[str, Any]] = None
|
||||
|
||||
|
||||
@dataclass
class Resume:
    """Resume entity.

    ``__post_init__`` fills in missing timestamps and replaces a missing
    ``parsed_content`` with an empty ``ResumeParsed``.
    """

    id: Optional[str] = None
    candidate_id: Optional[str] = None

    # Resume content
    raw_content: str = ""  # raw resume text
    parsed_content: Optional[ResumeParsed] = None  # structured content

    # Attachment
    attachment_url: Optional[str] = None  # attachment URL
    attachment_type: Optional[str] = None  # attachment type (pdf, doc, etc.)

    # Version control
    version: int = 1

    # Metadata
    created_at: Optional[datetime] = None
    updated_at: Optional[datetime] = None

    def __post_init__(self):
        """Fill in missing timestamps and the default parsed-content object."""
        # Read the clock once so a freshly created resume has
        # created_at == updated_at instead of two values microseconds apart.
        now = datetime.now()
        if self.created_at is None:
            self.created_at = now
        if self.updated_at is None:
            self.updated_at = now
        if self.parsed_content is None:
            self.parsed_content = ResumeParsed()
|
||||
293
src/main/python/cn/yinlihupo/ylhp_hr_2.0/main.py
Normal file
293
src/main/python/cn/yinlihupo/ylhp_hr_2.0/main.py
Normal file
@@ -0,0 +1,293 @@
|
||||
"""Application entry point"""
|
||||
import asyncio
|
||||
from typing import Optional
|
||||
|
||||
from .config.settings import get_settings
|
||||
from .domain.candidate import CandidateSource
|
||||
from .service.crawler import CrawlerFactory, BossCrawler
|
||||
from .service.ingestion import (
|
||||
UnifiedIngestionService,
|
||||
DataNormalizer,
|
||||
DataValidator,
|
||||
DeduplicationService
|
||||
)
|
||||
from .service.analysis import (
|
||||
ResumeAnalyzer,
|
||||
EvaluationSchemaService,
|
||||
LLMClient,
|
||||
OpenAIClient,
|
||||
MockLLMClient
|
||||
)
|
||||
from .service.notification import (
|
||||
NotificationService,
|
||||
WeChatWorkChannel,
|
||||
DingTalkChannel,
|
||||
EmailChannel
|
||||
)
|
||||
|
||||
|
||||
class HRAgentApplication:
    """Main application object of the HR agent.

    Wires together the crawler factory, the ingestion service, the resume
    analyzer and the notification service, and exposes the
    crawl -> ingest -> analyze -> notify pipeline.
    """

    def __init__(self):
        """Load settings and create empty service slots.

        The services themselves are created by ``initialize()``.
        """
        self.settings = get_settings()
        self.crawler_factory = CrawlerFactory()
        self.ingestion_service: Optional[UnifiedIngestionService] = None
        self.analyzer: Optional[ResumeAnalyzer] = None
        self.notification_service: Optional[NotificationService] = None

        self._initialized = False

    def initialize(self):
        """Create all services; calling it again after success is a no-op."""
        if self._initialized:
            return

        # 1. crawlers, 2. ingestion service, 3. analyzer, 4. notifications
        self._init_crawlers()
        self._init_ingestion_service()
        self._init_analyzer()
        self._init_notification_service()

        self._initialized = True
        print(f"HR Agent {self.settings.app_version} initialized successfully")

    @staticmethod
    def _split_csv(raw) -> list:
        """Split a comma-separated setting into stripped, non-empty items."""
        if not raw:
            return []
        # Dropping blanks keeps a trailing or doubled comma from producing an
        # empty recipient / mention entry.
        return [item.strip() for item in raw.split(",") if item.strip()]

    def _init_crawlers(self):
        """Register the Boss crawler when its token is configured."""
        token = self.settings.crawler.boss_wt_token
        if token:
            boss_crawler = BossCrawler(wt_token=token)
            self.crawler_factory.register(CandidateSource.BOSS, boss_crawler)
            print("Boss crawler registered")

    def _init_ingestion_service(self):
        """Create the ingestion service with its collaborators."""
        self.ingestion_service = UnifiedIngestionService(
            normalizer=DataNormalizer(),
            validator=DataValidator(),
            deduplicator=DeduplicationService(),
            on_analysis_triggered=self._on_analysis_triggered
        )

    def _init_analyzer(self):
        """Create the resume analyzer with the configured LLM client."""
        llm_client = self._create_llm_client()

        self.analyzer = ResumeAnalyzer(
            llm_client=llm_client,
            schema_service=EvaluationSchemaService()
        )

    def _create_llm_client(self) -> LLMClient:
        """Build the LLM client selected by ``settings.llm.provider``.

        Falls back to the mock client (with a printed warning) when the
        OpenAI key is missing or the provider name is not recognised.
        """
        llm = self.settings.llm
        provider = llm.provider.lower()

        if provider == "openai":
            if llm.api_key:
                return OpenAIClient(
                    api_key=llm.api_key,
                    model=llm.model,
                    base_url=llm.base_url,
                    temperature=llm.temperature,
                    max_tokens=llm.max_tokens
                )
            print("Warning: OpenAI API key not configured, using mock client")
            return MockLLMClient()

        if provider == "mock":
            return MockLLMClient()

        print(f"Warning: Unknown LLM provider '{provider}', using mock client")
        return MockLLMClient()

    def _init_notification_service(self):
        """Create the notification service and register every configured channel."""
        notification_service = NotificationService()
        cfg = self.settings.notification

        # WeChat Work
        if cfg.wechat_work_webhook:
            channel = WeChatWorkChannel(
                webhook_url=cfg.wechat_work_webhook,
                # None (not an empty list) when nobody is to be mentioned.
                mentioned_list=self._split_csv(cfg.wechat_work_mentioned) or None
            )
            notification_service.register_channel(channel)
            print("WeChat Work channel registered")

        # DingTalk
        if cfg.dingtalk_webhook:
            channel = DingTalkChannel(
                webhook_url=cfg.dingtalk_webhook,
                secret=cfg.dingtalk_secret,
                at_mobiles=self._split_csv(cfg.dingtalk_at_mobiles) or None
            )
            notification_service.register_channel(channel)
            print("DingTalk channel registered")

        # Email: needs an SMTP host, a username and at least one recipient.
        if cfg.email_smtp_host and cfg.email_username:
            to_addrs = self._split_csv(cfg.email_to)
            if to_addrs:
                channel = EmailChannel(
                    smtp_host=cfg.email_smtp_host,
                    smtp_port=cfg.email_smtp_port,
                    username=cfg.email_username,
                    password=cfg.email_password or "",
                    # Fall back to the login name as the sender address.
                    from_addr=cfg.email_from or cfg.email_username,
                    to_addrs=to_addrs
                )
                notification_service.register_channel(channel)
                print("Email channel registered")

        self.notification_service = notification_service

    def _on_analysis_triggered(self, candidate_id: str):
        """Callback handed to the ingestion service; currently only logs."""
        # An asynchronous analysis task could be started from here.
        print(f"Analysis triggered for candidate: {candidate_id}")

    async def crawl_and_ingest(
        self,
        source: CandidateSource,
        job_id: str,
        page: int = 1
    ):
        """Crawl candidates for a job, ingest them, then analyze and notify.

        Args:
            source: Data source whose registered crawler is used.
            job_id: Job identifier passed to the crawler.
            page: Page number passed to the crawler.
        """
        crawler = self.crawler_factory.get_crawler(source)
        if not crawler:
            print(f"No crawler registered for source: {source}")
            return

        # Fetch the candidate list
        candidates = crawler.get_candidates(job_id, page=page)
        print(f"Found {len(candidates)} candidates from {source.value}")

        for candidate in candidates:
            # Fetch the resume detail; skip the candidate when unavailable
            resume = crawler.get_resume_detail(candidate)
            if not resume:
                print(f"Failed to get resume for {candidate.name}")
                continue

            # Build the raw record handed to the ingestion service
            raw_data = {
                "geekId": candidate.source_id,
                "name": candidate.name,
                "phone": candidate.phone,
                "email": candidate.email,
                "age": candidate.age,
                "gender": candidate.gender,
                "company": candidate.current_company,
                "position": candidate.current_position,
                "workYears": candidate.work_years,
                "education": candidate.education,
                "school": candidate.school,
                "resumeText": resume.raw_content,
            }

            # Ingest
            result = self.ingestion_service.ingest(source, raw_data)
            print(f"Ingestion result for {candidate.name}: {result.message}")

            if result.success and result.candidate_id:
                # Analyze and notify, passing the crawled candidate along so
                # the notification carries the real name and source.
                await self._analyze_and_notify(
                    result.candidate_id, resume, candidate=candidate
                )

    async def _analyze_and_notify(self, candidate_id: str, resume, candidate=None):
        """Analyze a resume and send the result through the notification service.

        Args:
            candidate_id: Identifier returned by the ingestion step.
            resume: Resume object handed to the analyzer.
            candidate: Optional candidate the notification is about. When
                omitted, a placeholder candidate is used.

        Failures are printed and swallowed so that one candidate cannot abort
        the surrounding crawl loop.
        """
        try:
            # Analyze the resume
            evaluation = await self.analyzer.analyze(
                candidate_id=candidate_id,
                resume=resume
            )
            print(f"Analysis completed for {candidate_id}, score: {evaluation.overall_score}")

            # Send the notification
            if self.notification_service:
                if candidate is not None:
                    import copy

                    # Work on a shallow copy so the caller's object is left
                    # untouched while the id reflects the ingested record.
                    # NOTE(review): assumes the crawler yields objects
                    # compatible with Candidate — confirm.
                    notify_candidate = copy.copy(candidate)
                    notify_candidate.id = candidate_id
                else:
                    # Placeholder; ideally loaded from a repository.
                    from .domain.candidate import Candidate
                    notify_candidate = Candidate(
                        id=candidate_id,
                        name="候选人",
                        source=CandidateSource.BOSS
                    )

                result = await self.notification_service.notify(
                    candidate=notify_candidate,
                    evaluation=evaluation
                )
                print(f"Notification result: {result.message}")

        except Exception as e:
            # Deliberate best-effort boundary: report and carry on.
            print(f"Failed to analyze and notify: {e}")
|
||||
|
||||
|
||||
# Module-level cache for the lazily created application instance.
_app: Optional[HRAgentApplication] = None


def get_app() -> HRAgentApplication:
    """Return the shared, initialized application instance.

    The instance is created and initialized on first use. It is stored only
    after ``initialize()`` succeeds, so a failed initialization is retried on
    the next call instead of handing out a half-built application.
    """
    global _app
    if _app is None:
        app = HRAgentApplication()
        app.initialize()
        _app = app
    return _app
|
||||
|
||||
|
||||
async def main():
    """Entry coroutine: obtain the application singleton and report that it runs."""
    app = get_app()

    # Example: crawl and ingest
    # await app.crawl_and_ingest(
    #     source=CandidateSource.BOSS,
    #     job_id="your_job_id"
    # )

    print("HR Agent is running...")


# Run the entry coroutine only when this module is executed as a script.
if __name__ == "__main__":
    asyncio.run(main())
|
||||
@@ -0,0 +1 @@
|
||||
"""Mapper layer - Data access"""
|
||||
@@ -0,0 +1 @@
|
||||
"""Service layer - Business logic"""
|
||||
@@ -0,0 +1,16 @@
|
||||
"""Analysis service layer - AI-powered resume analysis"""
|
||||
|
||||
from .evaluation_schema import EvaluationSchemaService
from .resume_analyzer import ResumeAnalyzer
from .scoring_engine import ScoringEngine
from .prompt_builder import PromptBuilder
# The application entry point imports MockLLMClient from this package, so it
# has to be re-exported here or that import fails.
# NOTE(review): assumes MockLLMClient is defined in llm_client next to the
# other clients — confirm.
from .llm_client import LLMClient, OpenAIClient, MockLLMClient

# Names re-exported as the public API of the analysis package.
__all__ = [
    "EvaluationSchemaService",
    "ResumeAnalyzer",
    "ScoringEngine",
    "PromptBuilder",
    "LLMClient",
    "OpenAIClient",
    "MockLLMClient",
]
|
||||
@@ -0,0 +1,282 @@
|
||||
"""Evaluation schema service - Manage evaluation schemas"""
|
||||
from typing import List, Optional, Dict, Any
|
||||
from dataclasses import dataclass
|
||||
|
||||
from ...domain.evaluation import EvaluationSchema, Dimension
|
||||
from ...domain.enums import Recommendation
|
||||
|
||||
|
||||
@dataclass
class DefaultSchemas:
    """Factories for the built-in evaluation schemas.

    Each factory returns a new ``EvaluationSchema`` on every call. Only the
    general schema is flagged as the default one.
    """

    @staticmethod
    def java_backend() -> EvaluationSchema:
        """Return the evaluation schema for Java backend engineers."""
        return EvaluationSchema(
            id="java_backend",
            name="Java后端工程师评价方案",
            description="针对Java后端开发岗位的综合评价方案",
            dimensions=[
                Dimension(
                    id="tech_capability",
                    name="技术能力",
                    description="Java技术栈掌握程度",
                    criteria=[
                        "Java基础扎实程度",
                        "Spring生态熟悉度",
                        "数据库设计与优化",
                        "分布式系统经验"
                    ]
                ),
                Dimension(
                    id="project_exp",
                    name="项目经验",
                    description="项目经历的丰富度和质量",
                    criteria=[
                        "项目复杂度",
                        "承担角色重要性",
                        "技术挑战解决能力"
                    ]
                ),
                Dimension(
                    id="learning_ability",
                    name="学习能力",
                    description="学习新技术和适应新环境的能力",
                    criteria=[
                        "技术广度",
                        "新技术掌握速度",
                        "自我驱动学习"
                    ]
                ),
                Dimension(
                    id="communication",
                    name="沟通协作",
                    description="团队协作和沟通能力",
                    criteria=[
                        "跨团队协作经验",
                        "技术文档能力",
                        "问题表达能力"
                    ]
                ),
                Dimension(
                    id="stability",
                    name="稳定性",
                    description="职业稳定性和忠诚度",
                    criteria=[
                        "平均在职时长",
                        "跳槽频率",
                        "职业发展规划清晰度"
                    ]
                )
            ],
            weights={
                "tech_capability": 0.35,
                "project_exp": 0.25,
                "learning_ability": 0.15,
                "communication": 0.15,
                "stability": 0.10
            }
            # Not flagged is_default: a role-specific schema (like frontend)
            # should not compete with the general schema for "the default".
        )

    @staticmethod
    def frontend() -> EvaluationSchema:
        """Return the evaluation schema for frontend engineers."""
        return EvaluationSchema(
            id="frontend",
            name="前端工程师评价方案",
            description="针对前端开发岗位的综合评价方案",
            dimensions=[
                Dimension(
                    id="tech_capability",
                    name="技术能力",
                    description="前端技术栈掌握程度",
                    criteria=[
                        "JavaScript/TypeScript熟练度",
                        "主流框架掌握(Vue/React/Angular)",
                        "前端工程化经验",
                        "性能优化能力"
                    ]
                ),
                Dimension(
                    id="ui_ux_sense",
                    name="UI/UX感知",
                    description="对界面设计和用户体验的理解",
                    criteria=[
                        "设计还原度",
                        "交互体验意识",
                        "响应式设计经验"
                    ]
                ),
                Dimension(
                    id="project_exp",
                    name="项目经验",
                    description="项目经历的丰富度和质量"
                ),
                Dimension(
                    id="learning_ability",
                    name="学习能力",
                    description="学习新技术的能力"
                ),
                Dimension(
                    id="communication",
                    name="沟通协作",
                    description="团队协作能力"
                )
            ],
            weights={
                "tech_capability": 0.30,
                "ui_ux_sense": 0.20,
                "project_exp": 0.25,
                "learning_ability": 0.15,
                "communication": 0.10
            }
        )

    @staticmethod
    def general() -> EvaluationSchema:
        """Return the general-purpose evaluation schema (the default one)."""
        return EvaluationSchema(
            id="general",
            name="通用评价方案",
            description="适用于各类岗位的通用评价方案",
            dimensions=[
                Dimension(
                    id="professional",
                    name="专业能力",
                    description="岗位相关专业技能水平"
                ),
                Dimension(
                    id="experience",
                    name="工作经验",
                    description="相关工作经验丰富度"
                ),
                Dimension(
                    id="education",
                    name="教育背景",
                    description="学历和专业匹配度"
                ),
                Dimension(
                    id="potential",
                    name="发展潜力",
                    description="未来成长空间"
                ),
                Dimension(
                    id="culture_fit",
                    name="文化匹配",
                    description="与企业文化的匹配度"
                )
            ],
            weights={
                "professional": 0.30,
                "experience": 0.25,
                "education": 0.15,
                "potential": 0.15,
                "culture_fit": 0.15
            },
            is_default=True
        )
|
||||
|
||||
|
||||
class EvaluationSchemaService:
    """Evaluation schema service.

    Provides create/read/update/delete operations over evaluation schemas,
    combining the built-in schemas from ``DefaultSchemas`` with an optional
    repository. When no repository is configured, schemas are kept in memory.
    """

    def __init__(self, repository=None):
        """Initialise the service and load the built-in schemas.

        Args:
            repository: Optional data-access object. The methods below call
                ``get_by_id``, ``list_all``, ``save``, ``update`` and
                ``delete`` on it.
        """
        self.repository = repository
        # In-memory store: built-in schemas plus, when there is no
        # repository, schemas added through create_schema().
        self._default_schemas: Dict[str, EvaluationSchema] = {}
        # Ids of the built-in schemas; only these are protected from deletion.
        self._builtin_ids: frozenset = frozenset()
        self._init_default_schemas()

    def _init_default_schemas(self):
        """Register the built-in schemas and remember their ids."""
        defaults = [
            DefaultSchemas.general(),
            DefaultSchemas.java_backend(),
            DefaultSchemas.frontend(),
        ]
        for schema in defaults:
            self._default_schemas[schema.id] = schema
        self._builtin_ids = frozenset(schema.id for schema in defaults)

    def get_schema(self, schema_id: str) -> Optional[EvaluationSchema]:
        """Look up a schema by id.

        The in-memory store is consulted first, then the repository.

        Args:
            schema_id: Schema id.

        Returns:
            The schema, or ``None`` when it is not found.
        """
        # NOTE(review): list_schemas() lets repository entries override
        # in-memory ones with the same id, while this method prefers the
        # in-memory entry. Confirm which precedence is intended.
        if schema_id in self._default_schemas:
            return self._default_schemas[schema_id]

        if self.repository:
            return self.repository.get_by_id(schema_id)

        return None

    def get_default_schema(self) -> EvaluationSchema:
        """Return the in-memory schema flagged ``is_default``.

        Falls back to the first in-memory schema when none is flagged.
        """
        for schema in self._default_schemas.values():
            if schema.is_default:
                return schema

        return next(iter(self._default_schemas.values()))

    def list_schemas(self) -> List[EvaluationSchema]:
        """Return all schemas.

        Repository entries replace in-memory entries that share the same id.
        """
        schemas = list(self._default_schemas.values())

        if self.repository:
            merged = {schema.id: schema for schema in schemas}
            for stored in self.repository.list_all():
                merged[stored.id] = stored
            schemas = list(merged.values())

        return schemas

    def create_schema(self, schema: EvaluationSchema) -> EvaluationSchema:
        """Create a schema.

        Saved through the repository when one is configured, otherwise kept
        in memory.
        """
        if self.repository:
            return self.repository.save(schema)

        self._default_schemas[schema.id] = schema
        return schema

    def update_schema(self, schema: EvaluationSchema) -> Optional[EvaluationSchema]:
        """Update a schema.

        Returns:
            The repository's result when a repository is configured.
            Otherwise the schema itself if its id exists in memory, else
            ``None``.
        """
        if self.repository:
            return self.repository.update(schema)

        if schema.id in self._default_schemas:
            self._default_schemas[schema.id] = schema
            return schema

        return None

    def delete_schema(self, schema_id: str) -> bool:
        """Delete a schema.

        Built-in schemas cannot be deleted. Schemas created in memory (no
        repository) can; previously they were indistinguishable from the
        built-ins and could never be removed.

        Returns:
            ``True`` if something was deleted, ``False`` otherwise (for a
            repository, whatever its ``delete`` returns).
        """
        if schema_id in self._builtin_ids:
            return False

        if self.repository:
            return self.repository.delete(schema_id)

        return self._default_schemas.pop(schema_id, None) is not None
|
||||
@@ -0,0 +1,170 @@
|
||||
"""LLM client for resume analysis"""
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import Optional, Dict, Any, List
|
||||
import json
|
||||
|
||||
|
||||
class LLMClient(ABC):
    """Abstract base class for LLM clients."""

    @abstractmethod
    async def analyze(self, prompt: str, **kwargs) -> str:
        """Send an analysis request.

        Args:
            prompt: Prompt text.
            **kwargs: Extra, implementation-specific options.

        Returns:
            The LLM response text.
        """

    @abstractmethod
    def is_available(self) -> bool:
        """Report whether the client can be used."""
|
||||
|
||||
|
||||
class OpenAIClient(LLMClient):
    """OpenAI API client."""

    def __init__(
        self,
        api_key: str,
        model: str = "gpt-4",
        base_url: Optional[str] = None,
        temperature: float = 0.7,
        max_tokens: int = 2000
    ):
        """Store the connection settings; the SDK client is created lazily.

        Args:
            api_key: API key.
            model: Model name.
            base_url: Custom API base URL (for compatible services).
            temperature: Sampling temperature.
            max_tokens: Maximum number of tokens.
        """
        self.api_key = api_key
        self.model = model
        self.base_url = base_url
        self.temperature = temperature
        self.max_tokens = max_tokens
        self._client = None

    def _get_client(self):
        """Return the cached ``AsyncOpenAI`` client, creating it on first use.

        Raises:
            ImportError: If the ``openai`` package cannot be imported.
        """
        if self._client is None:
            try:
                from openai import AsyncOpenAI
            except ImportError as err:
                # Chain the original error so the real import failure stays visible.
                raise ImportError(
                    "openai package is required. Install with: pip install openai"
                ) from err
            self._client = AsyncOpenAI(
                api_key=self.api_key,
                base_url=self.base_url
            )
        return self._client

    async def analyze(self, prompt: str, **kwargs) -> str:
        """Send the prompt as a chat completion and return the reply text.

        ``model``, ``temperature`` and ``max_tokens`` may be overridden per
        call through ``kwargs``.

        Returns:
            The first choice's message content, or ``""`` when the API
            returns no content.
        """
        client = self._get_client()

        messages = [
            {"role": "system", "content": "你是一个专业的简历分析专家,擅长评估候选人的能力和潜力。"},
            {"role": "user", "content": prompt}
        ]

        response = await client.chat.completions.create(
            model=kwargs.get("model", self.model),
            messages=messages,
            temperature=kwargs.get("temperature", self.temperature),
            max_tokens=kwargs.get("max_tokens", self.max_tokens)
        )

        # message.content is optional in the SDK; honour the ``-> str`` contract.
        return response.choices[0].message.content or ""

    def is_available(self) -> bool:
        """Report whether the SDK is importable and an API key is set."""
        try:
            # Import is only a probe for a usable SDK; the name is not used here.
            from openai import AsyncOpenAI  # noqa: F401
        except ImportError:
            return False
        return bool(self.api_key)
|
||||
|
||||
|
||||
class ClaudeClient(LLMClient):
    """Claude API client."""

    def __init__(
        self,
        api_key: str,
        model: str = "claude-3-sonnet-20240229",
        max_tokens: int = 2000
    ):
        """Store the connection settings; the SDK client is created lazily.

        Args:
            api_key: API key.
            model: Model name.
            max_tokens: Maximum number of tokens.
        """
        self.api_key = api_key
        self.model = model
        self.max_tokens = max_tokens
        self._client = None

    def _get_client(self):
        """Return the cached ``AsyncAnthropic`` client, creating it on first use.

        Raises:
            ImportError: If the ``anthropic`` package cannot be imported.
        """
        if self._client is None:
            try:
                import anthropic
            except ImportError as err:
                # Chain the original error so the real import failure stays visible.
                raise ImportError(
                    "anthropic package is required. Install with: pip install anthropic"
                ) from err
            self._client = anthropic.AsyncAnthropic(api_key=self.api_key)
        return self._client

    async def analyze(self, prompt: str, **kwargs) -> str:
        """Send the prompt as a single user message and return the reply text.

        ``model`` and ``max_tokens`` may be overridden per call through
        ``kwargs``.

        Returns:
            The concatenated text of the response's content blocks, or ``""``
            when there is none.
        """
        client = self._get_client()

        response = await client.messages.create(
            model=kwargs.get("model", self.model),
            max_tokens=kwargs.get("max_tokens", self.max_tokens),
            messages=[{"role": "user", "content": prompt}]
        )

        # The content list may be empty or hold blocks without ``text``;
        # indexing [0].text directly would raise in those cases.
        texts = (getattr(block, "text", None) for block in (response.content or []))
        return "".join(text for text in texts if isinstance(text, str))

    def is_available(self) -> bool:
        """Report whether the SDK is importable and an API key is set."""
        try:
            # Import is only a probe for a usable SDK; the name is not used here.
            import anthropic  # noqa: F401
        except ImportError:
            return False
        return bool(self.api_key)
|
||||
|
||||
|
||||
class MockLLMClient(LLMClient):
    """Mock LLM client (for tests)."""

    def __init__(self, response_template: Optional[str] = None):
        """Use ``response_template`` as the canned reply, or the built-in one."""
        if response_template:
            self.response_template = response_template
        else:
            self.response_template = self._default_response()

    async def analyze(self, prompt: str, **kwargs) -> str:
        """Return the canned response; the prompt is ignored."""
        return self.response_template

    def is_available(self) -> bool:
        """Always available."""
        return True

    def _default_response(self) -> str:
        """Return the default canned response as a JSON string."""
        payload = {
            "overall_score": 85,
            "dimension_scores": [
                {"dimension_id": "tech", "score": 90, "comment": "技术能力优秀"},
                {"dimension_id": "experience", "score": 80, "comment": "经验丰富"},
                {"dimension_id": "education", "score": 85, "comment": "学历良好"},
            ],
            "tags": ["Java", "Spring", "微服务", "5年经验"],
            "summary": "该候选人技术能力优秀,经验丰富,是一个不错的候选人。",
            "strengths": ["技术扎实", "项目经验丰富", "学习能力强"],
            "weaknesses": ["管理经验较少"],
            "recommendation": "recommend",
        }
        # ensure_ascii=False keeps the Chinese text unescaped.
        return json.dumps(payload, ensure_ascii=False)
|
||||
@@ -0,0 +1,221 @@
|
||||
"""Prompt builder for resume analysis"""
|
||||
from typing import Optional, List, Dict, Any
|
||||
|
||||
from ...domain.resume import ResumeParsed
|
||||
from ...domain.evaluation import EvaluationSchema, Dimension
|
||||
from ...domain.job import Job
|
||||
|
||||
|
||||
class PromptBuilder:
    """Prompt builder.

    Assembles the LLM prompt from the parsed resume, the evaluation schema
    and an optional job.
    """

    # Placeholders are filled by build(); doubled braces are literal braces
    # for str.format. Line layout is reproduced as visible in the source.
    DEFAULT_TEMPLATE = """
你是一位专业的简历分析专家。请根据以下信息对候选人进行全面评估。

## 评价方案
方案名称:{schema_name}
方案描述:{schema_description}

## 评价维度
{dimensions}

## 职位要求
{job_requirements}

## 候选人简历
{resume_content}

## 分析要求
请按照以下 JSON 格式输出分析结果:
```json
{{
"overall_score": <综合评分 0-100>,
"dimension_scores": [
{{
"dimension_id": "<维度ID>",
"score": <该维度评分 0-100>,
"comment": "<该维度评价说明>"
}}
],
"tags": ["<标签1>", "<标签2>", ...],
"summary": "<综合评价摘要,100字以内>",
"strengths": ["<优势1>", "<优势2>", ...],
"weaknesses": ["<不足1>", "<不足2>", ...],
"recommendation": "<推荐意见: strong_recommend/recommend/consider/not_recommend>"
}}
```

注意:
1. 评分要客观公正,基于简历实际内容
2. 标签要简洁准确,体现候选人核心特点
3. 优势和不足要具体,避免空泛描述
4. 推荐意见要综合考虑各维度评分
"""

    def __init__(self, template: Optional[str] = None):
        """Initialise the builder.

        Args:
            template: Custom prompt template; ``DEFAULT_TEMPLATE`` is used
                when omitted or empty.
        """
        self.template = template or self.DEFAULT_TEMPLATE

    def build(
        self,
        resume: ResumeParsed,
        schema: EvaluationSchema,
        job: Optional[Job] = None
    ) -> str:
        """Build the prompt.

        Args:
            resume: Parsed resume content.
            schema: Evaluation schema.
            job: Related job (optional).

        Returns:
            The template with all five placeholders filled in.
        """
        return self.template.format(
            schema_name=schema.name,
            schema_description=schema.description or "无",
            dimensions=self._build_dimensions(schema.dimensions),
            job_requirements=self._build_job_requirements(job),
            resume_content=self._build_resume_content(resume)
        )

    def _build_dimensions(self, dimensions: List[Dimension]) -> str:
        """Render the dimensions as a bullet list, one entry per dimension."""
        if not dimensions:
            return "使用默认维度进行评估"

        lines = []
        for dim in dimensions:
            line = f"- {dim.name}(ID: {dim.id})"
            if dim.description:
                line += f":{dim.description}"
            if dim.criteria:
                line += f"\n 评价标准:{', '.join(dim.criteria)}"
            lines.append(line)

        return "\n".join(lines)

    def _build_job_requirements(self, job: Optional[Job]) -> str:
        """Render the job's title, location, salary, requirements and description."""
        if not job:
            return "无特定职位要求"

        lines = [f"职位:{job.title}"]

        if job.location:
            lines.append(f"地点:{job.location}")

        if job.salary_min or job.salary_max:
            # A missing bound is rendered as '?'.
            salary = f"{job.salary_min or '?'}-{job.salary_max or '?'}K"
            lines.append(f"薪资范围:{salary}")

        requirements = job.requirements
        if requirements:
            if requirements.min_work_years:
                lines.append(f"最低工作年限:{requirements.min_work_years}年")
            if requirements.education:
                lines.append(f"学历要求:{requirements.education}")
            if requirements.skills:
                lines.append(f"技能要求:{', '.join(requirements.skills)}")
            if requirements.description:
                lines.append(f"其他要求:{requirements.description}")

        if job.description:
            lines.append(f"\n职位描述:\n{job.description}")

        return "\n".join(lines)

    def _build_resume_content(self, resume: ResumeParsed) -> str:
        """Render the resume as sectioned text.

        Returns:
            The rendered sections, or ``"(简历内容为空)"`` when ``resume`` is
            ``None`` or has no populated field. Previously the placeholder
            was unreachable because the basic-info header was always present,
            and a ``None`` resume raised ``AttributeError``.
        """
        empty_text = "(简历内容为空)"
        if resume is None:
            return empty_text

        # Basic info. The header is always the first line, as before.
        lines = ["### 基本信息"]
        basic_fields = [
            ("姓名", resume.name, ""),
            ("性别", resume.gender, ""),
            ("年龄", resume.age, ""),
            ("所在地", resume.location, ""),
            ("当前公司", resume.current_company, ""),
            ("当前职位", resume.current_position, ""),
            ("工作年限", resume.work_years, "年"),
            ("学历", resume.education, ""),
            ("毕业院校", resume.school, ""),
        ]
        for label, value, suffix in basic_fields:
            if value:
                lines.append(f"{label}:{value}{suffix}")

        # Work experience.
        if resume.work_experiences:
            lines.append("\n### 工作经历")
            for exp in resume.work_experiences:
                line = f"- {exp.company} | {exp.position}"
                if exp.start_date:
                    # The date range is only shown when a start date exists.
                    line += f" ({exp.start_date}"
                    if exp.end_date:
                        line += f" - {exp.end_date}"
                    line += ")"
                lines.append(line)
                if exp.description:
                    lines.append(f" {exp.description}")

        # Project experience.
        if resume.project_experiences:
            lines.append("\n### 项目经历")
            for exp in resume.project_experiences:
                line = f"- {exp.name}"
                if exp.role:
                    line += f" | {exp.role}"
                lines.append(line)
                if exp.description:
                    lines.append(f" {exp.description}")

        # Education.
        if resume.education_experiences:
            lines.append("\n### 教育经历")
            for exp in resume.education_experiences:
                line = f"- {exp.school}"
                if exp.major:
                    line += f" | {exp.major}"
                if exp.degree:
                    line += f" | {exp.degree}"
                lines.append(line)

        # Skills.
        if resume.skills:
            lines.append(f"\n### 技能\n{', '.join(resume.skills)}")

        # Self evaluation.
        if resume.self_evaluation:
            lines.append(f"\n### 自我评价\n{resume.self_evaluation}")

        # Raw text, when the parser stored it under "full_text".
        if resume.raw_data and "full_text" in resume.raw_data:
            lines.append(f"\n### 完整简历文本\n{resume.raw_data['full_text']}")

        # Only the header is present: nothing useful to send to the LLM.
        if len(lines) == 1:
            return empty_text
        return "\n".join(lines)
|
||||
@@ -0,0 +1,251 @@
|
||||
"""Resume analyzer - AI-powered resume analysis"""
|
||||
import json
|
||||
import re
|
||||
from typing import Optional, List, Dict, Any
|
||||
from datetime import datetime
|
||||
|
||||
from .llm_client import LLMClient
|
||||
from .prompt_builder import PromptBuilder
|
||||
from .scoring_engine import ScoringEngine
|
||||
from .evaluation_schema import EvaluationSchemaService
|
||||
from ...domain.evaluation import (
|
||||
Evaluation, EvaluationSchema, DimensionScore
|
||||
)
|
||||
from ...domain.enums import Recommendation
|
||||
from ...domain.resume import Resume
|
||||
|
||||
|
||||
class ResumeAnalyzer:
    """Resume analyzer.

    Uses an LLM to analyse a resume and produce an ``Evaluation``.
    """

    def __init__(
        self,
        llm_client: LLMClient,
        schema_service: Optional[EvaluationSchemaService] = None,
        prompt_builder: Optional[PromptBuilder] = None,
        scoring_engine: Optional[ScoringEngine] = None,
        evaluation_repo=None
    ):
        """Initialise the analyzer.

        Args:
            llm_client: LLM client.
            schema_service: Evaluation schema service; a default instance is
                created when omitted.
            prompt_builder: Prompt builder; a default instance is created
                when omitted.
            scoring_engine: Scoring engine; a default instance is created
                when omitted.
            evaluation_repo: Optional evaluation repository; its ``save``
                method is called with each successful evaluation.
        """
        self.llm = llm_client
        self.schema_service = schema_service or EvaluationSchemaService()
        self.prompt_builder = prompt_builder or PromptBuilder()
        self.scoring_engine = scoring_engine or ScoringEngine()
        self.evaluation_repo = evaluation_repo

    async def analyze(
        self,
        candidate_id: str,
        resume: Resume,
        schema_id: Optional[str] = None,
        job_id: Optional[str] = None
    ) -> Evaluation:
        """Analyse a candidate's resume.

        Args:
            candidate_id: Candidate id.
            resume: Resume object; its ``parsed_content`` is sent to the LLM.
            schema_id: Evaluation schema id; the default schema is used when
                omitted or unknown.
            job_id: Related job id.

        Returns:
            The evaluation. If the LLM call raises, an empty evaluation is
            returned and nothing is saved.
        """
        # 1. Resolve the schema (falls back to the default one).
        schema = self._get_schema(schema_id)

        # 2. Build the prompt.
        prompt = self.prompt_builder.build(
            resume=resume.parsed_content,
            schema=schema,
            job=None  # TODO: load the job for job_id
        )

        # 3. Call the LLM.
        try:
            response = await self.llm.analyze(prompt)
        except Exception as e:
            print(f"LLM analysis failed: {e}")
            return self._create_empty_evaluation(candidate_id, schema.id, job_id)

        # 4. Parse the response.
        evaluation_data = self._parse_response(response)

        # 5. Build the evaluation. Record the id of the schema actually
        #    used: the requested id may not exist, in which case the default
        #    schema was substituted above.
        evaluation = self._build_evaluation(
            candidate_id=candidate_id,
            schema_id=schema.id,
            job_id=job_id,
            evaluation_data=evaluation_data,
            raw_response=response,
            schema=schema
        )

        # 6. Persist.
        if self.evaluation_repo:
            evaluation = self.evaluation_repo.save(evaluation)

        return evaluation

    def _get_schema(self, schema_id: Optional[str]) -> EvaluationSchema:
        """Return the schema for ``schema_id``, or the default schema."""
        if schema_id:
            schema = self.schema_service.get_schema(schema_id)
            if schema:
                return schema

        return self.schema_service.get_default_schema()

    def _parse_response(self, response: str) -> Dict[str, Any]:
        """Extract the JSON object from an LLM response.

        Tries, in order: the whole text, the first Markdown code fence, and
        the widest ``{...}`` span. Only a JSON object is accepted.

        Returns:
            The parsed dict, or ``{}`` when nothing parses or ``response``
            is not a string.
        """
        if isinstance(response, str):
            candidates = [response]

            fenced = re.search(r'```(?:json)?\s*(\{.*?\})\s*```', response, re.DOTALL)
            if fenced:
                candidates.append(fenced.group(1))

            braced = re.search(r'\{.*\}', response, re.DOTALL)
            if braced:
                candidates.append(braced.group())

            for text in candidates:
                try:
                    data = json.loads(text)
                except json.JSONDecodeError:
                    continue
                # A bare list/number/string is valid JSON but unusable here.
                if isinstance(data, dict):
                    return data

        print(f"Failed to parse LLM response: {str(response)[:200]}...")
        return {}

    @staticmethod
    def _to_float(value: Any, default: float = 0.0) -> float:
        """Convert ``value`` to float, returning ``default`` if it cannot be."""
        try:
            return float(value)
        except (TypeError, ValueError):
            return default

    def _build_evaluation(
        self,
        candidate_id: str,
        schema_id: str,
        job_id: Optional[str],
        evaluation_data: Dict[str, Any],
        raw_response: str,
        schema: Optional[EvaluationSchema] = None
    ) -> Evaluation:
        """Build an ``Evaluation`` from parsed LLM data.

        Args:
            candidate_id: Candidate id.
            schema_id: Id of the schema used.
            job_id: Related job id.
            evaluation_data: Dict returned by ``_parse_response``.
            raw_response: Raw LLM response text.
            schema: Schema used for the analysis. When given, its dimension
                names and weights fill in values the LLM did not return (the
                default prompt asks only for id, score and comment).

        Returns:
            The evaluation. Null or non-numeric scores are treated as 0
            instead of raising.
        """
        names: Dict[str, str] = {}
        weights: Dict[str, float] = {}
        if schema is not None:
            names = {dim.id: dim.name for dim in (schema.dimensions or [])}
            weights = dict(schema.weights or {})

        # Dimension scores; entries that are not objects are skipped.
        dimension_scores = []
        for entry in evaluation_data.get("dimension_scores") or []:
            if not isinstance(entry, dict):
                continue
            dim_id = str(entry.get("dimension_id") or "")
            dimension_scores.append(DimensionScore(
                dimension_id=dim_id,
                dimension_name=entry.get("dimension_name") or names.get(dim_id, ""),
                score=self._to_float(entry.get("score")),
                weight=self._to_float(entry.get("weight"), weights.get(dim_id, 1.0)),
                comment=entry.get("comment")
            ))

        recommendation = self._parse_recommendation(
            evaluation_data.get("recommendation", "")
        )

        overall_score = self._to_float(evaluation_data.get("overall_score"))

        # No overall score from the LLM: derive it from the dimension scores.
        if overall_score == 0 and dimension_scores:
            score_result = self.scoring_engine.calculate(dimension_scores)
            overall_score = score_result.overall_score

        return Evaluation(
            candidate_id=candidate_id,
            schema_id=schema_id,
            job_id=job_id,
            overall_score=overall_score,
            dimension_scores=dimension_scores,
            tags=evaluation_data.get("tags") or [],
            summary=evaluation_data.get("summary"),
            strengths=evaluation_data.get("strengths") or [],
            weaknesses=evaluation_data.get("weaknesses") or [],
            recommendation=recommendation,
            raw_response=raw_response
        )

    def _parse_recommendation(self, value: str) -> Optional[Recommendation]:
        """Map the LLM's recommendation text to a ``Recommendation``.

        Matching ignores case and surrounding whitespace, and treats ``-``
        like ``_``.

        Returns:
            The enum member, or ``None`` for empty, non-string or unknown
            values.
        """
        if not value or not isinstance(value, str):
            return None

        key = value.lower().strip().replace("-", "_")

        mapping = {
            "strong_recommend": Recommendation.STRONG_RECOMMEND,
            "strong recommend": Recommendation.STRONG_RECOMMEND,
            "strong": Recommendation.STRONG_RECOMMEND,
            "recommend": Recommendation.RECOMMEND,
            "consider": Recommendation.CONSIDER,
            "not_recommend": Recommendation.NOT_RECOMMEND,
            "not recommend": Recommendation.NOT_RECOMMEND,
            "not": Recommendation.NOT_RECOMMEND,
        }

        return mapping.get(key)

    def _create_empty_evaluation(
        self,
        candidate_id: str,
        schema_id: Optional[str],
        job_id: Optional[str]
    ) -> Evaluation:
        """Create a zero-score evaluation, used when analysis fails."""
        return Evaluation(
            candidate_id=candidate_id,
            schema_id=schema_id or "",
            job_id=job_id,
            overall_score=0,
            summary="分析失败,无法生成评价",
            recommendation=None
        )

    async def batch_analyze(
        self,
        items: List[tuple]
    ) -> List[Evaluation]:
        """Analyse several resumes one after another.

        Args:
            items: ``[(candidate_id, resume, schema_id, job_id), ...]``

        Returns:
            One evaluation per item, in input order. An item whose analysis
            raises yields an empty evaluation.
        """
        results = []
        for candidate_id, resume, schema_id, job_id in items:
            try:
                evaluation = await self.analyze(
                    candidate_id=candidate_id,
                    resume=resume,
                    schema_id=schema_id,
                    job_id=job_id
                )
            except Exception as e:
                print(f"Failed to analyze candidate {candidate_id}: {e}")
                evaluation = self._create_empty_evaluation(candidate_id, schema_id, job_id)
            results.append(evaluation)

        return results
|
||||
@@ -0,0 +1,167 @@
|
||||
"""Scoring engine for evaluation calculation"""
|
||||
from typing import List, Dict, Optional
|
||||
from dataclasses import dataclass
|
||||
|
||||
from ...domain.evaluation import DimensionScore
|
||||
|
||||
|
||||
@dataclass
class ScoreResult:
    """Result of a scoring calculation."""

    # Final combined score.
    overall_score: float
    # Per-dimension scores the result was computed from.
    dimension_scores: List[DimensionScore]
    # Weight-averaged score.
    weighted_score: float
    # Confidence, 0-1.
    confidence: float
|
||||
|
||||
|
||||
class ScoringEngine:
    """Score calculation engine.

    Responsible for:
    1. the weighted score across dimensions,
    2. the overall score,
    3. the confidence value.
    """

    def __init__(self):
        """No state to initialise."""

    def calculate(
        self,
        dimension_scores: List[DimensionScore],
        weights: Optional[Dict[str, float]] = None
    ) -> ScoreResult:
        """Compute the overall score.

        Args:
            dimension_scores: Per-dimension scores.
            weights: Weights keyed by dimension id. When omitted or empty,
                each score's own ``weight`` is used; a dimension missing from
                the mapping also falls back to its own ``weight``.

        Returns:
            The score result. ``overall_score`` is 70% weighted score plus
            30% simple average, rounded to one decimal.
        """
        if not dimension_scores:
            return ScoreResult(
                overall_score=0.0,
                dimension_scores=[],
                weighted_score=0.0,
                confidence=0.0
            )

        # Resolve the weight applied to each dimension.
        applied = []
        for item in dimension_scores:
            if weights:
                applied.append(weights.get(item.dimension_id, item.weight))
            else:
                applied.append(item.weight)

        weighted_sum = 0.0
        total_weight = 0.0
        for item, weight in zip(dimension_scores, applied):
            weighted_sum += item.score * weight
            total_weight += weight

        if total_weight > 0:
            weighted_score = weighted_sum / total_weight
        else:
            weighted_score = 0.0

        simple_average = sum(ds.score for ds in dimension_scores) / len(dimension_scores)

        overall_score = weighted_score * 0.7 + simple_average * 0.3
        confidence = self._calculate_confidence(dimension_scores)

        return ScoreResult(
            overall_score=round(overall_score, 1),
            dimension_scores=dimension_scores,
            weighted_score=round(weighted_score, 1),
            confidence=round(confidence, 2)
        )

    def _calculate_confidence(self, dimension_scores: List[DimensionScore]) -> float:
        """Compute a confidence value in ``[0, 1]``.

        Combines two factors:
        1. the number of scores (five or more counts as full), weighted 0.4;
        2. score consistency (lower sample variance is better), weighted 0.6.
        """
        if not dimension_scores:
            return 0.0

        count_factor = min(len(dimension_scores) / 5, 1.0)

        values = [item.score for item in dimension_scores]
        if len(values) <= 1:
            # A single dimension gives little evidence of consistency.
            consistency_factor = 0.5
        else:
            import statistics
            try:
                spread = statistics.variance(values)
                consistency_factor = max(0, 1 - spread / 1000)
            except statistics.StatisticsError:
                consistency_factor = 1.0

        confidence = count_factor * 0.4 + consistency_factor * 0.6

        return min(max(confidence, 0.0), 1.0)

    def normalize_scores(
        self,
        dimension_scores: List[DimensionScore],
        target_min: float = 0,
        target_max: float = 100
    ) -> List[DimensionScore]:
        """Linearly rescale scores to the target range.

        Args:
            dimension_scores: Original scores.
            target_min: Lower bound of the target range.
            target_max: Upper bound of the target range.

        Returns:
            New ``DimensionScore`` objects with rescaled scores rounded to
            one decimal. When all scores are equal the input list itself is
            returned unchanged; an empty input yields ``[]``.
        """
        if not dimension_scores:
            return []

        values = [item.score for item in dimension_scores]
        lowest = min(values)
        highest = max(values)

        if highest == lowest:
            return dimension_scores

        return [
            DimensionScore(
                dimension_id=item.dimension_id,
                dimension_name=item.dimension_name,
                score=round(
                    target_min
                    + (item.score - lowest) / (highest - lowest) * (target_max - target_min),
                    1,
                ),
                weight=item.weight,
                comment=item.comment
            )
            for item in dimension_scores
        ]

    def rank_candidates(
        self,
        candidate_scores: List[tuple]
    ) -> List[tuple]:
        """Sort candidates by score, highest first.

        Args:
            candidate_scores: ``[(candidate_id, score), ...]``

        Returns:
            A new list sorted by score in descending order.
        """
        ranked = list(candidate_scores)
        ranked.sort(key=lambda pair: pair[1], reverse=True)
        return ranked
|
||||
@@ -0,0 +1,7 @@
|
||||
"""Crawler service layer"""
|
||||
|
||||
from .base_crawler import BaseCrawler
|
||||
from .boss_crawler import BossCrawler
|
||||
from .crawler_factory import CrawlerFactory
|
||||
|
||||
__all__ = ["BaseCrawler", "BossCrawler", "CrawlerFactory"]
|
||||
@@ -0,0 +1,95 @@
|
||||
"""Base crawler abstract class"""
|
||||
from abc import ABC, abstractmethod
|
||||
from typing import List, Optional, Dict, Any
|
||||
|
||||
from ...domain.candidate import Candidate, CandidateSource
|
||||
from ...domain.resume import Resume
|
||||
from ...domain.job import Job
|
||||
|
||||
|
||||
class BaseCrawler(ABC):
    """Abstract base class for all channel crawlers.

    Subclasses must implement:
    1. ``source_type`` - the channel type
    2. ``get_jobs`` - list jobs
    3. ``get_candidates`` - list candidates
    4. ``get_resume_detail`` - fetch a candidate's resume
    """

    @property
    @abstractmethod
    def source_type(self) -> CandidateSource:
        """Channel type this crawler serves."""

    @abstractmethod
    def get_jobs(self, status: Optional[str] = None) -> List[Job]:
        """List jobs.

        Args:
            status: Job status filter, e.g. ``"active"``.

        Returns:
            The jobs.
        """

    @abstractmethod
    def get_candidates(
        self,
        job_id: str,
        page: int = 1,
        page_size: int = 20
    ) -> List[Candidate]:
        """List the candidates for a job.

        Args:
            job_id: Job id.
            page: Page number, starting at 1.
            page_size: Number of items per page.

        Returns:
            The candidates.
        """

    @abstractmethod
    def get_resume_detail(self, candidate: Candidate) -> Optional[Resume]:
        """Fetch a candidate's resume.

        Args:
            candidate: Candidate object.

        Returns:
            The resume, or ``None`` when it cannot be fetched.
        """

    def get_candidate_by_id(self, source_id: str) -> Optional[Candidate]:
        """Look up a candidate by source id (optional to implement).

        Args:
            source_id: Id on the source platform.

        Returns:
            The candidate, or ``None`` when not found. The base
            implementation always returns ``None``.
        """
        return None

    def parse_raw_data(self, raw_data: Dict[str, Any]) -> Candidate:
        """Parse raw data into a candidate (optional to implement).

        Args:
            raw_data: Raw data returned by the API.

        Returns:
            The candidate.

        Raises:
            NotImplementedError: Always, in the base implementation.
        """
        raise NotImplementedError("parse_raw_data must be implemented")
|
||||
@@ -0,0 +1,207 @@
|
||||
"""Boss crawler implementation using ylhp-boss-hr SDK"""
|
||||
from decimal import Decimal
|
||||
from typing import List, Optional, Dict, Any
|
||||
import re
|
||||
|
||||
try:
|
||||
from boss import Boss
|
||||
except ImportError:
|
||||
Boss = None
|
||||
|
||||
from .base_crawler import BaseCrawler
|
||||
from ...domain.candidate import (
|
||||
Candidate, CandidateSource, CandidateStatus,
|
||||
SalaryRange, WorkExperience, ProjectExperience, EducationExperience
|
||||
)
|
||||
from ...domain.resume import Resume, ResumeParsed
|
||||
from ...domain.job import Job, JobStatus, JobRequirement
|
||||
from ...domain.enums import Gender
|
||||
|
||||
|
||||
class BossCrawler(BaseCrawler):
|
||||
"""
|
||||
Boss直聘爬虫实现
|
||||
|
||||
基于 ylhp-boss-hr SDK 封装,提供统一的候选人数据获取接口
|
||||
"""
|
||||
|
||||
def __init__(self, wt_token: str):
    """Create the Boss SDK client.

    Args:
        wt_token: The Boss platform ``wt`` token.

    Raises:
        ImportError: If the SDK import at module load failed.
    """
    sdk_class = Boss
    if sdk_class is None:
        raise ImportError("ylhp-boss-hr SDK is not installed")
    self.client = sdk_class(wt=wt_token)
|
||||
|
||||
@property
def source_type(self) -> CandidateSource:
    """Channel type of this crawler: ``CandidateSource.BOSS``."""
    return CandidateSource.BOSS
|
||||
|
||||
def get_jobs(self, status: Optional[str] = None) -> List[Job]:
    """List jobs from the Boss SDK.

    Returns:
        The parsed jobs, or ``[]`` if fetching or parsing raises.
    """
    # NOTE(review): ``status`` is accepted but not used for filtering here.
    # Confirm whether the SDK call supports a status filter.
    try:
        parsed_jobs = []
        for job_data in self.client.get_jobs():
            parsed_jobs.append(self._parse_job(job_data))
        return parsed_jobs
    except Exception as e:
        print(f"Failed to get jobs from Boss: {e}")
        return []
|
||||
|
||||
def get_candidates(
    self,
    job_id: str,
    page: int = 1,
    page_size: int = 20
) -> List[Candidate]:
    """List the candidates for a job from the Boss SDK.

    Args:
        job_id: Job id, passed to the SDK as ``jobid``.
        page: Page number passed to the SDK.
        page_size: Not forwarded to the SDK call.
            NOTE(review): confirm whether the SDK supports a page size.

    Returns:
        The parsed candidates, or ``[]`` if fetching or parsing raises.
    """
    try:
        parsed_candidates = []
        for geek_data in self.client.geek_info(jobid=job_id, page=page):
            parsed_candidates.append(self._parse_candidate(geek_data))
        return parsed_candidates
    except Exception as e:
        print(f"Failed to get candidates from Boss: {e}")
        return []
|
||||
|
||||
def get_resume_detail(self, candidate: Candidate) -> Optional[Resume]:
    """Fetch and parse a candidate's resume.

    Calls the SDK's ``get_detail`` and ``get_detail_text``, then parses the
    resulting text.

    Returns:
        A ``Resume`` with ``version=1``, or ``None`` if any step raises.
    """
    try:
        # Fetch the candidate detail, then obtain the resume body text from it.
        resume_text = self.client.get_detail_text(self.client.get_detail(candidate))

        return Resume(
            candidate_id=candidate.id,
            raw_content=resume_text,
            parsed_content=self._parse_resume_text(resume_text),
            version=1
        )
    except Exception as e:
        print(f"Failed to get resume detail from Boss: {e}")
        return None
|
||||
|
||||
def _parse_job(self, job_data: Any) -> Job:
    """Convert an SDK job record into a ``Job``.

    Missing attributes default to ``''``. The status is always
    ``JobStatus.ACTIVE``.
    """
    # NOTE: the attribute names follow the SDK's records and may need
    # adjusting to what the SDK actually returns.
    source_id = getattr(job_data, 'encryptJobId', '')
    title = getattr(job_data, 'jobName', '')
    location = getattr(job_data, 'cityName', '')
    salary_min = self._parse_salary_min(getattr(job_data, 'salary', ''))
    salary_max = self._parse_salary_max(getattr(job_data, 'salary', ''))
    return Job(
        source=CandidateSource.BOSS,
        source_id=source_id,
        title=title,
        location=location,
        salary_min=salary_min,
        salary_max=salary_max,
        status=JobStatus.ACTIVE
    )
|
||||
|
||||
def _parse_candidate(self, geek_data: Any) -> Candidate:
    """Convert an SDK candidate record into a ``Candidate``.

    The source id is ``geekId`` when truthy, otherwise ``encryptGeekId``.
    The status is always ``CandidateStatus.NEW``.
    """
    def field(attr_name, default=None):
        # Read an attribute from the SDK record with a fallback.
        return getattr(geek_data, attr_name, default)

    source_id = field('geekId', '') or field('encryptGeekId', '')

    # Salary expectation, gender and work years need parsing.
    salary_range = self._parse_salary_range(field('salary', ''))
    gender = self._parse_gender(field('gender', ''))
    work_years = self._parse_work_years(field('workYears', ''))

    return Candidate(
        source=CandidateSource.BOSS,
        source_id=str(source_id),
        name=field('name', ''),
        gender=gender,
        age=field('age'),
        location=field('location'),
        current_company=field('company'),
        current_position=field('position'),
        work_years=work_years,
        education=field('education'),
        school=field('school'),
        salary_expectation=salary_range,
        status=CandidateStatus.NEW
    )
|
||||
|
||||
def _parse_resume_text(self, resume_text: str) -> ResumeParsed:
    """Extract basic contact details from plain resume text.

    This is a deliberately basic implementation that can be extended with
    richer parsing later. The text is scanned line by line; the first
    mobile-number-like and the first e-mail-like substring found are
    stored, and the complete text is kept under ``raw_data["full_text"]``.

    Args:
        resume_text: Full resume text.

    Returns:
        A ``ResumeParsed`` with ``phone`` / ``email`` filled in when found.
    """
    parsed = ResumeParsed()
    phone_pattern = re.compile(r'1[3-9]\d{9}')
    email_pattern = re.compile(r'[\w.-]+@[\w.-]+\.\w+')

    for text in map(str.strip, resume_text.split('\n')):
        if not text:
            # Blank line: nothing to extract.
            continue

        # Only the first phone number in the document is kept.
        if not parsed.phone:
            phone_hit = phone_pattern.search(text)
            if phone_hit:
                parsed.phone = phone_hit.group()

        # Only the first e-mail address in the document is kept.
        if not parsed.email:
            email_hit = email_pattern.search(text)
            if email_hit:
                parsed.email = email_hit.group()

    parsed.raw_data = {"full_text": resume_text}
    return parsed
|
||||
|
||||
def _parse_salary_range(self, salary_str: str) -> Optional[SalaryRange]:
    """Parse a salary string such as "15-25K" or "15K-25K".

    The first run of digits becomes the minimum; a second run of digits,
    when present after optional whitespace / "-" / "K", becomes the
    maximum.

    Args:
        salary_str: Salary text; an empty value yields ``None``.

    Returns:
        A ``SalaryRange`` (``max_salary`` is ``None`` when only one number
        is present), or ``None`` when the text is empty or has no digits.
    """
    if not salary_str:
        return None

    found = re.search(r'(\d+)[\s-]*K?[\s-]*(\d+)?', salary_str, re.IGNORECASE)
    if not found:
        return None

    low_text, high_text = found.group(1), found.group(2)
    lower = int(low_text)
    upper = int(high_text) if high_text else None
    return SalaryRange(min_salary=lower, max_salary=upper)
|
||||
|
||||
def _parse_salary_min(self, salary_str: str) -> Optional[int]:
    """Return the lower bound of a salary string.

    Delegates to ``_parse_salary_range``.

    Args:
        salary_str: Salary text, e.g. "15-25K".

    Returns:
        The parsed minimum, or ``None`` when no range could be parsed.
    """
    parsed_range = self._parse_salary_range(salary_str)
    if parsed_range:
        return parsed_range.min_salary
    return None
|
||||
|
||||
def _parse_salary_max(self, salary_str: str) -> Optional[int]:
    """Return the upper bound of a salary string.

    Delegates to ``_parse_salary_range``.

    Args:
        salary_str: Salary text, e.g. "15-25K".

    Returns:
        The parsed maximum, or ``None`` when no range could be parsed
        (the range itself may also carry ``None`` as its maximum).
    """
    parsed_range = self._parse_salary_range(salary_str)
    if parsed_range:
        return parsed_range.max_salary
    return None
|
||||
|
||||
def _parse_gender(self, gender_str: str) -> Gender:
    """Map a raw gender value onto the ``Gender`` enum.

    The value is stringified and lower-cased before lookup. Recognised
    spellings are '男' / 'male' / 'm' / '1' and '女' / 'female' / 'f' / '2'.

    Args:
        gender_str: Raw gender value; falsy values map to ``UNKNOWN``.

    Returns:
        ``Gender.MALE``, ``Gender.FEMALE`` or ``Gender.UNKNOWN``.
    """
    if not gender_str:
        return Gender.UNKNOWN

    by_token = {
        '男': Gender.MALE, 'male': Gender.MALE, 'm': Gender.MALE, '1': Gender.MALE,
        '女': Gender.FEMALE, 'female': Gender.FEMALE, 'f': Gender.FEMALE, '2': Gender.FEMALE,
    }
    return by_token.get(str(gender_str).lower(), Gender.UNKNOWN)
|
||||
|
||||
def _parse_work_years(self, work_years_str: str) -> Optional[Decimal]:
|
||||
"""解析工作年限"""
|
||||
if not work_years_str:
|
||||
return None
|
||||
|
||||
# 提取数字
|
||||
match = re.search(r'(\d+(?:\.\d+)?)', str(work_years_str))
|
||||
if match:
|
||||
return Decimal(match.group(1))
|
||||
return None
|
||||
@@ -0,0 +1,113 @@
|
||||
"""Crawler factory for managing crawler instances"""
|
||||
from typing import Dict, Optional, Type
|
||||
|
||||
from .base_crawler import BaseCrawler
|
||||
from .boss_crawler import BossCrawler
|
||||
from ...domain.candidate import CandidateSource
|
||||
|
||||
|
||||
class CrawlerFactory:
    """Registry of crawler instances keyed by candidate source.

    Crawlers are registered per channel and looked up by that channel, so a
    new channel can be supported by registering another crawler.

    Usage:
        # Register a crawler
        factory = CrawlerFactory()
        factory.register(CandidateSource.BOSS, BossCrawler(wt_token="xxx"))

        # Fetch it back
        boss_crawler = factory.get_crawler(CandidateSource.BOSS)
    """

    def __init__(self):
        # One crawler instance per source.
        self._crawlers: Dict[CandidateSource, BaseCrawler] = {}

    def register(self, source: CandidateSource, crawler: BaseCrawler) -> None:
        """Register ``crawler`` as the crawler for ``source``.

        An existing registration for the same source is replaced.

        Args:
            source: Channel the crawler serves.
            crawler: Crawler instance.

        Raises:
            ValueError: If ``crawler`` is not a ``BaseCrawler`` or its
                ``source_type`` differs from ``source``.
        """
        if not isinstance(crawler, BaseCrawler):
            raise ValueError(f"Crawler must be instance of BaseCrawler, got {type(crawler)}")

        declared = crawler.source_type
        if declared != source:
            raise ValueError(
                f"Crawler source type mismatch: expected {source}, got {declared}"
            )

        self._crawlers[source] = crawler

    def get_crawler(self, source: CandidateSource) -> Optional[BaseCrawler]:
        """Return the crawler registered for ``source``.

        Args:
            source: Channel to look up.

        Returns:
            The crawler instance, or ``None`` when nothing is registered.
        """
        return self._crawlers.get(source)

    def has_crawler(self, source: CandidateSource) -> bool:
        """Tell whether a crawler is registered for ``source``.

        Args:
            source: Channel to look up.

        Returns:
            ``True`` when a crawler is registered.
        """
        return source in self._crawlers

    def unregister(self, source: CandidateSource) -> None:
        """Remove the crawler registered for ``source``, if there is one.

        Args:
            source: Channel to remove.
        """
        self._crawlers.pop(source, None)

    def get_all_crawlers(self) -> Dict[CandidateSource, BaseCrawler]:
        """Return every registered crawler.

        Returns:
            A shallow copy of the source-to-crawler mapping.
        """
        return dict(self._crawlers)

    def get_registered_sources(self) -> list:
        """Return the sources that currently have a crawler.

        Returns:
            List of registered sources.
        """
        return list(self._crawlers)
|
||||
|
||||
|
||||
# 全局爬虫工厂实例
|
||||
# Module-level shared factory instance; it starts with no crawlers registered.
crawler_factory = CrawlerFactory()
|
||||
|
||||
|
||||
def create_boss_crawler(wt_token: str) -> BossCrawler:
    """Convenience helper that builds a Boss crawler.

    Args:
        wt_token: The ``wt`` token of the Boss platform.

    Returns:
        A new ``BossCrawler`` constructed with that token.
    """
    boss_crawler = BossCrawler(wt_token=wt_token)
    return boss_crawler
|
||||
@@ -0,0 +1,17 @@
|
||||
"""Ingestion service layer - Unified data ingestion"""
|
||||
|
||||
from .unified_ingestion_service import UnifiedIngestionService, IngestionResult
|
||||
from .data_normalizer import DataNormalizer, NormalizedData
|
||||
from .data_validator import DataValidator, ValidationResult
|
||||
from .deduplication_service import DeduplicationService, DuplicateCheckResult
|
||||
|
||||
__all__ = [
|
||||
"UnifiedIngestionService",
|
||||
"IngestionResult",
|
||||
"DataNormalizer",
|
||||
"NormalizedData",
|
||||
"DataValidator",
|
||||
"ValidationResult",
|
||||
"DeduplicationService",
|
||||
"DuplicateCheckResult",
|
||||
]
|
||||
@@ -0,0 +1,272 @@
|
||||
"""Data normalizer - Convert different source data to unified format"""
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass
|
||||
from typing import Dict, Any, Optional
|
||||
from decimal import Decimal
|
||||
|
||||
from ...domain.candidate import (
|
||||
Candidate, CandidateSource, CandidateStatus,
|
||||
SalaryRange, Gender
|
||||
)
|
||||
from ...domain.resume import Resume, ResumeParsed
|
||||
|
||||
|
||||
@dataclass
class NormalizedData:
    """Result of normalizing one raw record: a candidate plus its resume."""

    # Candidate built from the raw record.
    candidate: Candidate
    # Resume built from the same raw record.
    resume: Resume
    # Untouched source payload, kept alongside the normalized objects.
    raw_source_data: Optional[Dict[str, Any]] = None
|
||||
|
||||
|
||||
class SourceNormalizer(ABC):
    """Abstract base class for per-source normalizers."""

    @property
    @abstractmethod
    def source_type(self) -> CandidateSource:
        """Return the data source this normalizer handles."""

    @abstractmethod
    def normalize(self, raw_data: Dict[str, Any]) -> NormalizedData:
        """Convert a raw payload into the unified format.

        Args:
            raw_data: Raw data as returned by the source API.

        Returns:
            The normalized data.
        """
|
||||
|
||||
|
||||
class BossNormalizer(SourceNormalizer):
    """Normalizer for raw Boss Zhipin (Boss直聘) payloads."""

    @property
    def source_type(self) -> CandidateSource:
        """Return ``CandidateSource.BOSS``."""
        return CandidateSource.BOSS

    def normalize(self, raw_data: Dict[str, Any]) -> NormalizedData:
        """Normalize one Boss payload.

        Args:
            raw_data: Raw Boss record.

        Returns:
            ``NormalizedData`` holding the parsed candidate, the parsed
            resume and the untouched ``raw_data``.
        """
        candidate = self._parse_candidate(raw_data)
        resume = self._parse_resume(raw_data)

        return NormalizedData(
            candidate=candidate,
            resume=resume,
            raw_source_data=raw_data,
        )

    def _parse_candidate(self, data: Dict[str, Any]) -> Candidate:
        """Build the ``Candidate`` from the raw record.

        The source id is the first non-empty value of ``geekId``,
        ``encryptGeekId`` and ``id``. Location, company and position each
        accept two alternative keys.
        """
        raw_id = data.get('geekId') or data.get('encryptGeekId') or data.get('id', '')
        # An explicit "id": None must not turn into the truthy text "None":
        # it would pass the non-empty source-id validation and collide with
        # every other id-less record during exact-match deduplication.
        source_id = '' if raw_id is None else str(raw_id)
        name = data.get('name', '')

        gender = self._parse_gender(data.get('gender'))
        salary_range = self._parse_salary(data.get('salary'))
        work_years = self._parse_work_years(data.get('workYears'))

        return Candidate(
            source=CandidateSource.BOSS,
            source_id=source_id,
            name=name,
            phone=data.get('phone'),
            email=data.get('email'),
            wechat=data.get('wechat'),
            gender=gender,
            age=data.get('age'),
            location=data.get('location') or data.get('cityName'),
            current_company=data.get('company') or data.get('currentCompany'),
            current_position=data.get('position') or data.get('currentPosition'),
            work_years=work_years,
            education=data.get('education'),
            school=data.get('school'),
            salary_expectation=salary_range,
            status=CandidateStatus.NEW,
        )

    def _parse_resume(self, data: Dict[str, Any]) -> Resume:
        """Build the ``Resume`` (version 1) from the raw record.

        The raw text comes from ``resumeText`` or, failing that,
        ``resumeContent``.
        """
        resume_text = data.get('resumeText', '') or data.get('resumeContent', '')
        parsed_content = self._parse_resume_content(data)

        return Resume(
            raw_content=resume_text,
            parsed_content=parsed_content,
            version=1,
        )

    def _parse_resume_content(self, data: Dict[str, Any]) -> ResumeParsed:
        """Fill a ``ResumeParsed`` with the structured fields of the record."""
        parsed = ResumeParsed()

        # Basic information.
        parsed.name = data.get('name')
        parsed.phone = data.get('phone')
        parsed.email = data.get('email')
        parsed.gender = data.get('gender')
        parsed.age = data.get('age')
        parsed.location = data.get('location') or data.get('cityName')
        parsed.current_company = data.get('company') or data.get('currentCompany')
        parsed.current_position = data.get('position') or data.get('currentPosition')
        parsed.work_years = self._parse_work_years_to_float(data.get('workYears'))
        parsed.education = data.get('education')
        parsed.school = data.get('school')

        # Skill tags: a comma-separated string is split into a list.
        skills = data.get('skills', [])
        if isinstance(skills, str):
            skills = [s.strip() for s in skills.split(',') if s.strip()]
        parsed.skills = skills or []

        # Self evaluation.
        parsed.self_evaluation = data.get('selfEvaluation') or data.get('selfDescription')

        # Keep the raw record (same object, not a copy).
        parsed.raw_data = data

        return parsed

    def _parse_gender(self, gender_value: Any) -> Gender:
        """Map a raw gender value onto the ``Gender`` enum.

        The value is stringified, lower-cased and stripped before matching;
        ``None`` and unrecognised values map to ``Gender.UNKNOWN``.
        """
        if gender_value is None:
            return Gender.UNKNOWN

        gender_str = str(gender_value).lower().strip()

        if gender_str in ('男', 'male', 'm', '1'):
            return Gender.MALE
        if gender_str in ('女', 'female', 'f', '2'):
            return Gender.FEMALE

        return Gender.UNKNOWN

    def _parse_salary(self, salary_value: Any) -> Optional[SalaryRange]:
        """Parse a salary value such as "15-25K", "15K-25K" or "20K".

        Returns:
            A ``SalaryRange`` with both bounds for a range, with only
            ``min_salary`` for a single "<n>K" figure, or ``None`` when the
            value is falsy or matches neither form.
        """
        if not salary_value:
            return None

        salary_str = str(salary_value)

        # Range form first: "15-25K" or "15K-25K".
        import re
        match = re.search(r'(\d+)\s*[Kk]?\s*[-~~]\s*(\d+)\s*[Kk]?', salary_str)
        if match:
            return SalaryRange(
                min_salary=int(match.group(1)),
                max_salary=int(match.group(2)),
            )

        # Otherwise a single figure followed by K.
        match = re.search(r'(\d+)\s*[Kk]', salary_str)
        if match:
            return SalaryRange(min_salary=int(match.group(1)))

        return None

    def _parse_work_years(self, work_years_value: Any) -> Optional[Decimal]:
        """Parse years of work experience into a ``Decimal``.

        Numbers are converted through their string form; for anything else
        the first integer or decimal number found in the stringified value
        is used.

        Returns:
            The parsed value, or ``None`` when the input is ``None`` or
            holds no number.
        """
        if work_years_value is None:
            return None

        try:
            # Already numeric.
            if isinstance(work_years_value, (int, float)):
                return Decimal(str(work_years_value))

            # Text: pull the first number out of it.
            import re
            match = re.search(r'(\d+(?:\.\d+)?)', str(work_years_value))
            if match:
                return Decimal(match.group(1))
        except (ValueError, TypeError):
            pass

        return None

    def _parse_work_years_to_float(self, work_years_value: Any) -> Optional[float]:
        """Parse years of work experience into a ``float``.

        Returns:
            The parsed value, or ``None`` when nothing could be parsed.
        """
        decimal_val = self._parse_work_years(work_years_value)
        # Compare against None explicitly: Decimal("0") is falsy, and zero
        # years of experience is a real value that must not become None.
        return float(decimal_val) if decimal_val is not None else None
|
||||
|
||||
|
||||
class LiepinNormalizer(SourceNormalizer):
    """Placeholder normalizer for Liepin (猎聘) data; not implemented yet."""

    @property
    def source_type(self) -> CandidateSource:
        """Return ``CandidateSource.LIEPIN``."""
        return CandidateSource.LIEPIN

    def normalize(self, raw_data: Dict[str, Any]) -> NormalizedData:
        """Always raise: Liepin normalization is still to be written.

        Raises:
            NotImplementedError: On every call.
        """
        # TODO: implement Liepin data normalization.
        raise NotImplementedError("Liepin normalizer not implemented yet")
|
||||
|
||||
|
||||
class DataNormalizer:
    """Dispatcher that converts data from different channels to one format.

    It keeps one ``SourceNormalizer`` per ``CandidateSource`` and forwards
    each payload to the matching one.
    """

    def __init__(self):
        self._normalizers: Dict[CandidateSource, SourceNormalizer] = {}
        self._register_default_normalizers()

    def _register_default_normalizers(self):
        """Register the built-in normalizers (currently only Boss)."""
        self.register(BossNormalizer())
        # The Liepin normalizer is intentionally not registered for now:
        # self.register(LiepinNormalizer())

    def register(self, normalizer: SourceNormalizer) -> None:
        """Register a normalizer under its own ``source_type``.

        A previously registered normalizer for that source is replaced.

        Args:
            normalizer: Normalizer instance.
        """
        key = normalizer.source_type
        self._normalizers[key] = normalizer

    def normalize(
        self,
        source: CandidateSource,
        raw_data: Dict[str, Any]
    ) -> NormalizedData:
        """Normalize ``raw_data`` with the normalizer of ``source``.

        Args:
            source: Where the data came from.
            raw_data: Raw payload.

        Returns:
            The normalized data.

        Raises:
            ValueError: If no normalizer is registered for ``source``.
        """
        handler = self._normalizers.get(source)
        if handler:
            return handler.normalize(raw_data)

        raise ValueError(f"No normalizer registered for source: {source}")

    def has_normalizer(self, source: CandidateSource) -> bool:
        """Tell whether a normalizer is registered for ``source``."""
        return source in self._normalizers
|
||||
@@ -0,0 +1,157 @@
|
||||
"""Data validator - Validate normalized data before ingestion"""
|
||||
from dataclasses import dataclass, field
|
||||
from typing import List, Optional, Dict, Any
|
||||
|
||||
from ...domain.candidate import Candidate
|
||||
from ...domain.resume import Resume
|
||||
from .data_normalizer import NormalizedData
|
||||
|
||||
|
||||
@dataclass
class ValidationError:
    """A single validation finding."""

    # Dotted path of the offending field, e.g. "candidate.name".
    field: str
    # Human-readable description of the problem.
    message: str
    # Severity / kind tag; the validator treats "error" as blocking.
    code: str = "invalid"
|
||||
|
||||
|
||||
@dataclass
class ValidationResult:
    """Outcome of validating one normalized record."""

    # True when no blocking error was found.
    is_valid: bool
    # Blocking findings.
    errors: List[ValidationError] = field(default_factory=list)
    # Non-blocking findings.
    warnings: List[ValidationError] = field(default_factory=list)

    @property
    def error_messages(self) -> List[str]:
        """Return the errors formatted as "<field>: <message>" strings."""
        formatted = []
        for item in self.errors:
            formatted.append(f"{item.field}: {item.message}")
        return formatted

    @property
    def warning_messages(self) -> List[str]:
        """Return the warnings formatted as "<field>: <message>" strings."""
        formatted = []
        for item in self.warnings:
            formatted.append(f"{item.field}: {item.message}")
        return formatted
|
||||
|
||||
|
||||
class DataValidator:
    """Checks that normalized data is fit for ingestion.

    Validation is a list of rule callables. Each rule receives the
    ``NormalizedData`` and returns a ``ValidationError`` or ``None``.
    Findings whose ``code`` is "error" make the data invalid; every other
    finding is reported as a warning.
    """

    def __init__(self):
        self._rules: List[callable] = []
        self._register_default_rules()

    def _register_default_rules(self):
        """Install the built-in rules, replacing the current rule list."""
        self._rules = [
            self._validate_candidate_name,
            self._validate_candidate_source,
            self._validate_candidate_source_id,
            self._validate_resume_content,
        ]

    def validate(self, data: NormalizedData) -> ValidationResult:
        """Run every rule against ``data``.

        Args:
            data: Normalized data.

        Returns:
            The collected errors and warnings; ``is_valid`` is true when
            there are no errors.
        """
        errors = []
        warnings = []

        for rule in self._rules:
            finding = rule(data)
            if not finding:
                continue
            bucket = errors if finding.code == "error" else warnings
            bucket.append(finding)

        return ValidationResult(
            is_valid=len(errors) == 0,
            errors=errors,
            warnings=warnings,
        )

    def _validate_candidate_name(self, data: NormalizedData) -> Optional[ValidationError]:
        """Require a candidate name; warn when it is a single character."""
        name = data.candidate.name

        if not name:
            return ValidationError(
                field="candidate.name",
                message="候选人姓名不能为空",
                code="error",
            )

        if len(name) >= 2:
            return None

        return ValidationError(
            field="candidate.name",
            message=f"候选人姓名过短: {name}",
            code="warning",
        )

    def _validate_candidate_source(self, data: NormalizedData) -> Optional[ValidationError]:
        """Require the candidate source to be set."""
        if data.candidate.source:
            return None

        return ValidationError(
            field="candidate.source",
            message="候选人来源不能为空",
            code="error",
        )

    def _validate_candidate_source_id(self, data: NormalizedData) -> Optional[ValidationError]:
        """Require the candidate's source id to be set."""
        if data.candidate.source_id:
            return None

        return ValidationError(
            field="candidate.source_id",
            message="候选人来源ID不能为空",
            code="error",
        )

    def _validate_resume_content(self, data: NormalizedData) -> Optional[ValidationError]:
        """Warn when the resume text is empty or shorter than 50 characters."""
        content = data.resume.raw_content

        if not content:
            return ValidationError(
                field="resume.raw_content",
                message="简历内容为空",
                code="warning",
            )

        # Length check on the raw text.
        size = len(content)
        if size < 50:
            return ValidationError(
                field="resume.raw_content",
                message=f"简历内容过短 ({size} 字符)",
                code="warning",
            )

        return None

    def add_rule(self, rule: callable) -> None:
        """Append a custom validation rule.

        Args:
            rule: Callable taking a ``NormalizedData`` and returning a
                ``ValidationError`` or ``None``.
        """
        self._rules.append(rule)
|
||||
@@ -0,0 +1,266 @@
|
||||
"""Deduplication service - Check and handle duplicate candidates"""
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional, List, Dict, Any
|
||||
from datetime import datetime
|
||||
|
||||
from ...domain.candidate import Candidate, CandidateSource
|
||||
from .data_normalizer import NormalizedData
|
||||
|
||||
|
||||
@dataclass
class DuplicateCheckResult:
    """Outcome of a duplicate check for one candidate."""

    # True when an existing candidate was matched.
    is_duplicate: bool
    # Id of the matched existing candidate, if any.
    existing_candidate_id: Optional[str] = None
    # Similarity score between 0 and 1.
    similarity: float = 0.0
    # Kind of match: 'exact', 'fuzzy', 'phone' or 'email'.
    duplicate_type: Optional[str] = None
    # Human-readable explanation of the match.
    message: str = ""
|
||||
|
||||
|
||||
class DeduplicationService:
    """Detects whether a candidate already exists.

    Checks run in this order and stop at the first hit:
        1. Exact match: source + source_id
        2. Phone number match
        3. E-mail match
        4. Fuzzy match on name, company, position, age and education

    Without a repository every check reports "not a duplicate".
    """

    def __init__(self, candidate_repository=None):
        """Create the service.

        Args:
            candidate_repository: Candidate data-access object. It is used
                through ``find_by_source_and_source_id``, ``find_by_phone``,
                ``find_by_email`` and ``find_by_name``.
        """
        self.candidate_repo = candidate_repository

    def check(self, data: NormalizedData) -> DuplicateCheckResult:
        """Check whether the candidate in ``data`` is a duplicate.

        Args:
            data: Normalized data.

        Returns:
            The first positive check result, or a negative result when no
            check matched.
        """
        candidate = data.candidate

        # 1. Exact match: source + source_id.
        exact_match = self._check_exact_match(candidate)
        if exact_match.is_duplicate:
            return exact_match

        # 2. Phone number.
        if candidate.phone:
            phone_match = self._check_phone_match(candidate)
            if phone_match.is_duplicate:
                return phone_match

        # 3. E-mail.
        if candidate.email:
            email_match = self._check_email_match(candidate)
            if email_match.is_duplicate:
                return email_match

        # 4. Fuzzy match.
        fuzzy_match = self._check_fuzzy_match(candidate)
        if fuzzy_match.is_duplicate:
            return fuzzy_match

        return DuplicateCheckResult(is_duplicate=False)

    def _check_exact_match(self, candidate: Candidate) -> DuplicateCheckResult:
        """Look for an existing candidate with the same source and source id."""
        if not self.candidate_repo:
            return DuplicateCheckResult(is_duplicate=False)

        existing = self.candidate_repo.find_by_source_and_source_id(
            candidate.source,
            candidate.source_id
        )

        if existing:
            return DuplicateCheckResult(
                is_duplicate=True,
                existing_candidate_id=existing.id,
                similarity=1.0,
                duplicate_type='exact',
                message=f"已存在相同来源的候选人: {existing.name}"
            )

        return DuplicateCheckResult(is_duplicate=False)

    def _check_phone_match(self, candidate: Candidate) -> DuplicateCheckResult:
        """Look for an existing candidate with the same phone number."""
        if not self.candidate_repo or not candidate.phone:
            return DuplicateCheckResult(is_duplicate=False)

        existing = self.candidate_repo.find_by_phone(candidate.phone)

        if existing:
            return DuplicateCheckResult(
                is_duplicate=True,
                existing_candidate_id=existing.id,
                similarity=0.9,
                duplicate_type='phone',
                message=f"已存在相同手机号的候选人: {existing.name}"
            )

        return DuplicateCheckResult(is_duplicate=False)

    def _check_email_match(self, candidate: Candidate) -> DuplicateCheckResult:
        """Look for an existing candidate with the same e-mail address."""
        if not self.candidate_repo or not candidate.email:
            return DuplicateCheckResult(is_duplicate=False)

        existing = self.candidate_repo.find_by_email(candidate.email)

        if existing:
            return DuplicateCheckResult(
                is_duplicate=True,
                existing_candidate_id=existing.id,
                similarity=0.9,
                duplicate_type='email',
                message=f"已存在相同邮箱的候选人: {existing.name}"
            )

        return DuplicateCheckResult(is_duplicate=False)

    def _check_fuzzy_match(self, candidate: Candidate) -> DuplicateCheckResult:
        """Compare against same-name candidates and report the first one
        whose similarity reaches 0.8."""
        if not self.candidate_repo:
            return DuplicateCheckResult(is_duplicate=False)

        # Candidates sharing the name are the only ones compared.
        candidates = self.candidate_repo.find_by_name(candidate.name)

        for existing in candidates:
            similarity = self._calculate_similarity(candidate, existing)

            if similarity >= 0.8:  # similarity threshold
                return DuplicateCheckResult(
                    is_duplicate=True,
                    existing_candidate_id=existing.id,
                    similarity=similarity,
                    duplicate_type='fuzzy',
                    message=f"发现高度相似的候选人: {existing.name} (相似度: {similarity:.2f})"
                )

        return DuplicateCheckResult(is_duplicate=False)

    def _calculate_similarity(self, c1: Candidate, c2: Candidate) -> float:
        """Compute the similarity of two candidates.

        Dimensions and weights:
            - name (0.3)
            - company (0.3)
            - position (0.2)
            - age (0.1)
            - education (0.1)

        A dimension is compared only when both candidates have a value for
        it, and scores 1.0 on equality, 0.0 otherwise. The result is the
        weighted average over the compared dimensions.

        At least two dimensions must be comparable. Otherwise a pair that
        shares nothing but a name would average to 1.0 over that single
        dimension and be reported as a duplicate.

        Returns:
            Similarity between 0 and 1; 0.0 when fewer than two dimensions
            could be compared.
        """
        dimensions = (
            (c1.name, c2.name, 0.3),
            (c1.current_company, c2.current_company, 0.3),
            (c1.current_position, c2.current_position, 0.2),
            (c1.age, c2.age, 0.1),
            (c1.education, c2.education, 0.1),
        )

        # (score, weight) for every dimension present on both sides.
        compared = [
            (1.0 if left == right else 0.0, weight)
            for left, right, weight in dimensions
            if left and right
        ]

        # One shared attribute alone is not enough evidence of identity.
        if len(compared) < 2:
            return 0.0

        # Weighted average over what could be compared.
        total_weight = sum(weight for _, weight in compared)
        weighted_sum = sum(score * weight for score, weight in compared)
        return weighted_sum / total_weight if total_weight > 0 else 0.0
|
||||
|
||||
|
||||
# 内存中的候选人存储(用于测试)
|
||||
class InMemoryCandidateRepository:
    """In-memory candidate store, intended for tests."""

    def __init__(self):
        # Primary storage: candidate id -> candidate.
        self._candidates: Dict[str, Candidate] = {}
        # Secondary indexes, all pointing at candidate ids.
        self._by_source: Dict[tuple, str] = {}  # (source, source_id) -> id
        self._by_phone: Dict[str, str] = {}
        self._by_email: Dict[str, str] = {}
        self._by_name: Dict[str, List[str]] = {}

    def save(self, candidate: Candidate) -> Candidate:
        """Store ``candidate`` and refresh the lookup indexes.

        A candidate without an id is given a fresh UUID4 string.

        Returns:
            The same candidate object.
        """
        if not candidate.id:
            import uuid
            candidate.id = str(uuid.uuid4())

        cid = candidate.id
        self._candidates[cid] = candidate

        # Index updates.
        self._by_source[(candidate.source, candidate.source_id)] = cid

        if candidate.phone:
            self._by_phone[candidate.phone] = cid

        if candidate.email:
            self._by_email[candidate.email] = cid

        if candidate.name:
            same_name_ids = self._by_name.setdefault(candidate.name, [])
            if cid not in same_name_ids:
                same_name_ids.append(cid)

        return candidate

    def find_by_source_and_source_id(
        self,
        source: CandidateSource,
        source_id: str
    ) -> Optional[Candidate]:
        """Find a candidate by its source and source id."""
        found_id = self._by_source.get((source, source_id))
        if not found_id:
            return None
        return self._candidates.get(found_id)

    def find_by_phone(self, phone: str) -> Optional[Candidate]:
        """Find a candidate by phone number."""
        found_id = self._by_phone.get(phone)
        if not found_id:
            return None
        return self._candidates.get(found_id)

    def find_by_email(self, email: str) -> Optional[Candidate]:
        """Find a candidate by e-mail address."""
        found_id = self._by_email.get(email)
        if not found_id:
            return None
        return self._candidates.get(found_id)

    def find_by_name(self, name: str) -> List[Candidate]:
        """Return every stored candidate with exactly this name."""
        matches = []
        for cid in self._by_name.get(name, []):
            if cid in self._candidates:
                matches.append(self._candidates[cid])
        return matches
|
||||
@@ -0,0 +1,234 @@
|
||||
"""Unified ingestion service - Single entry point for all data sources"""
|
||||
from dataclasses import dataclass
|
||||
from typing import Optional, Dict, Any, Callable
|
||||
from datetime import datetime
|
||||
import uuid
|
||||
|
||||
from ...domain.candidate import Candidate, CandidateSource, CandidateStatus
|
||||
from ...domain.resume import Resume
|
||||
from .data_normalizer import DataNormalizer, NormalizedData
|
||||
from .data_validator import DataValidator, ValidationResult
|
||||
from .deduplication_service import DeduplicationService, DuplicateCheckResult
|
||||
|
||||
|
||||
@dataclass
class IngestionResult:
    """Outcome of ingesting one record."""

    # True when the record was stored or recognised as a duplicate.
    success: bool
    # Id given to the newly stored candidate.
    candidate_id: Optional[str] = None
    # Human-readable summary.
    message: str = ""
    # Error messages; normalised to a list in __post_init__.
    errors: Optional[list] = None
    # True when the candidate already existed.
    is_duplicate: bool = False
    # Id of the already existing candidate, for duplicates.
    existing_candidate_id: Optional[str] = None

    def __post_init__(self):
        # Give every result a list of its own, so callers can iterate
        # ``errors`` on success and duplicate results too, not only on
        # failed ones.
        if self.errors is None:
            self.errors = []

    @classmethod
    def success_result(cls, candidate_id: str, message: str = "") -> "IngestionResult":
        """Create a success result.

        Args:
            candidate_id: Id of the stored candidate.
            message: Optional message; a default is used when empty.
        """
        return cls(
            success=True,
            candidate_id=candidate_id,
            message=message or "入库成功"
        )

    @classmethod
    def failed_result(cls, errors: list, message: str = "") -> "IngestionResult":
        """Create a failure result.

        Args:
            errors: Error messages; a falsy value becomes an empty list.
            message: Optional message; a default is used when empty.
        """
        return cls(
            success=False,
            message=message or "入库失败",
            errors=errors or []
        )

    @classmethod
    def duplicate_result(
        cls,
        existing_id: str,
        message: str = ""
    ) -> "IngestionResult":
        """Create a duplicate result.

        Args:
            existing_id: Id of the candidate that already exists.
            message: Optional message; a default is used when empty.
        """
        return cls(
            success=True,  # a duplicate does not count as a failure
            is_duplicate=True,
            existing_candidate_id=existing_id,
            message=message or "候选人已存在"
        )
|
||||
|
||||
|
||||
class UnifiedIngestionService:
|
||||
"""
|
||||
统一数据入库服务
|
||||
|
||||
所有渠道数据的唯一入口,负责:
|
||||
1. 数据标准化
|
||||
2. 数据验证
|
||||
3. 去重检查
|
||||
4. 数据入库
|
||||
5. 触发后续处理(分析、通知等)
|
||||
|
||||
Usage:
|
||||
service = UnifiedIngestionService(
|
||||
candidate_repo=candidate_repo,
|
||||
resume_repo=resume_repo,
|
||||
normalizer=DataNormalizer(),
|
||||
validator=DataValidator(),
|
||||
deduplicator=DeduplicationService()
|
||||
)
|
||||
|
||||
result = service.ingest(
|
||||
source=CandidateSource.BOSS,
|
||||
raw_data={...}
|
||||
)
|
||||
"""
|
||||
|
||||
def __init__(
|
||||
self,
|
||||
candidate_repo=None,
|
||||
resume_repo=None,
|
||||
normalizer: Optional[DataNormalizer] = None,
|
||||
validator: Optional[DataValidator] = None,
|
||||
deduplicator: Optional[DeduplicationService] = None,
|
||||
on_analysis_triggered: Optional[Callable[[str], None]] = None
|
||||
):
|
||||
"""
|
||||
初始化统一入库服务
|
||||
|
||||
Args:
|
||||
candidate_repo: 候选人数据访问接口
|
||||
resume_repo: 简历数据访问接口
|
||||
normalizer: 数据标准化器
|
||||
validator: 数据验证器
|
||||
deduplicator: 去重服务
|
||||
on_analysis_triggered: 分析触发回调函数
|
||||
"""
|
||||
self.candidate_repo = candidate_repo
|
||||
self.resume_repo = resume_repo
|
||||
self.normalizer = normalizer or DataNormalizer()
|
||||
self.validator = validator or DataValidator()
|
||||
self.deduplicator = deduplicator
|
||||
self.on_analysis_triggered = on_analysis_triggered
|
||||
|
||||
def ingest(
|
||||
self,
|
||||
source: CandidateSource,
|
||||
raw_data: Dict[str, Any]
|
||||
) -> IngestionResult:
|
||||
"""
|
||||
统一入库入口
|
||||
|
||||
流程:
|
||||
1. 数据标准化
|
||||
2. 数据验证
|
||||
3. 去重检查
|
||||
4. 保存候选人
|
||||
5. 保存简历内容
|
||||
6. 触发分析任务
|
||||
|
||||
Args:
|
||||
source: 数据来源
|
||||
raw_data: 原始数据
|
||||
|
||||
Returns:
|
||||
入库结果
|
||||
"""
|
||||
try:
|
||||
# 1. 数据标准化
|
||||
normalized = self._normalize(source, raw_data)
|
||||
|
||||
# 2. 数据验证
|
||||
validation_result = self._validate(normalized)
|
||||
if not validation_result.is_valid:
|
||||
return IngestionResult.failed_result(
|
||||
errors=validation_result.error_messages,
|
||||
message="数据验证失败"
|
||||
)
|
||||
|
||||
# 3. 去重检查
|
||||
if self.deduplicator:
|
||||
duplicate_check = self.deduplicator.check(normalized)
|
||||
if duplicate_check.is_duplicate:
|
||||
return self._handle_duplicate(normalized, duplicate_check)
|
||||
|
||||
# 4. 生成ID
|
||||
candidate_id = self._generate_id()
|
||||
normalized.candidate.id = candidate_id
|
||||
normalized.resume.candidate_id = candidate_id
|
||||
|
||||
# 5. 保存候选人
|
||||
if self.candidate_repo:
|
||||
self.candidate_repo.save(normalized.candidate)
|
||||
|
||||
# 6. 保存简历
|
||||
if self.resume_repo:
|
||||
self.resume_repo.save(normalized.resume)
|
||||
|
||||
# 7. 触发分析
|
||||
self._trigger_analysis(candidate_id)
|
||||
|
||||
return IngestionResult.success_result(
|
||||
candidate_id=candidate_id,
|
||||
message=f"候选人 {normalized.candidate.name} 入库成功"
|
||||
)
|
||||
|
||||
except Exception as e:
|
||||
return IngestionResult.failed_result(
|
||||
errors=[str(e)],
|
||||
message=f"入库异常: {str(e)}"
|
||||
)
|
||||
|
||||
def _normalize(
|
||||
self,
|
||||
source: CandidateSource,
|
||||
raw_data: Dict[str, Any]
|
||||
) -> NormalizedData:
|
||||
"""数据标准化"""
|
||||
return self.normalizer.normalize(source, raw_data)
|
||||
|
||||
def _validate(self, normalized: NormalizedData) -> ValidationResult:
|
||||
"""数据验证"""
|
||||
return self.validator.validate(normalized)
|
||||
|
||||
def _handle_duplicate(
|
||||
self,
|
||||
normalized: NormalizedData,
|
||||
duplicate_check: DuplicateCheckResult
|
||||
) -> IngestionResult:
|
||||
"""处理重复数据"""
|
||||
# 可以选择更新现有记录或跳过
|
||||
# 这里选择返回重复信息,不更新
|
||||
return IngestionResult.duplicate_result(
|
||||
existing_id=duplicate_check.existing_candidate_id,
|
||||
message=duplicate_check.message
|
||||
)
|
||||
|
||||
def _generate_id(self) -> str:
|
||||
"""生成唯一ID"""
|
||||
return str(uuid.uuid4())
|
||||
|
||||
def _trigger_analysis(self, candidate_id: str) -> None:
|
||||
"""触发分析任务"""
|
||||
if self.on_analysis_triggered:
|
||||
try:
|
||||
self.on_analysis_triggered(candidate_id)
|
||||
except Exception as e:
|
||||
print(f"Failed to trigger analysis for {candidate_id}: {e}")
|
||||
|
||||
def batch_ingest(
    self,
    source: CandidateSource,
    raw_data_list: list
) -> list:
    """Ingest several raw payloads from the same source, one by one.

    Args:
        source: Origin shared by every payload.
        raw_data_list: Raw payloads to ingest, processed in order.

    Returns:
        The results of ``self.ingest`` in the same order as the input.
    """
    return [self.ingest(source, raw_data) for raw_data in raw_data_list]
|
||||
@@ -0,0 +1,21 @@
|
||||
"""Notification service layer - Multi-channel notification"""
|
||||
|
||||
from .notification_service import NotificationService, NotificationResult
|
||||
from .message_template import MessageTemplate, MessageTemplateEngine
|
||||
from .channels.base_channel import NotificationChannel, NotificationMessage, SendResult
|
||||
from .channels.wechat_work_channel import WeChatWorkChannel
|
||||
from .channels.dingtalk_channel import DingTalkChannel
|
||||
from .channels.email_channel import EmailChannel
|
||||
|
||||
__all__ = [
|
||||
"NotificationService",
|
||||
"NotificationResult",
|
||||
"MessageTemplate",
|
||||
"MessageTemplateEngine",
|
||||
"NotificationChannel",
|
||||
"NotificationMessage",
|
||||
"SendResult",
|
||||
"WeChatWorkChannel",
|
||||
"DingTalkChannel",
|
||||
"EmailChannel",
|
||||
]
|
||||
@@ -0,0 +1,15 @@
|
||||
"""Notification channels"""
|
||||
|
||||
from .base_channel import NotificationChannel, NotificationMessage, SendResult
|
||||
from .wechat_work_channel import WeChatWorkChannel
|
||||
from .dingtalk_channel import DingTalkChannel
|
||||
from .email_channel import EmailChannel
|
||||
|
||||
__all__ = [
|
||||
"NotificationChannel",
|
||||
"NotificationMessage",
|
||||
"SendResult",
|
||||
"WeChatWorkChannel",
|
||||
"DingTalkChannel",
|
||||
"EmailChannel",
|
||||
]
|
||||
@@ -0,0 +1,89 @@
|
||||
"""Base notification channel"""
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass, field
|
||||
from typing import Optional, Dict, Any, List
|
||||
from datetime import datetime
|
||||
|
||||
from ....domain.candidate import Candidate
|
||||
from ....domain.evaluation import Evaluation
|
||||
from ....domain.enums import ChannelType
|
||||
|
||||
|
||||
@dataclass
class NotificationMessage:
    """Message handed to a notification channel for delivery."""

    # Short headline of the notification.
    title: str = ""
    # Main body text.
    content: str = ""
    # Optional candidate the notification is about.
    candidate: Optional[Candidate] = None
    # Optional evaluation attached to the candidate.
    evaluation: Optional[Evaluation] = None
    # Free-form additional data.
    extra_data: Dict[str, Any] = field(default_factory=dict)
    # Creation time; defaults to the local time at construction.
    timestamp: datetime = field(default_factory=datetime.now)

    def to_dict(self) -> Dict[str, Any]:
        """Return a dictionary view of the message.

        The candidate and evaluation are converted through their own
        ``to_dict`` methods (``None`` when absent) and the timestamp is
        rendered in ISO format.
        """
        candidate_dict = None
        if self.candidate:
            candidate_dict = self.candidate.to_dict()

        evaluation_dict = None
        if self.evaluation:
            evaluation_dict = self.evaluation.to_dict()

        return dict(
            title=self.title,
            content=self.content,
            candidate=candidate_dict,
            evaluation=evaluation_dict,
            extra_data=self.extra_data,
            timestamp=self.timestamp.isoformat(),
        )
|
||||
|
||||
|
||||
@dataclass
class SendResult:
    """Outcome of a single send attempt on one channel."""

    # True when the channel reported a successful delivery.
    success: bool
    # Identifier of the delivered message, when the channel provides one.
    message_id: Optional[str] = None
    # Human-readable failure description, when the attempt failed.
    error_message: Optional[str] = None
    # Raw response payload returned by the channel, if any.
    response_data: Optional[Dict[str, Any]] = None
    # Time the result object was created (local time).
    timestamp: datetime = field(default_factory=datetime.now)
|
||||
|
||||
|
||||
class NotificationChannel(ABC):
    """Abstract base class for notification channels.

    Every concrete channel must implement this interface.
    """

    @property
    @abstractmethod
    def channel_type(self) -> ChannelType:
        """Return the type of this channel."""

    @abstractmethod
    async def send(self, message: NotificationMessage) -> SendResult:
        """Send a message through this channel.

        Args:
            message: The notification to deliver.

        Returns:
            The result of the send attempt.
        """

    @abstractmethod
    def is_configured(self) -> bool:
        """Report whether the channel has the configuration it needs."""

    def format_message(self, message: NotificationMessage) -> Dict[str, Any]:
        """Format a message into a channel-specific dictionary.

        This default keeps only the title and content; subclasses may
        override it.

        Args:
            message: The notification to format.

        Returns:
            A dictionary with the keys ``title`` and ``content``.
        """
        formatted: Dict[str, Any] = {}
        formatted["title"] = message.title
        formatted["content"] = message.content
        return formatted
|
||||
@@ -0,0 +1,183 @@
|
||||
"""DingTalk notification channel"""
|
||||
from typing import Optional, Dict, Any
|
||||
import json
|
||||
import hmac
|
||||
import hashlib
|
||||
import base64
|
||||
import time
|
||||
|
||||
from .base_channel import NotificationChannel, NotificationMessage, SendResult
|
||||
from ....domain.enums import ChannelType
|
||||
|
||||
|
||||
class DingTalkChannel(NotificationChannel):
    """DingTalk notification channel.

    Sends markdown messages through a DingTalk robot webhook, optionally
    signing the request when a secret is configured.
    """

    def __init__(
        self,
        webhook_url: str,
        secret: Optional[str] = None,
        at_mobiles: Optional[list] = None,
        is_at_all: bool = False
    ):
        """Initialize the DingTalk channel.

        Args:
            webhook_url: DingTalk robot webhook address.
            secret: Signing secret from the robot's security settings;
                when omitted the request is sent unsigned.
            at_mobiles: Mobile numbers to @-mention.
            is_at_all: Whether to @-mention everyone.
        """
        self.webhook_url = webhook_url
        self.secret = secret
        self.at_mobiles = at_mobiles or []
        self.is_at_all = is_at_all

    @property
    def channel_type(self) -> ChannelType:
        """Return ``ChannelType.DINGTALK``."""
        return ChannelType.DINGTALK

    def is_configured(self) -> bool:
        """Return True when a webhook URL has been provided."""
        return bool(self.webhook_url)

    async def send(self, message: NotificationMessage) -> SendResult:
        """Send a message to DingTalk.

        Args:
            message: The notification to deliver.

        Returns:
            A successful result when the API answers with ``errcode`` 0;
            otherwise a failed result describing the error. Exceptions are
            converted into failed results rather than raised.
        """
        if not self.is_configured():
            return SendResult(
                success=False,
                error_message="Webhook URL not configured"
            )

        try:
            # Build the (optionally signed) URL and the request body.
            url = self._build_signed_url()
            payload = self._build_payload(message)

            # Imported lazily so the module loads without aiohttp installed.
            import aiohttp
            async with aiohttp.ClientSession() as session:
                async with session.post(
                    url,
                    json=payload,
                    timeout=aiohttp.ClientTimeout(total=30)
                ) as response:
                    result = await response.json()

                    if result.get("errcode") == 0:
                        return SendResult(
                            success=True,
                            response_data=result
                        )
                    else:
                        return SendResult(
                            success=False,
                            error_message=f"DingTalk API error: {result.get('errmsg')}"
                        )

        except Exception as e:
            return SendResult(
                success=False,
                error_message=f"Failed to send DingTalk message: {str(e)}"
            )

    def _build_signed_url(self) -> str:
        """Return the webhook URL with ``timestamp`` and ``sign`` appended.

        The signature is the base64-encoded HMAC-SHA256 of
        ``"{timestamp}\\n{secret}"`` keyed with the secret, then URL-encoded.
        Without a secret the webhook URL is returned unchanged.
        """
        if not self.secret:
            return self.webhook_url

        # Local import keeps this block self-contained.
        from urllib.parse import quote_plus

        timestamp = str(round(time.time() * 1000))
        string_to_sign = f"{timestamp}\n{self.secret}"

        hmac_code = hmac.new(
            self.secret.encode('utf-8'),
            string_to_sign.encode('utf-8'),
            digestmod=hashlib.sha256
        ).digest()

        # Base64 output may contain '+', '/' and '=', which must be
        # percent-encoded before being placed in a query string.
        sign = quote_plus(base64.b64encode(hmac_code).decode('utf-8'))

        # Use '?' if the webhook URL carries no query string yet.
        separator = "&" if "?" in self.webhook_url else "?"
        return f"{self.webhook_url}{separator}timestamp={timestamp}&sign={sign}"

    def _build_payload(self, message: NotificationMessage) -> Dict[str, Any]:
        """Build the DingTalk request body (markdown message type)."""
        return {
            "msgtype": "markdown",
            "markdown": {
                "title": message.title,
                "text": self._format_content(message)
            },
            "at": {
                "atMobiles": self.at_mobiles,
                "isAtAll": self.is_at_all
            }
        }

    def _format_content(self, message: NotificationMessage) -> str:
        """Render the message as markdown text.

        Sections appear in this order: title, content, candidate details,
        AI evaluation, then the @-mentions. Optional fields are skipped
        when falsy, and at most three strengths are listed.
        """
        lines = []

        # Title
        if message.title:
            lines.append(f"### {message.title}")

        # Content
        if message.content:
            lines.append(message.content)

        # Candidate details
        if message.candidate:
            candidate = message.candidate
            lines.append("\n**候选人信息:**")
            lines.append(f"- 姓名:{candidate.name}")
            if candidate.age:
                lines.append(f"- 年龄:{candidate.age}岁")
            if candidate.work_years:
                lines.append(f"- 工作年限:{candidate.work_years}年")
            if candidate.current_company:
                lines.append(f"- 当前公司:{candidate.current_company}")
            if candidate.current_position:
                lines.append(f"- 当前职位:{candidate.current_position}")
            if candidate.phone:
                lines.append(f"- 联系方式:{candidate.phone}")

        # Evaluation details
        if message.evaluation:
            evaluation = message.evaluation
            lines.append("\n**AI 评价:**")
            lines.append(f"- 综合评分:**{evaluation.overall_score}/100**")
            if evaluation.recommendation:
                lines.append(f"- 推荐意见:{self._format_recommendation(evaluation.recommendation.value)}")
            if evaluation.summary:
                lines.append(f"- 评价摘要:{evaluation.summary}")

            if evaluation.strengths:
                lines.append(f"- 优势:{', '.join(evaluation.strengths[:3])}")

        # @-mentions are added to the text as well as the payload's "at"
        # section.
        if self.at_mobiles:
            for mobile in self.at_mobiles:
                lines.append(f"@{mobile}")

        if self.is_at_all:
            lines.append("@所有人")

        return '\n'.join(lines)

    def _format_recommendation(self, value: str) -> str:
        """Map a recommendation value to its bold markdown label.

        Unknown values are returned unchanged.
        """
        mapping = {
            "strong_recommend": "**强烈推荐**",
            "recommend": "**推荐**",
            "consider": "**考虑**",
            "not_recommend": "**不推荐**"
        }
        return mapping.get(value, value)
|
||||
@@ -0,0 +1,226 @@
|
||||
"""Email notification channel"""
|
||||
from typing import Optional, Dict, Any, List
|
||||
from email.mime.text import MIMEText
|
||||
from email.mime.multipart import MIMEMultipart
|
||||
|
||||
from .base_channel import NotificationChannel, NotificationMessage, SendResult
|
||||
from ....domain.enums import ChannelType
|
||||
|
||||
|
||||
class EmailChannel(NotificationChannel):
    """Email notification channel.

    Sends notifications over SMTP as a multipart message containing a
    plain-text part and an HTML part.
    """

    # Upper bound (seconds) for SMTP connect and I/O so an unresponsive
    # server cannot block the worker thread indefinitely.
    _SMTP_TIMEOUT_SECONDS = 30

    def __init__(
        self,
        smtp_host: str,
        smtp_port: int,
        username: str,
        password: str,
        from_addr: str,
        to_addrs: List[str],
        use_tls: bool = True
    ):
        """Initialize the email channel.

        Args:
            smtp_host: SMTP server address.
            smtp_port: SMTP server port.
            username: Login user name.
            password: Login password.
            from_addr: Sender address.
            to_addrs: Recipient addresses.
            use_tls: Whether to upgrade the connection with STARTTLS.
        """
        self.smtp_host = smtp_host
        self.smtp_port = smtp_port
        self.username = username
        self.password = password
        self.from_addr = from_addr
        self.to_addrs = to_addrs
        self.use_tls = use_tls

    @property
    def channel_type(self) -> ChannelType:
        """Return ``ChannelType.EMAIL``."""
        return ChannelType.EMAIL

    def is_configured(self) -> bool:
        """Return True when every required SMTP setting is truthy."""
        return all([
            self.smtp_host,
            self.smtp_port,
            self.username,
            self.password,
            self.from_addr,
            self.to_addrs
        ])

    async def send(self, message: NotificationMessage) -> SendResult:
        """Send the notification by email.

        The blocking SMTP exchange runs in the default executor so the
        event loop is not blocked.

        Args:
            message: The notification to deliver.

        Returns:
            A successful result once the SMTP exchange completes; a failed
            result if the channel is not configured or any step raises.
        """
        if not self.is_configured():
            return SendResult(
                success=False,
                error_message="Email not configured"
            )

        try:
            msg = self._build_message(message)

            import asyncio
            # Inside a coroutine the running loop is the correct one to use.
            loop = asyncio.get_running_loop()
            await loop.run_in_executor(None, self._send_sync, msg)

            return SendResult(success=True)

        except Exception as e:
            return SendResult(
                success=False,
                error_message=f"Failed to send email: {str(e)}"
            )

    def _build_message(self, message: NotificationMessage) -> MIMEMultipart:
        """Build the multipart email (plain text first, then HTML)."""
        msg = MIMEMultipart('alternative')
        subject = message.title or "候选人推荐通知"
        # Header values must be single-line; line breaks in scraped data
        # would otherwise corrupt or inject headers.
        msg['Subject'] = subject.replace("\r", " ").replace("\n", " ")
        msg['From'] = self.from_addr
        msg['To'] = ', '.join(self.to_addrs)

        text_content = self._format_text_content(message)
        msg.attach(MIMEText(text_content, 'plain', 'utf-8'))

        html_content = self._format_html_content(message)
        msg.attach(MIMEText(html_content, 'html', 'utf-8'))

        return msg

    def _send_sync(self, msg: MIMEMultipart):
        """Deliver the message over SMTP (blocking)."""
        import smtplib

        with smtplib.SMTP(
            self.smtp_host,
            self.smtp_port,
            timeout=self._SMTP_TIMEOUT_SECONDS
        ) as server:
            if self.use_tls:
                server.starttls()
            server.login(self.username, self.password)
            server.send_message(msg)

    def _format_text_content(self, message: NotificationMessage) -> str:
        """Render the plain-text body.

        Optional candidate and evaluation fields are skipped when falsy.
        """
        lines = []

        if message.title:
            lines.append(message.title)
            lines.append("=" * len(message.title))

        if message.content:
            lines.append(message.content)

        if message.candidate:
            candidate = message.candidate
            lines.append("\n【候选人信息】")
            lines.append(f"姓名:{candidate.name}")
            if candidate.age:
                lines.append(f"年龄:{candidate.age}岁")
            if candidate.work_years:
                lines.append(f"工作年限:{candidate.work_years}年")
            if candidate.current_company:
                lines.append(f"当前公司:{candidate.current_company}")
            if candidate.current_position:
                lines.append(f"当前职位:{candidate.current_position}")
            if candidate.phone:
                lines.append(f"联系方式:{candidate.phone}")
            if candidate.email:
                lines.append(f"邮箱:{candidate.email}")

        if message.evaluation:
            evaluation = message.evaluation
            lines.append("\n【AI 评价】")
            lines.append(f"综合评分:{evaluation.overall_score}/100")
            if evaluation.recommendation:
                lines.append(f"推荐意见:{self._format_recommendation(evaluation.recommendation.value)}")
            if evaluation.summary:
                lines.append(f"评价摘要:{evaluation.summary}")
            if evaluation.strengths:
                lines.append(f"优势:{', '.join(evaluation.strengths)}")
            if evaluation.weaknesses:
                lines.append(f"不足:{', '.join(evaluation.weaknesses)}")

        return '\n'.join(lines)

    @staticmethod
    def _escape_html(value: Any) -> str:
        """Return ``value`` as text that is safe to embed in HTML."""
        from html import escape
        return escape(str(value), quote=True)

    def _format_html_content(self, message: NotificationMessage) -> str:
        """Render the HTML body.

        Every interpolated value is HTML-escaped because candidate and
        evaluation text comes from external data. Line breaks in the
        content become ``<br>`` so multi-line text keeps its layout.
        """
        esc = self._escape_html
        html_parts = ["<html><body style='font-family: Arial, sans-serif;'>"]

        if message.title:
            html_parts.append(f"<h2>{esc(message.title)}</h2>")

        if message.content:
            body = esc(message.content).replace("\n", "<br>")
            html_parts.append(f"<p>{body}</p>")

        if message.candidate:
            candidate = message.candidate
            html_parts.append("<h3>候选人信息</h3>")
            html_parts.append("<ul>")
            html_parts.append(f"<li><strong>姓名:</strong>{esc(candidate.name)}</li>")
            if candidate.age:
                html_parts.append(f"<li><strong>年龄:</strong>{esc(candidate.age)}岁</li>")
            if candidate.work_years:
                html_parts.append(f"<li><strong>工作年限:</strong>{esc(candidate.work_years)}年</li>")
            if candidate.current_company:
                html_parts.append(f"<li><strong>当前公司:</strong>{esc(candidate.current_company)}</li>")
            if candidate.current_position:
                html_parts.append(f"<li><strong>当前职位:</strong>{esc(candidate.current_position)}</li>")
            if candidate.phone:
                html_parts.append(f"<li><strong>联系方式:</strong>{esc(candidate.phone)}</li>")
            if candidate.email:
                html_parts.append(f"<li><strong>邮箱:</strong>{esc(candidate.email)}</li>")
            html_parts.append("</ul>")

        if message.evaluation:
            evaluation = message.evaluation
            html_parts.append("<h3>AI 评价</h3>")
            html_parts.append("<ul>")
            html_parts.append(f"<li><strong>综合评分:</strong><span style='color: #1890ff; font-size: 18px;'>{esc(evaluation.overall_score)}/100</span></li>")
            if evaluation.recommendation:
                rec_value = evaluation.recommendation.value
                # The color always comes from the fixed table in
                # _get_recommendation_color, so it needs no escaping.
                color = self._get_recommendation_color(rec_value)
                label = esc(self._format_recommendation(rec_value))
                html_parts.append(f"<li><strong>推荐意见:</strong><span style='color: {color};'>{label}</span></li>")
            if evaluation.summary:
                html_parts.append(f"<li><strong>评价摘要:</strong>{esc(evaluation.summary)}</li>")
            if evaluation.strengths:
                strengths = ', '.join(esc(item) for item in evaluation.strengths)
                html_parts.append(f"<li><strong>优势:</strong>{strengths}</li>")
            if evaluation.weaknesses:
                weaknesses = ', '.join(esc(item) for item in evaluation.weaknesses)
                html_parts.append(f"<li><strong>不足:</strong>{weaknesses}</li>")
            html_parts.append("</ul>")

        html_parts.append("</body></html>")

        return ''.join(html_parts)

    def _format_recommendation(self, value: str) -> str:
        """Map a recommendation value to its display label.

        Unknown values are returned unchanged.
        """
        mapping = {
            "strong_recommend": "强烈推荐",
            "recommend": "推荐",
            "consider": "考虑",
            "not_recommend": "不推荐"
        }
        return mapping.get(value, value)

    def _get_recommendation_color(self, value: str) -> str:
        """Return the CSS color for a recommendation value.

        Unknown values fall back to black (``#000000``).
        """
        mapping = {
            "strong_recommend": "#52c41a",
            "recommend": "#1890ff",
            "consider": "#faad14",
            "not_recommend": "#f5222d"
        }
        return mapping.get(value, "#000000")
|
||||
@@ -0,0 +1,167 @@
|
||||
"""WeChat Work (Enterprise WeChat) notification channel"""
|
||||
from typing import Optional, Dict, Any
|
||||
import json
|
||||
|
||||
from .base_channel import NotificationChannel, NotificationMessage, SendResult
|
||||
from ....domain.enums import ChannelType
|
||||
|
||||
|
||||
class WeChatWorkChannel(NotificationChannel):
    """WeChat Work (Enterprise WeChat) notification channel.

    Sends markdown messages through a WeChat Work robot webhook.
    """

    def __init__(self, webhook_url: str, mentioned_list: Optional[list] = None):
        """Initialize the WeChat Work channel.

        Args:
            webhook_url: WeChat Work robot webhook address.
            mentioned_list: Members to mention, e.g. ``["@all"]`` or
                ``["UserID1", "UserID2"]``.
        """
        self.webhook_url = webhook_url
        self.mentioned_list = mentioned_list or []
        self._session = None

    @property
    def channel_type(self) -> ChannelType:
        """Return ``ChannelType.WECHAT_WORK``."""
        return ChannelType.WECHAT_WORK

    def is_configured(self) -> bool:
        """Return True when a webhook URL has been provided."""
        return bool(self.webhook_url)

    async def send(self, message: NotificationMessage) -> SendResult:
        """Send a message to WeChat Work.

        Args:
            message: The notification to deliver.

        Returns:
            A successful result (with the reported ``msgid``) when the API
            answers with ``errcode`` 0; otherwise a failed result.
            Exceptions are converted into failed results rather than raised.
        """
        if not self.is_configured():
            return SendResult(success=False, error_message="Webhook URL not configured")

        try:
            body = self._build_payload(message)

            # Imported lazily so the module loads without aiohttp installed.
            import aiohttp
            async with aiohttp.ClientSession() as http:
                async with http.post(
                    self.webhook_url,
                    json=body,
                    timeout=aiohttp.ClientTimeout(total=30)
                ) as resp:
                    reply = await resp.json()

                    if reply.get("errcode") != 0:
                        return SendResult(
                            success=False,
                            error_message=f"WeChat Work API error: {reply.get('errmsg')}"
                        )

                    return SendResult(
                        success=True,
                        message_id=reply.get("msgid"),
                        response_data=reply
                    )

        except Exception as e:
            return SendResult(
                success=False,
                error_message=f"Failed to send WeChat Work message: {str(e)}"
            )

    def _build_payload(self, message: NotificationMessage) -> Dict[str, Any]:
        """Build the WeChat Work request body (markdown message type)."""
        markdown_body = {"content": self._format_content(message)}
        return {"msgtype": "markdown", "markdown": markdown_body}

    def _format_content(self, message: NotificationMessage) -> str:
        """Render the message as markdown text.

        Sections appear in this order: title, content, candidate details,
        AI evaluation, then the mention list. Optional fields are skipped
        when falsy, and at most three strengths are listed.
        """
        lines = []

        if message.title:
            lines.append(f"## {message.title}")

        if message.content:
            lines.append(message.content)

        candidate = message.candidate
        if candidate:
            lines.append("\n**候选人信息:**")
            lines.append(f"- 姓名:{candidate.name}")
            # (label, value, unit suffix) for each optional candidate field.
            optional_rows = (
                ("年龄", candidate.age, "岁"),
                ("工作年限", candidate.work_years, "年"),
                ("当前公司", candidate.current_company, ""),
                ("当前职位", candidate.current_position, ""),
                ("联系方式", candidate.phone, ""),
            )
            lines.extend(
                f"- {label}:{value}{unit}"
                for label, value, unit in optional_rows
                if value
            )

        evaluation = message.evaluation
        if evaluation:
            lines.append("\n**AI 评价:**")
            lines.append(f"- 综合评分:<font color=\"info\">{evaluation.overall_score}/100</font>")
            if evaluation.recommendation:
                label = self._format_recommendation(evaluation.recommendation.value)
                lines.append(f"- 推荐意见:{label}")
            if evaluation.summary:
                lines.append(f"- 评价摘要:{evaluation.summary}")
            if evaluation.strengths:
                top_strengths = ', '.join(evaluation.strengths[:3])
                lines.append(f"- 优势:{top_strengths}")

        if self.mentioned_list:
            lines.append("\n" + ' '.join(self.mentioned_list))

        return '\n'.join(lines)

    def _format_recommendation(self, value: str) -> str:
        """Map a recommendation value to its colored markdown label.

        Unknown values are returned unchanged.
        """
        labels = {
            "strong_recommend": "<font color=\"info\">强烈推荐</font>",
            "recommend": "<font color=\"info\">推荐</font>",
            "consider": "<font color=\"warning\">考虑</font>",
            "not_recommend": "<font color=\"comment\">不推荐</font>"
        }
        if value in labels:
            return labels[value]
        return value
|
||||
|
||||
|
||||
class WeChatWorkTextChannel(WeChatWorkChannel):
    """WeChat Work channel variant that sends plain-text messages."""

    def _build_payload(self, message: NotificationMessage) -> Dict[str, Any]:
        """Build the text message body.

        The text is the message content, falling back to the title when the
        content is empty. The mention list is attached only when non-empty.
        """
        text_body = {"content": message.content or message.title}
        if self.mentioned_list:
            text_body["mentioned_list"] = self.mentioned_list

        return {"msgtype": "text", "text": text_body}
|
||||
@@ -0,0 +1,245 @@
|
||||
"""Message template engine"""
|
||||
from typing import Optional, Dict, Any
|
||||
import re
|
||||
|
||||
from ...domain.candidate import Candidate
|
||||
from ...domain.evaluation import Evaluation
|
||||
|
||||
|
||||
class MessageTemplate:
    """Message template with a small substitution syntax.

    Supported constructs:

    * ``{{variable}}`` or ``{{object.property}}`` - replaced by the value
      found at that path (empty string when missing or ``None``).
    * ``{{#each path}}...{{/each}}`` - the body is rendered once per item
      of the list/tuple at ``path``, with ``{{this}}`` bound to the item.

    Rendering is a single pass: substituted values are never scanned
    again, so data that happens to contain ``{{...}}`` is emitted
    literally.
    """

    DEFAULT_TEMPLATE = """【人才推荐】{{candidate.name}}

基本信息:
- 年龄:{{candidate.age}}岁
- 工作年限:{{candidate.work_years}}年
- 当前公司:{{candidate.current_company}}
- 当前职位:{{candidate.current_position}}
- 学历:{{candidate.education}}
- 期望薪资:{{candidate.salary_expectation}}

AI评价:
- 综合评分:{{evaluation.overall_score}}/100
- 推荐意见:{{evaluation.recommendation}}
- 评价摘要:{{evaluation.summary}}

优势:
{{#each evaluation.strengths}}
• {{this}}
{{/each}}

联系方式:{{candidate.phone}}
"""

    # {{variable}} - '.' does not match newlines, so a tag stays on one line.
    _VARIABLE_PATTERN = re.compile(r'\{\{(.*?)\}\}')
    # {{#each path}}body{{/each}} - the body may span several lines.
    _EACH_PATTERN = re.compile(r'\{\{#each\s+(.*?)\}\}(.*?)\{\{/each\}\}', re.DOTALL)
    # Either construct, tried in that order at every position; group 3 is
    # set only for a plain variable.
    _TOKEN_PATTERN = re.compile(
        r'(?s:\{\{#each\s+(.*?)\}\}(.*?)\{\{/each\}\})|\{\{(.*?)\}\}'
    )

    def __init__(self, template: Optional[str] = None):
        """Initialize the template.

        Args:
            template: Template string; the default template is used when
                omitted or empty.
        """
        self.template = template or self.DEFAULT_TEMPLATE

    def render(
        self,
        candidate: Candidate,
        evaluation: Evaluation,
        extra_data: Optional[Dict[str, Any]] = None
    ) -> str:
        """Render the template.

        Args:
            candidate: Candidate exposed to the template as ``candidate``.
            evaluation: Evaluation exposed as ``evaluation``.
            extra_data: Additional top-level names; entries here take
                precedence over ``candidate`` / ``evaluation``.

        Returns:
            The rendered message text.
        """
        data = {
            "candidate": candidate,
            "evaluation": evaluation,
            **(extra_data or {})
        }

        return self._render_template(self.template, data)

    def _render_template(self, template: str, data: Dict[str, Any]) -> str:
        """Render loops and variables in one left-to-right pass."""
        def replace_token(match):
            variable_path = match.group(3)
            if variable_path is not None:
                return self._render_variable(variable_path, data)
            return self._render_each(match.group(1), match.group(2), data)

        return self._TOKEN_PATTERN.sub(replace_token, template)

    def _process_variables(self, template: str, data: Dict[str, Any]) -> str:
        """Replace every ``{{path}}`` in ``template`` with its value."""
        return self._VARIABLE_PATTERN.sub(
            lambda match: self._render_variable(match.group(1), data),
            template
        )

    def _process_each(self, template: str, data: Dict[str, Any]) -> str:
        """Expand every ``{{#each path}}...{{/each}}`` block in ``template``."""
        return self._EACH_PATTERN.sub(
            lambda match: self._render_each(match.group(1), match.group(2), data),
            template
        )

    def _render_variable(self, var_path: str, data: Dict[str, Any]) -> str:
        """Return the text for a single variable path."""
        value = self._get_value(data, var_path.strip())

        if value is None:
            return ""

        # Objects exposing ``.value`` (enum members) are shown by label.
        if hasattr(value, 'value'):
            value = self._format_enum_value(value.value)

        return str(value)

    def _render_each(self, var_path: str, inner_template: str, data: Dict[str, Any]) -> str:
        """Render ``inner_template`` once per item found at ``var_path``.

        Returns an empty string when the path is missing, empty, or not a
        list/tuple.
        """
        items = self._get_value(data, var_path.strip())
        if not items or not isinstance(items, (list, tuple)):
            return ""

        return ''.join(
            self._process_variables(inner_template, {**data, "this": item})
            for item in items
        )

    def _get_value(self, data: Dict[str, Any], path: str) -> Any:
        """Resolve a dotted path against ``data``.

        Each segment is looked up as a dictionary key when the current
        value is a dict, otherwise as an attribute. ``None`` is returned as
        soon as a segment cannot be resolved.
        """
        parts = path.split('.')
        value = data

        for part in parts:
            if value is None:
                return None

            if isinstance(value, dict):
                value = value.get(part)
            elif hasattr(value, part):
                value = getattr(value, part)
            else:
                return None

        return value

    def _format_enum_value(self, value: str) -> str:
        """Map a known enum value to its display label.

        Unknown values are returned unchanged.
        """
        mapping = {
            "strong_recommend": "强烈推荐",
            "recommend": "推荐",
            "consider": "考虑",
            "not_recommend": "不推荐",
            "male": "男",
            "female": "女",
            "unknown": "未知"
        }
        return mapping.get(value, value)
|
||||
|
||||
|
||||
class MessageTemplateEngine:
    """Registry of named message templates.

    Holds several templates and renders them by name.
    """

    def __init__(self):
        self._templates: Dict[str, MessageTemplate] = {}
        self._register_default_templates()

    def _register_default_templates(self):
        """Register the built-in templates: simple, detailed, evaluation_only."""
        simple_template = """【人才推荐】{{candidate.name}}

{{candidate.current_company}} | {{candidate.current_position}} | {{candidate.work_years}}年经验
评分:{{evaluation.overall_score}}/100 | {{evaluation.recommendation}}
联系方式:{{candidate.phone}}
"""
        evaluation_only_template = """【AI评价】{{candidate.name}}

综合评分:{{evaluation.overall_score}}/100
推荐意见:{{evaluation.recommendation}}
评价摘要:{{evaluation.summary}}

优势:{{#each evaluation.strengths}} {{this}} {{/each}}
不足:{{#each evaluation.weaknesses}} {{this}} {{/each}}
"""
        # Registration order is kept so list_templates() stays stable.
        builtin = (
            ("simple", simple_template),
            ("detailed", MessageTemplate.DEFAULT_TEMPLATE),
            ("evaluation_only", evaluation_only_template),
        )
        for template_name, template_text in builtin:
            self.register(template_name, template_text)

    def register(self, name: str, template: str) -> None:
        """Register a template under a name.

        Args:
            name: Template name; an existing entry with the same name is
                replaced.
            template: Template string or ``MessageTemplate`` object.
        """
        entry = MessageTemplate(template) if isinstance(template, str) else template
        self._templates[name] = entry

    def get(self, name: str) -> Optional[MessageTemplate]:
        """Look up a template by name.

        Args:
            name: Template name.

        Returns:
            The template object, or ``None`` when the name is unknown.
        """
        return self._templates.get(name)

    def render(
        self,
        template_name: str,
        candidate: Candidate,
        evaluation: Evaluation,
        extra_data: Optional[Dict[str, Any]] = None
    ) -> str:
        """Render with the named template.

        Unknown names fall back to a default ``MessageTemplate``.

        Args:
            template_name: Template name.
            candidate: Candidate information.
            evaluation: Evaluation result.
            extra_data: Additional data passed to the template.

        Returns:
            The rendered content.
        """
        chosen = self.get(template_name) or MessageTemplate()
        return chosen.render(candidate, evaluation, extra_data)

    def list_templates(self) -> list:
        """Return the names of all registered templates."""
        return [template_name for template_name in self._templates]
|
||||
@@ -0,0 +1,211 @@
|
||||
"""Notification service - Multi-channel notification"""
|
||||
from typing import Dict, List, Optional
|
||||
from dataclasses import dataclass, field
|
||||
from datetime import datetime
|
||||
|
||||
from .channels.base_channel import NotificationChannel, NotificationMessage, SendResult
|
||||
from .message_template import MessageTemplateEngine
|
||||
from ...domain.candidate import Candidate
|
||||
from ...domain.evaluation import Evaluation
|
||||
from ...domain.enums import ChannelType
|
||||
|
||||
|
||||
@dataclass
class NotificationResult:
    """Aggregated outcome of one notification across channels."""

    # True when at least one channel succeeded (set by the caller).
    success: bool
    # Per-channel send results.
    channel_results: Dict[ChannelType, SendResult] = field(default_factory=dict)
    # Channels whose delivery failed.
    failed_channels: List[ChannelType] = field(default_factory=list)
    # Human-readable summary.
    message: str = ""
    # Creation time (local time).
    timestamp: datetime = field(default_factory=datetime.now)

    @property
    def all_success(self) -> bool:
        """Whether no channel failed."""
        return len(self.failed_channels) == 0

    @property
    def success_count(self) -> int:
        """Number of channels whose recorded result is successful.

        Counted from the results themselves rather than by subtracting
        list lengths, so the value can never go negative when a failed
        channel has no recorded result.
        """
        return sum(1 for result in self.channel_results.values() if result.success)
|
||||
|
||||
|
||||
class NotificationService:
    """Notification service.

    Single entry point for multi-channel notifications. Channels are
    registered by their ``ChannelType`` and a message is sent to each
    requested channel in turn.
    """

    def __init__(
        self,
        channels: Optional[Dict[ChannelType, NotificationChannel]] = None,
        template_engine: Optional[MessageTemplateEngine] = None
    ):
        """Initialize the notification service.

        Args:
            channels: Mapping of channel type to channel instance.
            template_engine: Template engine; a new
                ``MessageTemplateEngine`` is created when omitted.
        """
        self.channels = channels or {}
        self.template_engine = template_engine or MessageTemplateEngine()

    def register_channel(self, channel: NotificationChannel) -> None:
        """Register a channel under its own ``channel_type``.

        Args:
            channel: Channel instance; replaces any channel of the same type.
        """
        self.channels[channel.channel_type] = channel

    def unregister_channel(self, channel_type: ChannelType) -> None:
        """Remove a channel; unknown types are ignored.

        Args:
            channel_type: Type of the channel to remove.
        """
        self.channels.pop(channel_type, None)

    async def notify(
        self,
        candidate: Candidate,
        evaluation: Evaluation,
        channels: Optional[List[ChannelType]] = None,
        template_name: Optional[str] = None,
        title: Optional[str] = None,
        extra_data: Optional[Dict] = None
    ) -> NotificationResult:
        """Send a candidate notification.

        Args:
            candidate: Candidate information.
            evaluation: Evaluation result.
            channels: Channel types to notify; every registered channel is
                used when omitted or empty. Duplicates are ignored.
            template_name: Name of the message template.
            title: Message title; a default built from the candidate name
                is used when omitted.
            extra_data: Additional data passed to the template and message.

        Returns:
            The aggregated result. ``success`` is True when at least one
            channel succeeded, and every targeted channel has an entry in
            ``channel_results``.
        """
        # De-duplicate while keeping order so no channel is sent to, or
        # counted, twice.
        requested = channels or list(self.channels.keys())
        target_channels = list(dict.fromkeys(requested))

        content = self._build_message(
            candidate=candidate,
            evaluation=evaluation,
            template_name=template_name,
            extra_data=extra_data
        )

        notification = NotificationMessage(
            title=title or f"【人才推荐】{candidate.name}",
            content=content,
            candidate=candidate,
            evaluation=evaluation,
            extra_data=extra_data or {}
        )

        channel_results = {}
        failed_channels = []

        for channel_type in target_channels:
            result = await self._send_to_channel(channel_type, notification)
            channel_results[channel_type] = result
            if not result.success:
                failed_channels.append(channel_type)

        success = len(failed_channels) < len(target_channels)
        summary = self._build_result_message(success, failed_channels, target_channels)

        return NotificationResult(
            success=success,
            channel_results=channel_results,
            failed_channels=failed_channels,
            message=summary
        )

    async def _send_to_channel(
        self,
        channel_type: ChannelType,
        message: NotificationMessage
    ) -> SendResult:
        """Send to one channel, always returning a result (never raising)."""
        channel = self.channels.get(channel_type)

        if not channel:
            return SendResult(
                success=False,
                error_message=f"Channel not registered: {channel_type}"
            )

        if not channel.is_configured():
            return SendResult(
                success=False,
                error_message=f"Channel not configured: {channel_type}"
            )

        try:
            return await channel.send(message)
        except Exception as e:
            return SendResult(success=False, error_message=str(e))

    def _build_message(
        self,
        candidate: Candidate,
        evaluation: Evaluation,
        template_name: Optional[str] = None,
        extra_data: Optional[Dict] = None
    ) -> str:
        """Render the message content, defaulting to the "detailed" template."""
        return self.template_engine.render(
            template_name=template_name or "detailed",
            candidate=candidate,
            evaluation=evaluation,
            extra_data=extra_data
        )

    def _build_result_message(
        self,
        success: bool,
        failed_channels: List[ChannelType],
        target_channels: List[ChannelType]
    ) -> str:
        """Build the human-readable summary for a notification attempt."""
        # Nothing was targeted: say so instead of "0 channels all failed".
        if not target_channels:
            return "没有可用的通知渠道"

        if success and not failed_channels:
            return f"通知发送成功,共 {len(target_channels)} 个渠道"

        if not success and len(failed_channels) == len(target_channels):
            return f"通知发送失败,{len(failed_channels)} 个渠道全部失败"

        success_count = len(target_channels) - len(failed_channels)
        return f"通知部分成功:{success_count}/{len(target_channels)} 个渠道成功"

    def get_configured_channels(self) -> List[ChannelType]:
        """Return the types of all registered channels that are configured."""
        return [
            ct for ct, channel in self.channels.items()
            if channel.is_configured()
        ]

    def is_channel_configured(self, channel_type: ChannelType) -> bool:
        """Return whether the given channel is registered and configured."""
        channel = self.channels.get(channel_type)
        return channel.is_configured() if channel else False
|
||||
Reference in New Issue
Block a user