Implement feature extraction and inference enhancements, including async processing and MongoDB integration
This commit is contained in:
@@ -391,6 +391,46 @@ def process_single_txt(file_path, output_dir=OUTPUT_DIR):
|
||||
raise RuntimeError(f"保存JSON失败:{str(e)}") from e
|
||||
|
||||
|
||||
async def process_single(content: str):
    """Extract a validated feature dictionary from ``content`` via the chat API.

    The text is turned into a prompt with ``build_extraction_prompt``, sent to
    the chat-completions endpoint, and the first ``{...}`` span of the reply is
    cleaned with ``clean_and_fix_json`` and parsed as JSON. The parsed object
    must contain a ``Follow_up_Priority`` entry that is a dict holding a
    ``value`` and a list-typed ``evidence``.

    The API request is executed in the event loop's default executor so that
    a synchronous client does not block other coroutines while waiting for
    the response.

    Args:
        content: Raw text to extract features from.

    Returns:
        The parsed JSON object returned by the model.

    Raises:
        RuntimeError: If the API call fails, the reply is empty or contains no
            JSON object, the JSON cannot be parsed, or ``Follow_up_Priority``
            is missing or malformed.
    """
    # Imported locally so the block stays self-contained regardless of the
    # file's top-level imports.
    import asyncio
    import inspect

    # 3. Build the prompt and call the API.
    prompt = build_extraction_prompt(content)

    def _request():
        # Issue the chat-completions request; runs in a worker thread.
        return client.chat.completions.create(
            model=MODEL,
            messages=[{"role": "user", "content": prompt}],
            temperature=TEMPERATURE,
            max_tokens=8000,
            timeout=30,
        )

    try:
        print("正在调用API提取特征...")
        loop = asyncio.get_running_loop()
        response = await loop.run_in_executor(None, _request)
        # NOTE(review): the client's type is not visible from here. If it is
        # an async client, ``create`` returns an awaitable instead of a
        # response, so await it on the loop.
        if inspect.isawaitable(response):
            response = await response
    except Exception as e:
        raise RuntimeError(f"API调用失败:{str(e)}") from e

    # 4. Extract and clean the JSON payload.
    # Guard against an empty choice list or a null message body so callers
    # always see RuntimeError rather than IndexError/AttributeError.
    if not response.choices or response.choices[0].message.content is None:
        raise RuntimeError("API返回内容为空")
    feature_json_str = response.choices[0].message.content.strip()

    # Greedy match: from the first "{" to the last "}" in the reply.
    json_match = re.search(r"\{[\s\S]*\}", feature_json_str)
    if not json_match:
        raise RuntimeError(f"API返回无有效JSON:{feature_json_str[:200]}...")
    cleaned_json = clean_and_fix_json(json_match.group())

    # 5. Parse and validate the JSON.
    try:
        parsed_dict = json.loads(cleaned_json)
    except json.JSONDecodeError as e:
        raise RuntimeError(f"JSON解析失败:{str(e)} | 清洗后内容:{cleaned_json[:500]}") from e

    # Validate the core field.
    if "Follow_up_Priority" not in parsed_dict:
        raise RuntimeError("核心字段Follow_up_Priority缺失")
    fu_prio = parsed_dict["Follow_up_Priority"]
    if not isinstance(fu_prio, dict) or "value" not in fu_prio or "evidence" not in fu_prio:
        raise RuntimeError("Follow_up_Priority格式错误(需包含value和evidence)")
    if not isinstance(fu_prio["evidence"], list):
        raise RuntimeError("evidence必须是数组类型")
    # NOTE(review): a value of exactly 20 characters is rejected here; confirm
    # whether the intended limit is "fewer than 20" or "at most 20".
    if len(str(fu_prio["value"])) >= 20:
        raise RuntimeError(f"value超20字限制:{fu_prio['value']}")

    return parsed_dict
|
||||
|
||||
if __name__ == "__main__":
|
||||
# 要处理的源文件夹路径
|
||||
|
||||
Reference in New Issue
Block a user