Enhance backend startup logging and API endpoint display
- Updated `run.py` to print the startup banner only once: when debug mode is on, it is printed only in the Werkzeug reloader child process (`WERKZEUG_RUN_MAIN=true`), which avoids duplicate output.
- Modified `__init__.py` to log the startup and completion messages under the same reloader-process condition.
- Added suppression of `UserWarning`s from `pydantic` in `graph_builder.py`; under Pydantic v2 they are triggered by the `Field(default=None)` usage that the Zep SDK requires.
- Revised `ontology_generator.py` to enforce strict design guidelines for entity types and relationships (exactly 10 entity types, with `Person` and `Organization` as the fallback types), ensuring compliance with new requirements.
- Improved logging behavior in `logger.py` by disabling log propagation to the root logger, avoiding duplicate outputs.
This commit is contained in:
parent
3156f9453d
commit
e98da6b53e
5 changed files with 165 additions and 50 deletions
|
|
@ -2,6 +2,7 @@
|
||||||
MiroFish Backend - Flask应用工厂
|
MiroFish Backend - Flask应用工厂
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
from flask import Flask, request
|
from flask import Flask, request
|
||||||
from flask_cors import CORS
|
from flask_cors import CORS
|
||||||
|
|
||||||
|
|
@ -16,6 +17,13 @@ def create_app(config_class=Config):
|
||||||
|
|
||||||
# 设置日志
|
# 设置日志
|
||||||
logger = setup_logger('mirofish')
|
logger = setup_logger('mirofish')
|
||||||
|
|
||||||
|
# 只在 reloader 子进程中打印启动信息(避免 debug 模式下打印两次)
|
||||||
|
is_reloader_process = os.environ.get('WERKZEUG_RUN_MAIN') == 'true'
|
||||||
|
debug_mode = app.config.get('DEBUG', False)
|
||||||
|
should_log_startup = not debug_mode or is_reloader_process
|
||||||
|
|
||||||
|
if should_log_startup:
|
||||||
logger.info("=" * 50)
|
logger.info("=" * 50)
|
||||||
logger.info("MiroFish Backend 启动中...")
|
logger.info("MiroFish Backend 启动中...")
|
||||||
logger.info("=" * 50)
|
logger.info("=" * 50)
|
||||||
|
|
@ -46,6 +54,7 @@ def create_app(config_class=Config):
|
||||||
def health():
|
def health():
|
||||||
return {'status': 'ok', 'service': 'MiroFish Backend'}
|
return {'status': 'ok', 'service': 'MiroFish Backend'}
|
||||||
|
|
||||||
|
if should_log_startup:
|
||||||
logger.info("MiroFish Backend 启动完成")
|
logger.info("MiroFish Backend 启动完成")
|
||||||
|
|
||||||
return app
|
return app
|
||||||
|
|
|
||||||
|
|
@ -197,10 +197,15 @@ class GraphBuilderService:
|
||||||
|
|
||||||
def set_ontology(self, graph_id: str, ontology: Dict[str, Any]):
|
def set_ontology(self, graph_id: str, ontology: Dict[str, Any]):
|
||||||
"""设置图谱本体(公开方法)"""
|
"""设置图谱本体(公开方法)"""
|
||||||
|
import warnings
|
||||||
from typing import Optional
|
from typing import Optional
|
||||||
from pydantic import Field
|
from pydantic import Field
|
||||||
from zep_cloud.external_clients.ontology import EntityModel, EntityText, EdgeModel
|
from zep_cloud.external_clients.ontology import EntityModel, EntityText, EdgeModel
|
||||||
|
|
||||||
|
# 抑制 Pydantic v2 关于 Field(default=None) 的警告
|
||||||
|
# 这是 Zep SDK 要求的用法,警告来自动态类创建,可以安全忽略
|
||||||
|
warnings.filterwarnings('ignore', category=UserWarning, module='pydantic')
|
||||||
|
|
||||||
# Zep 保留名称,不能作为属性名
|
# Zep 保留名称,不能作为属性名
|
||||||
RESERVED_NAMES = {'uuid', 'name', 'group_id', 'name_embedding', 'summary', 'created_at'}
|
RESERVED_NAMES = {'uuid', 'name', 'group_id', 'name_embedding', 'summary', 'created_at'}
|
||||||
|
|
||||||
|
|
@ -223,6 +228,7 @@ class GraphBuilderService:
|
||||||
for attr_def in entity_def.get("attributes", []):
|
for attr_def in entity_def.get("attributes", []):
|
||||||
attr_name = safe_attr_name(attr_def["name"]) # 使用安全名称
|
attr_name = safe_attr_name(attr_def["name"]) # 使用安全名称
|
||||||
attr_desc = attr_def.get("description", attr_name)
|
attr_desc = attr_def.get("description", attr_name)
|
||||||
|
# Zep API 需要 Field 的 description,这是必需的
|
||||||
attrs[attr_name] = Field(description=attr_desc, default=None)
|
attrs[attr_name] = Field(description=attr_desc, default=None)
|
||||||
annotations[attr_name] = Optional[EntityText] # 类型注解
|
annotations[attr_name] = Optional[EntityText] # 类型注解
|
||||||
|
|
||||||
|
|
@ -246,6 +252,7 @@ class GraphBuilderService:
|
||||||
for attr_def in edge_def.get("attributes", []):
|
for attr_def in edge_def.get("attributes", []):
|
||||||
attr_name = safe_attr_name(attr_def["name"]) # 使用安全名称
|
attr_name = safe_attr_name(attr_def["name"]) # 使用安全名称
|
||||||
attr_desc = attr_def.get("description", attr_name)
|
attr_desc = attr_def.get("description", attr_name)
|
||||||
|
# Zep API 需要 Field 的 description,这是必需的
|
||||||
attrs[attr_name] = Field(description=attr_desc, default=None)
|
attrs[attr_name] = Field(description=attr_desc, default=None)
|
||||||
annotations[attr_name] = Optional[str] # 边属性用str类型
|
annotations[attr_name] = Optional[str] # 边属性用str类型
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -22,8 +22,8 @@ ONTOLOGY_SYSTEM_PROMPT = """你是一个专业的知识图谱本体设计专家
|
||||||
|
|
||||||
因此,**实体必须是现实中真实存在的、可以在社媒上发声和互动的主体**:
|
因此,**实体必须是现实中真实存在的、可以在社媒上发声和互动的主体**:
|
||||||
|
|
||||||
**可以是(鼓励多样化划分)**:
|
**可以是**:
|
||||||
- 具体的个人(公众人物、当事人、意见领袖、专家学者)
|
- 具体的个人(公众人物、当事人、意见领袖、专家学者、普通人)
|
||||||
- 公司、企业(包括其官方账号)
|
- 公司、企业(包括其官方账号)
|
||||||
- 组织机构(大学、协会、NGO、工会等)
|
- 组织机构(大学、协会、NGO、工会等)
|
||||||
- 政府部门、监管机构
|
- 政府部门、监管机构
|
||||||
|
|
@ -35,7 +35,6 @@ ONTOLOGY_SYSTEM_PROMPT = """你是一个专业的知识图谱本体设计专家
|
||||||
- 抽象概念(如"舆论"、"情绪"、"趋势")
|
- 抽象概念(如"舆论"、"情绪"、"趋势")
|
||||||
- 主题/话题(如"学术诚信"、"教育改革")
|
- 主题/话题(如"学术诚信"、"教育改革")
|
||||||
- 观点/态度(如"支持方"、"反对方")
|
- 观点/态度(如"支持方"、"反对方")
|
||||||
- 泛指群体(如"网友"、"公众"、"学生群体")
|
|
||||||
|
|
||||||
## 输出格式
|
## 输出格式
|
||||||
|
|
||||||
|
|
@ -71,49 +70,78 @@ ONTOLOGY_SYSTEM_PROMPT = """你是一个专业的知识图谱本体设计专家
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
## 设计指南
|
## 设计指南(极其重要!)
|
||||||
|
|
||||||
1. **实体类型设计(重要!请尽量多划分)**:
|
### 1. 实体类型设计 - 必须严格遵守
|
||||||
- **数量要求:至少5个,最多10个实体类型**
|
|
||||||
- 每个实体类型代表一类可以在社媒上发声的主体
|
|
||||||
- 尽量细分不同角色,例如:
|
|
||||||
- 不要只用"Person",可以细分为"PublicFigure"、"Expert"、"Whistleblower"等
|
|
||||||
- 不要只用"Organization",可以细分为"University"、"Company"、"NGO"等
|
|
||||||
- description必须清晰说明什么样的实体应该被提取
|
|
||||||
- 每个类型提供2-3个具体示例
|
|
||||||
|
|
||||||
2. **关系类型设计**:
|
**数量要求:必须正好10个实体类型**
|
||||||
- 关系应该反映社媒互动中的真实联系
|
|
||||||
- 关注可能影响舆论传播的关系:
|
|
||||||
- 信息传播:REPORTS_ON, COMMENTS_ON, SHARES
|
|
||||||
- 组织关系:AFFILIATED_WITH, WORKS_FOR, REPRESENTS
|
|
||||||
- 互动关系:RESPONDS_TO, SUPPORTS, OPPOSES
|
|
||||||
- 关系类型:5-10个为宜
|
|
||||||
|
|
||||||
3. **属性设计**:
|
**层次结构要求(必须同时包含具体类型和兜底类型)**:
|
||||||
- 每个实体类型1-3个关键属性
|
|
||||||
- 属性应有助于识别实体的社媒影响力(如role、influence_level等)
|
|
||||||
|
|
||||||
## 实体类型参考(请根据文本内容灵活选择和扩展)
|
你的10个实体类型必须包含以下层次:
|
||||||
|
|
||||||
- Person: 普通个人
|
A. **兜底类型(必须包含,放在列表最后2个)**:
|
||||||
- PublicFigure: 公众人物(明星、网红、意见领袖)
|
- `Person`: 任何自然人个体的兜底类型。当一个人不属于其他更具体的人物类型时,归入此类。
|
||||||
- Expert: 专家学者
|
- `Organization`: 任何组织机构的兜底类型。当一个组织不属于其他更具体的组织类型时,归入此类。
|
||||||
|
|
||||||
|
B. **具体类型(8个,根据文本内容设计)**:
|
||||||
|
- 针对文本中出现的主要角色,设计更具体的类型
|
||||||
|
- 例如:如果文本涉及学术事件,可以有 `Student`, `Professor`, `University`
|
||||||
|
- 例如:如果文本涉及商业事件,可以有 `Company`, `CEO`, `Employee`
|
||||||
|
|
||||||
|
**为什么需要兜底类型**:
|
||||||
|
- 文本中会出现各种人物,如"中小学教师"、"路人甲"、"某位网友"
|
||||||
|
- 如果没有专门的类型匹配,他们应该被归入 `Person`
|
||||||
|
- 同理,小型组织、临时团体等应该归入 `Organization`
|
||||||
|
|
||||||
|
**具体类型的设计原则**:
|
||||||
|
- 从文本中识别出高频出现或关键的角色类型
|
||||||
|
- 每个具体类型应该有明确的边界,避免重叠
|
||||||
|
- description 必须清晰说明这个类型和兜底类型的区别
|
||||||
|
|
||||||
|
### 2. 关系类型设计
|
||||||
|
|
||||||
|
- 数量:6-10个
|
||||||
|
- 关系应该反映社媒互动中的真实联系
|
||||||
|
- 确保关系的 source_targets 涵盖你定义的实体类型
|
||||||
|
|
||||||
|
### 3. 属性设计
|
||||||
|
|
||||||
|
- 每个实体类型1-3个关键属性
|
||||||
|
- **注意**:属性名不能使用 `name`、`uuid`、`group_id`、`created_at`、`summary`(这些是系统保留字)
|
||||||
|
- 推荐使用:`full_name`, `title`, `role`, `position`, `location`, `description` 等
|
||||||
|
|
||||||
|
## 实体类型参考
|
||||||
|
|
||||||
|
**个人类(具体)**:
|
||||||
|
- Student: 学生
|
||||||
|
- Professor: 教授/学者
|
||||||
- Journalist: 记者
|
- Journalist: 记者
|
||||||
- Company: 公司企业
|
- Celebrity: 明星/网红
|
||||||
|
- Executive: 高管
|
||||||
|
- Official: 政府官员
|
||||||
|
- Lawyer: 律师
|
||||||
|
- Doctor: 医生
|
||||||
|
|
||||||
|
**个人类(兜底)**:
|
||||||
|
- Person: 任何自然人(不属于上述具体类型时使用)
|
||||||
|
|
||||||
|
**组织类(具体)**:
|
||||||
- University: 高校
|
- University: 高校
|
||||||
|
- Company: 公司企业
|
||||||
- GovernmentAgency: 政府机构
|
- GovernmentAgency: 政府机构
|
||||||
- MediaOutlet: 传统媒体
|
- MediaOutlet: 媒体机构
|
||||||
- SelfMedia: 自媒体账号
|
- Hospital: 医院
|
||||||
- SocialPlatform: 社交媒体平台
|
- School: 中小学
|
||||||
- NGO: 非政府组织
|
- NGO: 非政府组织
|
||||||
- IndustryAssociation: 行业协会
|
|
||||||
- AlumniAssociation: 校友会
|
**组织类(兜底)**:
|
||||||
- FanGroup: 粉丝群体/支持群体
|
- Organization: 任何组织机构(不属于上述具体类型时使用)
|
||||||
|
|
||||||
## 关系类型参考
|
## 关系类型参考
|
||||||
|
|
||||||
- WORKS_FOR: 工作于
|
- WORKS_FOR: 工作于
|
||||||
|
- STUDIES_AT: 就读于
|
||||||
- AFFILIATED_WITH: 隶属于
|
- AFFILIATED_WITH: 隶属于
|
||||||
- REPRESENTS: 代表
|
- REPRESENTS: 代表
|
||||||
- REGULATES: 监管
|
- REGULATES: 监管
|
||||||
|
|
@ -215,7 +243,13 @@ class OntologyGenerator:
|
||||||
|
|
||||||
message += """
|
message += """
|
||||||
请根据以上内容,设计适合社会舆论模拟的实体类型和关系类型。
|
请根据以上内容,设计适合社会舆论模拟的实体类型和关系类型。
|
||||||
记住:所有实体类型必须是现实中可以发声的主体,不能是抽象概念。
|
|
||||||
|
**必须遵守的规则**:
|
||||||
|
1. 必须正好输出10个实体类型
|
||||||
|
2. 最后2个必须是兜底类型:Person(个人兜底)和 Organization(组织兜底)
|
||||||
|
3. 前8个是根据文本内容设计的具体类型
|
||||||
|
4. 所有实体类型必须是现实中可以发声的主体,不能是抽象概念
|
||||||
|
5. 属性名不能使用 name、uuid、group_id 等保留字,用 full_name、org_name 等替代
|
||||||
"""
|
"""
|
||||||
|
|
||||||
return message
|
return message
|
||||||
|
|
@ -250,6 +284,64 @@ class OntologyGenerator:
|
||||||
if len(edge.get("description", "")) > 100:
|
if len(edge.get("description", "")) > 100:
|
||||||
edge["description"] = edge["description"][:97] + "..."
|
edge["description"] = edge["description"][:97] + "..."
|
||||||
|
|
||||||
|
# Zep API 限制:最多 10 个自定义实体类型,最多 10 个自定义边类型
|
||||||
|
MAX_ENTITY_TYPES = 10
|
||||||
|
MAX_EDGE_TYPES = 10
|
||||||
|
|
||||||
|
# 兜底类型定义
|
||||||
|
person_fallback = {
|
||||||
|
"name": "Person",
|
||||||
|
"description": "Any individual person not fitting other specific person types.",
|
||||||
|
"attributes": [
|
||||||
|
{"name": "full_name", "type": "text", "description": "Full name of the person"},
|
||||||
|
{"name": "role", "type": "text", "description": "Role or occupation"}
|
||||||
|
],
|
||||||
|
"examples": ["ordinary citizen", "anonymous netizen"]
|
||||||
|
}
|
||||||
|
|
||||||
|
organization_fallback = {
|
||||||
|
"name": "Organization",
|
||||||
|
"description": "Any organization not fitting other specific organization types.",
|
||||||
|
"attributes": [
|
||||||
|
{"name": "org_name", "type": "text", "description": "Name of the organization"},
|
||||||
|
{"name": "org_type", "type": "text", "description": "Type of organization"}
|
||||||
|
],
|
||||||
|
"examples": ["small business", "community group"]
|
||||||
|
}
|
||||||
|
|
||||||
|
# 检查是否已有兜底类型
|
||||||
|
entity_names = {e["name"] for e in result["entity_types"]}
|
||||||
|
has_person = "Person" in entity_names
|
||||||
|
has_organization = "Organization" in entity_names
|
||||||
|
|
||||||
|
# 需要添加的兜底类型
|
||||||
|
fallbacks_to_add = []
|
||||||
|
if not has_person:
|
||||||
|
fallbacks_to_add.append(person_fallback)
|
||||||
|
if not has_organization:
|
||||||
|
fallbacks_to_add.append(organization_fallback)
|
||||||
|
|
||||||
|
if fallbacks_to_add:
|
||||||
|
current_count = len(result["entity_types"])
|
||||||
|
needed_slots = len(fallbacks_to_add)
|
||||||
|
|
||||||
|
# 如果添加后会超过 10 个,需要移除一些现有类型
|
||||||
|
if current_count + needed_slots > MAX_ENTITY_TYPES:
|
||||||
|
# 计算需要移除多少个
|
||||||
|
to_remove = current_count + needed_slots - MAX_ENTITY_TYPES
|
||||||
|
# 从末尾移除(保留前面更重要的具体类型)
|
||||||
|
result["entity_types"] = result["entity_types"][:-to_remove]
|
||||||
|
|
||||||
|
# 添加兜底类型
|
||||||
|
result["entity_types"].extend(fallbacks_to_add)
|
||||||
|
|
||||||
|
# 最终确保不超过限制(防御性编程)
|
||||||
|
if len(result["entity_types"]) > MAX_ENTITY_TYPES:
|
||||||
|
result["entity_types"] = result["entity_types"][:MAX_ENTITY_TYPES]
|
||||||
|
|
||||||
|
if len(result["edge_types"]) > MAX_EDGE_TYPES:
|
||||||
|
result["edge_types"] = result["edge_types"][:MAX_EDGE_TYPES]
|
||||||
|
|
||||||
return result
|
return result
|
||||||
|
|
||||||
def generate_python_code(self, ontology: Dict[str, Any]) -> str:
|
def generate_python_code(self, ontology: Dict[str, Any]) -> str:
|
||||||
|
|
|
||||||
|
|
@ -31,6 +31,9 @@ def setup_logger(name: str = 'mirofish', level: int = logging.DEBUG) -> logging.
|
||||||
logger = logging.getLogger(name)
|
logger = logging.getLogger(name)
|
||||||
logger.setLevel(level)
|
logger.setLevel(level)
|
||||||
|
|
||||||
|
# 阻止日志向上传播到根 logger,避免重复输出
|
||||||
|
logger.propagate = False
|
||||||
|
|
||||||
# 如果已经有处理器,不重复添加
|
# 如果已经有处理器,不重复添加
|
||||||
if logger.handlers:
|
if logger.handlers:
|
||||||
return logger
|
return logger
|
||||||
|
|
|
||||||
|
|
@ -31,20 +31,24 @@ def main():
|
||||||
port = int(os.environ.get('FLASK_PORT', 5001))
|
port = int(os.environ.get('FLASK_PORT', 5001))
|
||||||
debug = Config.DEBUG
|
debug = Config.DEBUG
|
||||||
|
|
||||||
|
# 只在 reloader 子进程中打印启动信息(避免 debug 模式下打印两次)
|
||||||
|
# WERKZEUG_RUN_MAIN=true 表示当前是 reloader 启动的子进程
|
||||||
|
is_reloader_process = os.environ.get('WERKZEUG_RUN_MAIN') == 'true'
|
||||||
|
if not debug or is_reloader_process:
|
||||||
print(f"""
|
print(f"""
|
||||||
╔══════════════════════════════════════════════════╗
|
╔══════════════════════════════════════════════════╗
|
||||||
║ MiroFish Backend Server ║
|
║ MiroFish Backend Server ║
|
||||||
╠══════════════════════════════════════════════════╣
|
╠══════════════════════════════════════════════════╣
|
||||||
║ Running on: http://{host}:{port}
|
║ Running on: http://{host}:{port} ║
|
||||||
║ Debug mode: {debug}
|
║ Debug mode: {debug} ║
|
||||||
║
|
║ ║
|
||||||
║ API Endpoints:
|
║ API Endpoints: ║
|
||||||
║ POST /api/graph/ontology/generate - 生成本体
|
║ POST /api/graph/ontology/generate - 生成本体 ║
|
||||||
║ POST /api/graph/build - 构建图谱
|
║ POST /api/graph/build - 构建图谱 ║
|
||||||
║ GET /api/graph/task/<task_id> - 查询任务
|
║ GET /api/graph/task/<task_id> - 查询任务 ║
|
||||||
║ GET /api/graph/tasks - 列出任务
|
║ GET /api/graph/tasks - 列出任务 ║
|
||||||
║ GET /api/graph/data/<graph_id> - 获取图数据
|
║ GET /api/graph/data/<graph_id> - 获取图数据║
|
||||||
║ DELETE /api/graph/delete/<graph_id>- 删除图谱
|
║ DELETE /api/graph/delete/<graph_id>- 删除图谱 ║
|
||||||
╚══════════════════════════════════════════════════╝
|
╚══════════════════════════════════════════════════╝
|
||||||
""")
|
""")
|
||||||
|
|
||||||
|
|
|
||||||
Loading…
Reference in a new issue