MiroFish/txt2graph/ontology.py
666ghj 9657061b26 Add initial implementation of txt2graph tool for knowledge graph generation
- Created a new Streamlit application for visualizing knowledge graphs.
- Implemented text extraction from PDF, Markdown, and TXT files.
- Developed graph building logic using Zep Cloud API.
- Added support for custom entity types and relationships.
- Included interactive HTML visualization for generated graphs.
- Updated .gitignore to include new directories and files.
- Added example environment configuration file (.env.example) for API key setup.
- Created README.md with installation and usage instructions.
- Introduced various utility scripts and styles for enhanced functionality.
2025-11-28 14:07:42 +08:00

163 lines
4 KiB
Python

"""
自定义实体类型定义
只提取现实生活中真实存在的、可以有行动的实体
"""
from pydantic import Field
from zep_cloud.external_clients.ontology import EntityModel, EntityText, EdgeModel
# ============== Entity type definitions ==============
class Person(EntityModel):
    """A real, named individual. Must have a specific name like "马化腾" or "Elon Musk". NOT generic terms like "某人", "用户", pronouns, or abstract roles."""

    # NOTE(review): the docstring above is kept verbatim; it presumably doubles
    # as the type description handed to Zep -- confirm before rewording it.

    # Occupation or job title; defaults to None.
    role: EntityText = Field(default=None, description="职业或职位")

    # Organization the person is affiliated with; defaults to None.
    affiliation: EntityText = Field(default=None, description="所属组织")
class Organization(EntityModel):
    """A real organization with an official name like "武汉大学", "联合国". NOT generic terms like "大学", "政府", "组织"."""

    # NOTE(review): the docstring above is kept verbatim; it presumably doubles
    # as the type description handed to Zep -- confirm before rewording it.

    # Kind of organization; defaults to None.
    org_type: EntityText = Field(default=None, description="组织类型")

    # Where the organization is located; defaults to None.
    location: EntityText = Field(default=None, description="所在地")
class Company(EntityModel):
    """A real company with registered name like "腾讯", "Apple Inc.". NOT generic terms like "科技公司", "某企业"."""

    # NOTE(review): the docstring above is kept verbatim; it presumably doubles
    # as the type description handed to Zep -- confirm before rewording it.

    # Industry the company operates in; defaults to None.
    industry: EntityText = Field(default=None, description="所属行业")

    # Location of the company's headquarters; defaults to None.
    headquarters: EntityText = Field(default=None, description="总部位置")
class Location(EntityModel):
    """A real geographic location like "北京", "硅谷", "故宫". NOT generic terms like "某地", "一个城市"."""

    # NOTE(review): the docstring above is kept verbatim; it presumably doubles
    # as the type description handed to Zep -- confirm before rewording it.

    # Kind of place; defaults to None.
    location_type: EntityText = Field(default=None, description="地点类型")

    # Country the place belongs to; defaults to None.
    country: EntityText = Field(default=None, description="所属国家")
class Product(EntityModel):
    """A real product/service with brand name like "iPhone", "微信". NOT generic categories like "手机", "软件"."""

    # NOTE(review): the docstring above is kept verbatim; it presumably doubles
    # as the type description handed to Zep -- confirm before rewording it.

    # Product category; defaults to None.
    category: EntityText = Field(default=None, description="产品类别")

    # Maker of the product; defaults to None.
    manufacturer: EntityText = Field(default=None, description="制造商")
class Event(EntityModel):
    """A real, documented event like "2024巴黎奥运会", "新冠疫情". NOT generic terms like "会议", "活动"."""

    # NOTE(review): the docstring above is kept verbatim; it presumably doubles
    # as the type description handed to Zep -- confirm before rewording it.

    # Kind of event; defaults to None.
    event_type: EntityText = Field(default=None, description="事件类型")

    # Date on which the event took place, held as text; defaults to None.
    date: EntityText = Field(default=None, description="发生日期")
class Media(EntityModel):
    """A real media outlet like "人民日报", "CNN", "微博". NOT generic terms like "媒体", "新闻"."""

    # NOTE(review): the docstring above is kept verbatim; it presumably doubles
    # as the type description handed to Zep -- confirm before rewording it.

    # Kind of media outlet; defaults to None.
    media_type: EntityText = Field(default=None, description="媒体类型")
# ============== Edge type definitions ==============
class WorksFor(EdgeModel):
    """Employment or affiliation relationship."""

    # NOTE(review): the docstring above is kept verbatim; it presumably doubles
    # as the edge description handed to Zep -- confirm before rewording it.

    # Position held in the employment/affiliation; defaults to None.
    position: EntityText = Field(default=None, description="职位")
class LocatedIn(EdgeModel):
    """Geographic location relationship."""

    # Attribute-free edge: declares no fields beyond those of EdgeModel.
    # NOTE(review): docstring kept verbatim; it presumably doubles as the edge
    # description handed to Zep -- confirm before rewording it.
class PartOf(EdgeModel):
    """Part-of or subsidiary relationship."""

    # Attribute-free edge: declares no fields beyond those of EdgeModel.
    # NOTE(review): docstring kept verbatim; it presumably doubles as the edge
    # description handed to Zep -- confirm before rewording it.
class Produces(EdgeModel):
    """Production or creation relationship."""

    # Attribute-free edge: declares no fields beyond those of EdgeModel.
    # NOTE(review): docstring kept verbatim; it presumably doubles as the edge
    # description handed to Zep -- confirm before rewording it.
class ParticipatesIn(EdgeModel):
    """Participation in an event."""

    # NOTE(review): the docstring above is kept verbatim; it presumably doubles
    # as the edge description handed to Zep -- confirm before rewording it.

    # Role played by the participant; defaults to None.
    role: EntityText = Field(default=None, description="参与角色")
class Collaborates(EdgeModel):
    """Collaboration or partnership relationship."""

    # Attribute-free edge: declares no fields beyond those of EdgeModel.
    # NOTE(review): docstring kept verbatim; it presumably doubles as the edge
    # description handed to Zep -- confirm before rewording it.
class Competes(EdgeModel):
    """Competitive relationship."""

    # Attribute-free edge: declares no fields beyond those of EdgeModel.
    # NOTE(review): docstring kept verbatim; it presumably doubles as the edge
    # description handed to Zep -- confirm before rewording it.
class Reports(EdgeModel):
    """Media reporting or coverage relationship."""

    # Attribute-free edge: declares no fields beyond those of EdgeModel.
    # NOTE(review): docstring kept verbatim; it presumably doubles as the edge
    # description handed to Zep -- confirm before rewording it.
# ============== Ontology configuration ==============
# Registry of entity types: type name -> model class
ENTITY_TYPES = {
    "Person": Person,              # named individuals
    "Organization": Organization,  # officially named organizations
    "Company": Company,            # companies with a registered name
    "Location": Location,          # geographic places
    "Product": Product,            # branded products and services
    "Event": Event,                # documented events
    "Media": Media,                # media outlets
}
# Registry of edge types: relationship name -> model class
EDGE_TYPES = {
    "WORKS_FOR": WorksFor,               # employment / affiliation
    "LOCATED_IN": LocatedIn,             # geographic containment
    "PART_OF": PartOf,                   # part-of / subsidiary
    "PRODUCES": Produces,                # production / creation
    "PARTICIPATES_IN": ParticipatesIn,   # participation in an event
    "COLLABORATES": Collaborates,        # collaboration / partnership
    "COMPETES": Competes,                # competition
    "REPORTS": Reports,                  # media reporting / coverage
}