Computer Use:截屏控制 / 浏览器自动化 / 桌面操作的完整实战与安全防护
第二十五章:Memory Tool:外部记忆存储与跨会话知识持久化
25.1 为什么需要 Memory Tool
Claude 的原生上下文窗口是有边界的。即便 Claude 3.7 Sonnet 支持 200K token 的超长上下文,单次对话仍然是一次性的——会话结束,所有信息随之消散。对于需要跨天、跨周、甚至跨月积累知识的 Agent 来说,这构成了根本性的限制。
Memory Tool 的核心价值在于将"短期工作记忆"转化为"持久长期记忆"。它是一个明确定义的工具调用接口,允许 Claude 在执行任务时主动将关键信息写入外部存储,并在需要时检索回来。
与简单地把历史对话拼接进 prompt 不同,Memory Tool 支持:
- 结构化存储:按类别、标签、时间戳组织信息,而非线性堆叠
- 选择性检索:只取与当前任务相关的记忆片段,而非全量回放
- 跨会话持久化:信息在会话之间保留,不受上下文窗口限制
- 主动遗忘:可以显式删除过期或错误的记忆条目
记忆的三个层次
在设计 Memory Tool 之前,有必要理解 AI Agent 记忆的三个层次:
| 层次 | 存储位置 | 生命周期 | 典型内容 |
|---|---|---|---|
| 工作记忆 | 上下文窗口(In-Context) | 单次会话 | 当前对话历史、工具调用结果 |
| 情节记忆 | 外部数据库 | 数周至数月 | 用户偏好、历史决策、项目背景 |
| 语义记忆 | 向量数据库 | 长期 | 领域知识、事实信息、文档内容 |
Memory Tool 主要服务于情节记忆和语义记忆的存取。
25.2 Memory Tool 的标准定义
在 Anthropic 的工具调用(Tool Use)框架下,Memory Tool 被定义为一组标准工具。以下是完整的 JSON Schema 定义:
{
"name": "memory_store",
"description": "将重要信息存储到持久化记忆系统中。当你发现对未来有用的信息(用户偏好、项目进展、关键决策、重要事实)时调用此工具。",
"input_schema": {
"type": "object",
"properties": {
"content": {
"type": "string",
"description": "要存储的信息内容,应当简洁明确,包含足够的上下文使其可以独立理解"
},
"category": {
"type": "string",
"enum": ["user_preference", "project_context", "factual_knowledge", "decision_log", "relationship", "task_progress"],
"description": "记忆类别,用于后续检索时过滤"
},
"tags": {
"type": "array",
"items": {"type": "string"},
"description": "关键词标签列表,用于语义检索"
},
"importance": {
"type": "integer",
"minimum": 1,
"maximum": 5,
"description": "重要程度,1-5分,影响检索优先级和遗忘策略"
},
"expires_at": {
"type": "string",
"format": "date-time",
"description": "可选的过期时间,ISO 8601 格式。不设置则永久保留"
}
},
"required": ["content", "category", "importance"]
}
}
{
"name": "memory_retrieve",
"description": "从持久化记忆系统中检索相关信息。在开始复杂任务前、需要了解用户背景时调用此工具。",
"input_schema": {
"type": "object",
"properties": {
"query": {
"type": "string",
"description": "检索查询,用自然语言描述你想找什么"
},
"categories": {
"type": "array",
"items": {"type": "string"},
"description": "限定检索的类别范围,为空则检索所有类别"
},
"limit": {
"type": "integer",
"default": 10,
"description": "返回结果数量上限"
},
"min_importance": {
"type": "integer",
"minimum": 1,
"maximum": 5,
"default": 1,
"description": "最低重要程度过滤"
}
},
"required": ["query"]
}
}
{
"name": "memory_delete",
"description": "删除不再有效的记忆条目。当信息过期、发生矛盾或用户要求遗忘时调用。",
"input_schema": {
"type": "object",
"properties": {
"memory_id": {
"type": "string",
"description": "要删除的记忆条目 ID"
},
"reason": {
"type": "string",
"description": "删除原因,用于审计日志"
}
},
"required": ["memory_id", "reason"]
}
}
25.3 存储后端:向量数据库 vs 键值存储
Memory Tool 的威力来自于后端存储的设计。两种主流方案各有适用场景。
方案一:向量数据库(Vector Database)
向量数据库是语义记忆的理想后端。它将每条记忆转化为高维向量,通过余弦相似度实现语义检索,即便用词不同也能找到相关内容。
推荐选择:
- Qdrant:本地部署首选,Rust 编写,性能优异,支持过滤
- Chroma:Python 生态集成最佳,开发调试方便
- Pinecone:云托管,无运维负担,适合生产环境
- pgvector:PostgreSQL 扩展,适合已有 Postgres 的团队
import json
import uuid
from datetime import datetime, timezone

import anthropic
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams, PointStruct
from sentence_transformers import SentenceTransformer
class VectorMemoryBackend:
"""基于 Qdrant 的向量记忆后端"""
def __init__(self, collection_name: str = "agent_memory"):
self.client = QdrantClient(host="localhost", port=6333)
self.encoder = SentenceTransformer("BAAI/bge-m3") # 中英文双语模型
self.collection = collection_name
self._ensure_collection()
def _ensure_collection(self):
collections = [c.name for c in self.client.get_collections().collections]
if self.collection not in collections:
self.client.create_collection(
collection_name=self.collection,
vectors_config=VectorParams(size=1024, distance=Distance.COSINE)
)
def store(self, content: str, category: str, tags: list[str],
importance: int, expires_at: str | None = None) -> str:
"""存储记忆,返回 memory_id"""
memory_id = str(uuid.uuid4())
vector = self.encoder.encode(content).tolist()
payload = {
"content": content,
"category": category,
"tags": tags,
"importance": importance,
"created_at": datetime.utcnow().isoformat(),
"expires_at": expires_at
}
self.client.upsert(
collection_name=self.collection,
points=[PointStruct(id=memory_id, vector=vector, payload=payload)]
)
return memory_id
def retrieve(self, query: str, categories: list[str] | None = None,
limit: int = 10, min_importance: int = 1) -> list[dict]:
"""语义检索记忆"""
query_vector = self.encoder.encode(query).tolist()
# 构建过滤条件
filter_conditions = []
if categories:
filter_conditions.append({
"key": "category",
"match": {"any": categories}
})
if min_importance > 1:
filter_conditions.append({
"key": "importance",
"range": {"gte": min_importance}
})
query_filter = None
if filter_conditions:
query_filter = {"must": filter_conditions}
results = self.client.search(
collection_name=self.collection,
query_vector=query_vector,
query_filter=query_filter,
limit=limit,
with_payload=True
)
return [
{
"id": str(r.id),
"content": r.payload["content"],
"category": r.payload["category"],
"importance": r.payload["importance"],
"score": r.score,
"created_at": r.payload["created_at"]
}
for r in results
if not r.payload.get("expires_at") or
r.payload["expires_at"] > datetime.utcnow().isoformat()
]
def delete(self, memory_id: str, reason: str):
"""删除记忆条目"""
self.client.delete(
collection_name=self.collection,
points_selector={"points": [memory_id]}
)
# 记录审计日志
print(f"[Memory Audit] Deleted {memory_id}: {reason}")
方案二:键值存储(Key-Value Store)
对于结构化程度高、需要精确匹配的记忆(如用户设置、项目元数据),键值存储更合适。
import sqlite3
import json
from datetime import datetime
class KVMemoryBackend:
"""基于 SQLite 的键值记忆后端"""
def __init__(self, db_path: str = "memory.db"):
self.conn = sqlite3.connect(db_path, check_same_thread=False)
self._init_schema()
def _init_schema(self):
self.conn.execute("""
CREATE TABLE IF NOT EXISTS memories (
id TEXT PRIMARY KEY,
content TEXT NOT NULL,
category TEXT NOT NULL,
tags TEXT, -- JSON array
importance INTEGER DEFAULT 3,
created_at TEXT NOT NULL,
expires_at TEXT,
deleted_at TEXT
)
""")
self.conn.execute(
"CREATE INDEX IF NOT EXISTS idx_category ON memories(category)"
)
self.conn.execute(
"CREATE INDEX IF NOT EXISTS idx_importance ON memories(importance)"
)
self.conn.commit()
def store(self, content: str, category: str, tags: list[str],
importance: int, expires_at: str | None = None) -> str:
memory_id = str(uuid.uuid4())
self.conn.execute(
"""INSERT INTO memories (id, content, category, tags, importance,
created_at, expires_at) VALUES (?, ?, ?, ?, ?, ?, ?)""",
(memory_id, content, category, json.dumps(tags), importance,
datetime.utcnow().isoformat(), expires_at)
)
self.conn.commit()
return memory_id
def retrieve_by_category(self, category: str, limit: int = 20) -> list[dict]:
cursor = self.conn.execute(
"""SELECT id, content, category, importance, created_at
FROM memories
WHERE category = ? AND deleted_at IS NULL
AND (expires_at IS NULL OR expires_at > ?)
ORDER BY importance DESC, created_at DESC
LIMIT ?""",
(category, datetime.utcnow().isoformat(), limit)
)
return [dict(zip([d[0] for d in cursor.description], row))
for row in cursor.fetchall()]
25.4 与 Claude API 集成:完整实现
以下是将 Memory Tool 与 Claude API 集成的完整示例,展示了 Agent 如何在对话中自主决策何时存储和检索记忆:
import anthropic
import json
class MemoryEnabledAgent:
    """Claude agent with persistent memory.

    The agent exposes three memory tools (store, retrieve, delete) to the
    model and executes the tool calls against a per-user
    ``VectorMemoryBackend`` collection.
    """

    # Categories the model may assign to a stored memory.
    MEMORY_CATEGORIES = [
        "user_preference", "project_context",
        "factual_knowledge", "decision_log",
        "relationship", "task_progress",
    ]

    # Upper bound on model/tool round trips within one chat() call, so a
    # model that keeps requesting tools cannot loop (and bill) forever.
    MAX_TOOL_ROUNDS = 16

    SYSTEM_PROMPT = (
        "你是一个有持久记忆能力的助手。\n"
        "在每次对话开始时,你应当:\n"
        "1. 先用 memory_retrieve 检索与当前请求相关的历史记忆\n"
        "2. 结合检索到的记忆和当前对话内容进行回复\n"
        "3. 在对话中发现重要信息时,用 memory_store 保存\n"
        "4. 发现矛盾或过期信息时,用 memory_delete 清理\n"
        "记忆存储的原则:\n"
        "- 用户的明确偏好(importance: 5)\n"
        "- 长期项目的关键决策(importance: 4)\n"
        "- 有用但不紧急的背景信息(importance: 3)\n"
        "- 临时性信息不需要存储"
    )

    def __init__(self, user_id: str):
        """Create the API client and the user's memory collection.

        Args:
            user_id: Identifier of the user; it namespaces the collection as
                ``memory_<user_id>``.
        """
        self.client = anthropic.Anthropic()
        self.memory = VectorMemoryBackend(f"memory_{user_id}")
        self.user_id = user_id
        self.tools = self._define_tools()

    def _define_tools(self) -> list[dict]:
        """Return the tool definitions passed to the Messages API.

        ``expires_at`` and ``min_importance`` are declared here because
        ``_execute_tool`` forwards both to the backend; without the
        declaration the model could never supply them.
        """
        return [
            {
                "name": "memory_store",
                "description": "将重要信息存储到持久化记忆中",
                "input_schema": {
                    "type": "object",
                    "properties": {
                        "content": {"type": "string"},
                        "category": {
                            "type": "string",
                            "enum": list(self.MEMORY_CATEGORIES),
                        },
                        "tags": {"type": "array", "items": {"type": "string"}},
                        "importance": {"type": "integer", "minimum": 1, "maximum": 5},
                        "expires_at": {
                            "type": "string",
                            "format": "date-time",
                            "description": "可选的过期时间,ISO 8601 格式。不设置则永久保留",
                        },
                    },
                    "required": ["content", "category", "importance"],
                },
            },
            {
                "name": "memory_retrieve",
                "description": "从持久化记忆中检索相关信息",
                "input_schema": {
                    "type": "object",
                    "properties": {
                        "query": {"type": "string"},
                        "categories": {"type": "array", "items": {"type": "string"}},
                        "limit": {"type": "integer", "default": 5},
                        "min_importance": {
                            "type": "integer",
                            "minimum": 1,
                            "maximum": 5,
                            "default": 1,
                        },
                    },
                    "required": ["query"],
                },
            },
            {
                "name": "memory_delete",
                "description": "删除不再有效的记忆条目",
                "input_schema": {
                    "type": "object",
                    "properties": {
                        "memory_id": {"type": "string"},
                        "reason": {"type": "string"},
                    },
                    "required": ["memory_id", "reason"],
                },
            },
        ]

    def _execute_tool(self, tool_name: str, tool_input: dict) -> str:
        """Run one memory tool call and return its result as a JSON string.

        Args:
            tool_name: Name of the tool requested by the model.
            tool_input: Arguments supplied by the model.

        Returns:
            A JSON document. Failures (unknown tool, missing argument,
            backend error) are reported as ``{"error": ...}`` instead of being
            raised, so the model can see the problem and react to it.
        """
        try:
            if tool_name == "memory_store":
                memory_id = self.memory.store(
                    content=tool_input["content"],
                    category=tool_input["category"],
                    tags=tool_input.get("tags", []),
                    importance=tool_input["importance"],
                    expires_at=tool_input.get("expires_at"),
                )
                payload = {"success": True, "memory_id": memory_id}
            elif tool_name == "memory_retrieve":
                results = self.memory.retrieve(
                    query=tool_input["query"],
                    categories=tool_input.get("categories"),
                    limit=tool_input.get("limit", 5),
                    min_importance=tool_input.get("min_importance", 1),
                )
                payload = {"memories": results}
            elif tool_name == "memory_delete":
                self.memory.delete(
                    memory_id=tool_input["memory_id"],
                    reason=tool_input["reason"],
                )
                payload = {"success": True}
            else:
                payload = {"error": f"Unknown tool: {tool_name}"}
        except Exception as exc:
            # Tool boundary: the arguments come from the model and the backend
            # is a network service, so any failure becomes a tool result.
            payload = {"error": f"{tool_name} failed: {type(exc).__name__}: {exc}"}
        # ensure_ascii=False keeps CJK text readable instead of \uXXXX escapes.
        return json.dumps(payload, ensure_ascii=False)

    def chat(self, user_message: str) -> str:
        """Handle one conversation turn, running memory tools as requested.

        Each call starts a fresh message list; continuity between calls comes
        only from the memory backend.

        Args:
            user_message: The user's message for this turn.

        Returns:
            The concatenated text blocks of the model's final reply, or an
            empty string if the reply contains no text.

        Raises:
            RuntimeError: If the model is still requesting tools after
                ``MAX_TOOL_ROUNDS`` round trips.
        """
        messages = [{"role": "user", "content": user_message}]
        for _ in range(self.MAX_TOOL_ROUNDS):
            response = self.client.messages.create(
                model="claude-opus-4-5",
                max_tokens=2048,
                system=self.SYSTEM_PROMPT,
                tools=self.tools,
                messages=messages,
            )

            if response.stop_reason != "tool_use":
                # Final answer: join every text block, not just the first.
                return "".join(
                    block.text for block in response.content
                    if hasattr(block, "text")
                )

            # Execute each requested tool and hand the results back.
            tool_results = [
                {
                    "type": "tool_result",
                    "tool_use_id": block.id,
                    "content": self._execute_tool(block.name, block.input),
                }
                for block in response.content
                if block.type == "tool_use"
            ]
            messages.append({"role": "assistant", "content": response.content})
            messages.append({"role": "user", "content": tool_results})

        raise RuntimeError(
            f"Model kept requesting tools after {self.MAX_TOOL_ROUNDS} rounds"
        )
# Usage example
agent = MemoryEnabledAgent(user_id="user_001")

# First conversation: the user describes their stack and coding preferences.
first_prompt = "我是一个 Python 开发者,正在用 FastAPI 构建一个微服务项目,偏好使用 async/await 风格"
response1 = agent.chat(first_prompt)
print(response1)

# Second conversation (a new session, but the memories persist).
# The agent is expected to retrieve its memories first, find the user's
# FastAPI + async/await preference, and generate code in that style.
second_prompt = "帮我写一个 HTTP 客户端封装"
response2 = agent.chat(second_prompt)
print(response2)
25.5 检索策略的设计
记忆的价值在于"找得到"。以下是几种关键的检索策略:
策略一:对话开始时的主动预取
def _prefetch_relevant_memories(self, user_message: str) -> str:
"""在系统提示中注入预取的相关记忆"""
memories = self.memory.retrieve(
query=user_message,
limit=5,
min_importance=3
)
if not memories:
return ""
memory_text = "\n".join([
f"- [{m['category']}] {m['content']} (重要程度: {m['importance']})"
for m in memories
])
return f"\n\n## 相关历史记忆\n{memory_text}"
策略二:重要程度衰减
记忆的重要程度应随时间衰减,避免陈旧信息干扰决策。下面采用指数衰减:有效重要程度 = importance × e^(−ln 2 × 经过天数 / 半衰期),即每经过一个半衰期(默认 30 天),有效重要程度减半:
import math
from datetime import datetime
def compute_effective_importance(importance: int, created_at: str,
half_life_days: float = 30.0) -> float:
"""计算考虑时间衰减的有效重要程度"""
days_elapsed = (datetime.utcnow() - datetime.fromisoformat(created_at)).days
decay_factor = math.exp(-0.693 * days_elapsed / half_life_days)
return importance * decay_factor
策略三:矛盾检测与合并
def store_with_conflict_check(self, content: str, category: str,
importance: int) -> str:
"""存储前检查是否有矛盾的旧记忆"""
existing = self.memory.retrieve(
query=content,
categories=[category],
limit=3
)
# 相似度超过阈值则认为是同一类信息的更新
if existing and existing[0]["score"] > 0.92:
self.memory.delete(
memory_id=existing[0]["id"],
reason=f"被更新记忆替代: {content[:50]}"
)
return self.memory.store(content, category, [], importance)
25.6 生产环境最佳实践
记忆容量管理
向量数据库并非无限容量。需要设计遗忘策略:
def prune_memories(self, max_entries: int = 10000):
    """Delete the least valuable memories so at most ``max_entries`` remain.

    Memories are ranked by their time-decayed importance and the
    lowest-ranked surplus is deleted.

    Args:
        max_entries: Number of memories to keep among those fetched.
    """
    # NOTE(review): this lists memories through a similarity search for "*",
    # capped at max_entries + 1000 hits, so one call removes at most 1000
    # entries; whether every stored memory is reachable this way depends on
    # the backend -- confirm.
    candidates = self.memory.retrieve(query="*", limit=max_entries + 1000)

    # Lowest effective importance first.
    ranked = sorted(
        candidates,
        key=lambda entry: compute_effective_importance(
            entry["importance"], entry["created_at"]
        ),
    )

    surplus = len(ranked) - max_entries
    if surplus <= 0:
        return
    for entry in ranked[:surplus]:
        self.memory.delete(entry["id"], reason="容量管理自动清理")
隐私与安全
- 每个用户的记忆应当严格隔离,使用用户 ID 作为命名空间
- 敏感信息(密码、密钥)绝对不应存入记忆系统
- 提供用户自主删除全部记忆的接口(GDPR 合规)
- 记忆内容应当加密存储,避免数据库泄露风险
性能优化
- 检索操作应当异步执行,不阻塞主对话流程
- 对高频检索的类别建立专属索引
- 利用 Qdrant 的过滤器在向量搜索时同步过滤,避免后处理
小结
Memory Tool 是 Agent 从"单次对话助手"进化为"长期知识伙伴"的关键机制。通过标准化的工具接口、语义向量存储、以及精心设计的检索策略,Agent 可以在数周乃至数月的时间跨度内保持对用户上下文的深刻理解。
核心要点:
- 使用标准 JSON Schema 定义三类工具:store、retrieve、delete
- 向量数据库适合语义检索,键值存储适合结构化精确匹配
- 检索策略决定记忆的实际价值:主动预取、重要程度衰减、矛盾检测
- 生产环境需要处理容量管理、用户隔离、加密存储等工程问题
下一章将探讨如何在单次会话中通过 Context Editing 精确控制 Claude 所接收到的信息。