第 60 章

Google Vertex AI 集成：多区域端点 / 数据驻留 / 与直接 API 的功能差异

第六十章：与 LlamaIndex 集成：文档智能与企业知识库

60.1 LlamaIndex 的定位：文档智能的基础设施

LlamaIndex（原 GPT Index）是专注于数据连接与检索的 LLM 应用框架。如果说 LangChain 是"LLM 应用的瑞士军刀"，那么 LlamaIndex 就是"文档智能的专业工具箱"。其核心价值在于：

统一的数据连接器：支持 100+ 数据源（PDF、Word、Excel、Confluence、Notion、数据库等）
高性能索引引擎：向量索引、关键词索引、知识图谱索引，可组合使用
查询引擎：将自然语言查询转化为结构化检索，再由 LLM 生成答案
Agent 框架：基于检索的 ReAct Agent

将 Claude 作为 LlamaIndex 的 LLM 后端，可以构建出理解能力强、幻觉少、能处理超长上下文的企业知识库系统。

60.2 环境配置

pip install llama-index llama-index-llms-anthropic llama-index-embeddings-voyageai
pip install llama-index-readers-file  # file readers (PDFReader, DocxReader)
pip install llama-index-vector-stores-chroma  # vector store (optional)
pip install llama-index-multi-modal-llms-anthropic  # AnthropicMultiModal (section 60.5.2)
pip install llama-index-storage-docstore-redis llama-index-storage-index-store-redis  # Redis stores (section 60.8.2, optional)

60.2.1 配置 Claude 为 LlamaIndex 的 LLM

from llama_index.llms.anthropic import Anthropic
from llama_index.core import Settings

import os

# Global LLM, used by every index / query engine that does not override it.
# The API key is read from the environment so that no credential is ever
# stored in the source file.
llm = Anthropic(
    model="claude-opus-4-5",
    api_key=os.environ["ANTHROPIC_API_KEY"],
    max_tokens=4096,
    temperature=0,
)
Settings.llm = llm

# Embedding model. Voyage AI is a separate embedding provider (not an
# Anthropic product); Anthropic's documentation points to it because Anthropic
# does not offer an embedding model of its own.
from llama_index.embeddings.voyageai import VoyageEmbedding
embed_model = VoyageEmbedding(
    model_name="voyage-3",
    voyage_api_key=os.environ["VOYAGE_API_KEY"],
)
Settings.embed_model = embed_model

# Chunking parameters applied when documents are split into nodes. Claude's
# large context window leaves room to raise chunk_size if retrieval quality
# benefits from bigger chunks.
Settings.chunk_size = 1024
Settings.chunk_overlap = 128

60.3 构建基础文档索引

60.3.1 从本地文件构建索引

from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.core import StorageContext, load_index_from_storage
import os

# Directory holding the persisted index.
PERSIST_DIR = "./index_storage"

# Load the source documents. `documents` stays defined at module level on both
# paths because the Chroma example in section 60.8.1 indexes the same list.
documents = SimpleDirectoryReader(
    input_dir="./company_docs",
    recursive=True,
    required_exts=[".pdf", ".docx", ".md", ".txt"],
    filename_as_id=True,  # use the file name as the document ID
).load_data()

print(f"加载了 {len(documents)} 个文档块")

if os.path.exists(PERSIST_DIR):
    # A persisted index exists: load it instead of re-embedding everything.
    # The check has to come BEFORE building; checking after persist() would
    # always be true and the index would be rebuilt on every run.
    # Delete PERSIST_DIR to force a full rebuild.
    storage_context = StorageContext.from_defaults(persist_dir=PERSIST_DIR)
    index = load_index_from_storage(storage_context)
else:
    # First run: build the vector index and persist it to disk.
    index = VectorStoreIndex.from_documents(
        documents,
        show_progress=True,
    )
    index.storage_context.persist(persist_dir=PERSIST_DIR)
    print("索引已保存")

60.3.2 从多种数据源加载文档

from llama_index.core import Document
from llama_index.readers.file import PDFReader, DocxReader
from llama_index.core.node_parser import SentenceSplitter

# Parse the annual report with the dedicated PDF reader.
pdf_reader = PDFReader()
pdf_docs = pdf_reader.load_data("./reports/annual_report_2024.pdf")


def _wiki_article_to_document(article):
    """Wrap one wiki API article in a Document carrying provenance metadata."""
    text = article["content"]
    # Key order is kept stable on purpose: source, per-article fields, doc_type.
    metadata = {"source": "company_wiki"}
    for field in ("author", "created_at", "department"):
        metadata[field] = article[field]
    metadata["doc_type"] = "policy"
    return Document(text=text, metadata=metadata)


# Documents built from data returned by the wiki API.
api_docs = list(map(_wiki_article_to_document, wiki_api.get_articles()))

# Single corpus: PDF pages first, wiki articles after.
all_docs = [*pdf_docs, *api_docs]

# Sentence-boundary splitter so that chunks stay semantically intact.
splitter = SentenceSplitter(
    chunk_size=1024, chunk_overlap=128, paragraph_separator="\n\n"
)

# Index the corpus using the custom splitter as the only transformation.
index = VectorStoreIndex.from_documents(
    all_docs, transformations=[splitter], show_progress=True
)

60.4 查询引擎：从文档中提取知识

60.4.1 基础查询引擎

from llama_index.core import VectorStoreIndex
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.retrievers import VectorIndexRetriever

# Retriever: fetch the 5 chunks most similar to the query.
retriever = VectorIndexRetriever(
    index=index,
    similarity_top_k=5,
)

# Build the query engine on top of the retriever configured above, so the
# retriever is actually used rather than being silently duplicated by
# index.as_query_engine(). "tree_summarize" is the hierarchical summarisation
# mode suited to long documents.
query_engine = RetrieverQueryEngine.from_args(
    retriever=retriever,
    response_mode="tree_summarize",
    verbose=True,
)

# Run a query.
response = query_engine.query("公司的差旅报销标准是什么？住宿费上限是多少？")
print(response.response)

# Show the source chunks the answer was derived from.
for node in response.source_nodes:
    # The score can be absent on a retrieved node; formatting None with
    # ":.3f" would raise, so fall back to a placeholder.
    score_text = "N/A" if node.score is None else f"{node.score:.3f}"
    print(f"\n来源: {node.metadata.get('file_name', '未知')}")
    print(f"相关度: {score_text}")
    print(f"内容片段: {node.text[:200]}...")

60.4.2 高级查询模式

from llama_index.core.query_engine import SubQuestionQueryEngine
from llama_index.core.tools import QueryEngineTool

# One independent index per document category.
policy_index = VectorStoreIndex.from_documents(policy_docs)
technical_index = VectorStoreIndex.from_documents(technical_docs)
hr_index = VectorStoreIndex.from_documents(hr_docs)

# (index, tool name, tool description) for every category, in routing order.
_TOOL_SPECS = (
    (
        policy_index,
        "policy_search",
        "搜索公司政策和规章制度，包括报销、考勤、采购等",
    ),
    (
        technical_index,
        "technical_docs",
        "搜索技术文档、API 文档、架构设计文档",
    ),
    (
        hr_index,
        "hr_knowledge",
        "搜索人力资源相关文档，包括福利、假期、晋升流程",
    ),
)

# Wrap each index's default query engine as a named tool.
tools = [
    QueryEngineTool.from_defaults(
        query_engine=category_index.as_query_engine(),
        name=tool_name,
        description=tool_description,
    )
    for category_index, tool_name, tool_description in _TOOL_SPECS
]

# Sub-question engine: breaks a complex question into sub-questions that are
# answered through the tools above.
sub_question_engine = SubQuestionQueryEngine.from_defaults(
    query_engine_tools=tools, verbose=True
)

# A multi-part question that gets decomposed automatically.
response = sub_question_engine.query(
    "我想了解公司的出差政策：住宿标准、交通报销规则，以及出差期间的加班费如何计算？"
)
print(response.response)

60.4.3 流式响应

from llama_index.core import VectorStoreIndex

# Query engine that streams the answer instead of returning it all at once.
query_engine = index.as_query_engine(streaming=True)

streaming_response = query_engine.query("总结公司2024年的主要产品里程碑")

# Print each piece of text as soon as it arrives.
for text in streaming_response.response_gen:
    print(text, end="", flush=True)
print()  # final newline

# Sources are read after the stream has been consumed.
for node in streaming_response.source_nodes:
    # Custom documents carry a 'source' key while file-loaded documents carry
    # 'file_name'; check both so file-based nodes do not print "None".
    origin = node.metadata.get('source') or node.metadata.get('file_name', '未知')
    print(f"来源: {origin}")

60.5 构建企业知识库 Agent

60.5.1 基于 ReAct 的知识库 Agent

from llama_index.core.agent import ReActAgent
from llama_index.core.tools import QueryEngineTool, FunctionTool
from llama_index.llms.anthropic import Anthropic

# LLM handed to the agent created further below.
llm = Anthropic(model="claude-opus-4-5", max_tokens=4096)

# Knowledge-base retrieval tool wrapping the existing query engine. The
# description spells out which questions the tool suits and which it does not
# (real-time data belongs to the other tools).
kb_tool = QueryEngineTool.from_defaults(
    query_engine=query_engine,
    name="knowledge_base",
    description="""搜索公司内部知识库。适合以下类型的问题：
    - 公司政策查询（报销、考勤、假期）
    - 产品文档和技术规范
    - 历史项目信息
    - 组织结构和联系人
    不适合：需要实时数据的问题（请使用其他工具）"""
)

# 自定义函数工具：获取实时数据
def get_employee_info(employee_id: str) -> str:
    """Return a one-line summary (name, department, manager) for an employee.

    The profile values are fixed sample data; this is the place where a real
    HR API call would go.
    """
    profile_fields = ("姓名=张三", "部门=技术研发部", "直属上司=李四")
    return "员工 {}: {}".format(employee_id, ", ".join(profile_fields))

# Expose get_employee_info as a tool with an explicit name and description.
employee_tool = FunctionTool.from_defaults(
    fn=get_employee_info,
    name="get_employee_info",
    description="根据员工工号查询员工信息，返回姓名、部门、直属上司等"
)

def get_current_projects() -> str:
    """Return the in-progress project list as newline-separated text.

    The list is fixed sample data.
    """
    lines = [
        "进行中的项目：",
        "1. 智能客服系统（Q2 2025）",
        "2. 数据中台建设（Q3 2025）",
    ]
    return "\n".join(lines)

# Expose get_current_projects as a tool with an explicit name and description.
projects_tool = FunctionTool.from_defaults(
    fn=get_current_projects,
    name="get_current_projects",
    description="获取公司当前进行中的项目列表"
)

# Create the ReAct agent from the three tools, capped at 8 iterations. The
# `context` text gives the assistant its persona and answering rules.
# NOTE(review): ReActAgent.from_tools / agent.chat look like the pre-workflow
# agent API - confirm the pinned llama-index version still provides them.
agent = ReActAgent.from_tools(
    tools=[kb_tool, employee_tool, projects_tool],
    llm=llm,
    verbose=True,
    max_iterations=8,
    context="""你是公司的智能知识助手 Aria。
你的职责是帮助员工找到需要的信息，包括公司政策、技术文档、人员信息等。
回答时要准确引用来源，对不确定的信息要明确说明。"""
)

# Try the agent with a question that touches more than one tool.
response = agent.chat("新入职员工工号 E12345 的报销额度是多少？他属于哪个项目？")
print(response.response)

60.5.2 多模态知识库（处理图片和表格）

# AnthropicMultiModal ships in the separate integration package
# `llama-index-multi-modal-llms-anthropic`; it is not part of llama_index.core.
from llama_index.multi_modal_llms.anthropic import AnthropicMultiModal
from llama_index.core.schema import ImageDocument
from llama_index.core import Document, SimpleDirectoryReader, VectorStoreIndex

# Multi-modal LLM configuration. The output limit of this class is named
# `max_tokens` (`max_new_tokens` is the OpenAI multi-modal spelling).
mm_llm = AnthropicMultiModal(
    model="claude-opus-4-5",
    max_tokens=1024,
)

# Load the image files (flow charts, architecture diagrams, ...).
image_docs = SimpleDirectoryReader(
    input_dir="./diagrams",
    required_exts=[".png", ".jpg", ".jpeg"],
).load_data()

# Use Claude's vision capability to turn every image into a text description
# that can be indexed like any other document.
text_descriptions = []
for img_doc in image_docs:
    # One completion per image: the prompt asks for steps, component
    # relationships and data shown in the picture.
    description = mm_llm.complete(
        prompt="请详细描述这张图片的内容，特别是图中的流程步骤、组件关系或数据信息。",
        image_documents=[img_doc],
    )

    text_descriptions.append(Document(
        text=description.text,
        metadata={
            "source_image": img_doc.metadata.get("file_name"),
            "doc_type": "image_description",
        },
    ))

# Index the image descriptions together with the regular text documents.
all_docs = text_docs + text_descriptions
index = VectorStoreIndex.from_documents(all_docs)

60.6 增量索引更新

企业知识库的内容是动态变化的，需要支持增量更新而非每次全量重建。

from llama_index.core import VectorStoreIndex
from llama_index.core.storage.docstore import SimpleDocumentStore
from llama_index.core.ingestion import IngestionPipeline
from llama_index.core.node_parser import SentenceSplitter
from llama_index.embeddings.voyageai import VoyageEmbedding

embed_model = VoyageEmbedding(model_name="voyage-3")

# Pipeline stages: split into chunks first, then embed inside the pipeline.
_chunker = SentenceSplitter(chunk_size=1024, chunk_overlap=128)
# Document store the pipeline uses to keep track of what it has processed.
_processed_docs = SimpleDocumentStore()

# Reusable ingestion pipeline.
pipeline = IngestionPipeline(
    transformations=[_chunker, embed_model],
    docstore=_processed_docs,
)

# Initial full ingestion: run every document through the pipeline, index the
# resulting nodes, then persist both the index and the pipeline state.
nodes = pipeline.run(documents=initial_docs, show_progress=True)
index = VectorStoreIndex(nodes)
index.storage_context.persist(persist_dir="./index_storage")
pipeline.persist("./pipeline_storage")

# 增量更新（只处理新增或修改的文档）
def update_knowledge_base(new_or_modified_docs: list):
    """Incrementally update the persisted knowledge base.

    Documents the pipeline has already seen unchanged are skipped; new or
    modified documents are re-chunked, re-embedded and written into the
    persisted index.

    Args:
        new_or_modified_docs: Candidate documents to ingest. May be empty.

    Returns:
        None. Progress is reported via ``print``.
    """
    if not new_or_modified_docs:
        print("没有检测到文档变化")
        return

    # IngestionPipeline has no `from_persist_dir` constructor. Persisted state
    # (cache + docstore history) is restored with the instance method `load()`,
    # and the transformations are not persisted, so the pipeline must be
    # rebuilt with the same stages that were used for the initial ingestion.
    pipeline = IngestionPipeline(
        transformations=[
            SentenceSplitter(chunk_size=1024, chunk_overlap=128),
            embed_model,
        ],
    )
    pipeline.load("./pipeline_storage")

    # run() drops documents whose content hash is already in the docstore.
    new_nodes = pipeline.run(documents=new_or_modified_docs)

    if not new_nodes:
        print("没有检测到文档变化")
        return

    # Load the existing index.
    storage_context = StorageContext.from_defaults(persist_dir="./index_storage")
    index = load_index_from_storage(storage_context)

    # A modified document is re-ingested as a whole, so remove the nodes of its
    # previous version first; otherwise old and new chunks would both be
    # retrievable. For brand-new documents nothing matches and this is a no-op.
    changed_doc_ids = {node.ref_doc_id for node in new_nodes if node.ref_doc_id}
    for ref_doc_id in changed_doc_ids:
        index.delete_ref_doc(ref_doc_id, delete_from_docstore=True)

    # Insert all new nodes in one batch instead of one call per node.
    index.insert_nodes(new_nodes)

    # Persist the updated index and the pipeline's bookkeeping.
    index.storage_context.persist(persist_dir="./index_storage")
    pipeline.persist("./pipeline_storage")

    print(f"已更新 {len(new_nodes)} 个新节点")

# Call this periodically to apply incremental updates.
update_knowledge_base(fetch_modified_docs_since_last_update())

60.7 检索质量评估

from llama_index.core.evaluation import (
    FaithfulnessEvaluator,
    RelevancyEvaluator,
    CorrectnessEvaluator
)
from llama_index.llms.anthropic import Anthropic

# Judge LLM shared by the evaluators below.
eval_llm = Anthropic(model="claude-opus-4-5")

# Faithfulness: is the answer grounded in the retrieved context (hallucination guard)?
faithfulness_evaluator = FaithfulnessEvaluator(llm=eval_llm)

# Relevancy: are the retrieved documents relevant to the question?
relevancy_evaluator = RelevancyEvaluator(llm=eval_llm)

# Correctness: needs a reference answer. It is constructed here but not called
# in the loop below.
correctness_evaluator = CorrectnessEvaluator(llm=eval_llm)

# Batch of queries to evaluate.
test_questions = [
    "员工年假天数是多少？",
    "如何申请差旅报销？",
    "试用期员工的福利待遇是什么？"
]

for question in test_questions:
    response = query_engine.query(question)
    
    # Faithfulness evaluation.
    faithfulness_result = faithfulness_evaluator.evaluate_response(response=response)
    
    # Relevancy evaluation (needs the original question as well).
    relevancy_result = relevancy_evaluator.evaluate_response(
        query=question, response=response
    )
    
    print(f"\n问题: {question}")
    print(f"忠实度: {faithfulness_result.score:.2f} ({faithfulness_result.feedback})")
    print(f"相关性: {relevancy_result.score:.2f}")

60.8 生产化部署建议

60.8.1 使用 Chroma 或 Qdrant 作为生产向量数据库

from llama_index.vector_stores.chroma import ChromaVectorStore
from llama_index.core import StorageContext, VectorStoreIndex
import chromadb

# Connect to the Chroma server and open (or create) the collection.
chroma_client = chromadb.HttpClient(host="localhost", port=8000)
chroma_collection = chroma_client.get_or_create_collection("company_knowledge")

# Adapter that lets LlamaIndex use the Chroma collection as its vector store.
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Build the index through the Chroma-backed storage context.
index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
    show_progress=True
)

60.8.2 缓存策略

# The Redis-backed stores are not part of llama_index.core; they ship in the
# integration packages `llama-index-storage-index-store-redis` and
# `llama-index-storage-docstore-redis`.
from llama_index.storage.index_store.redis import RedisIndexStore
from llama_index.storage.docstore.redis import RedisDocumentStore

# Keep the index store and the document store in Redis so a large index does
# not have to be reloaded from local files each time.
storage_context = StorageContext.from_defaults(
    index_store=RedisIndexStore.from_host_and_port(host="localhost", port=6379),
    docstore=RedisDocumentStore.from_host_and_port(host="localhost", port=6379),
)

小结

LlamaIndex 与 Claude 的组合在企业知识库场景中具有显著优势：Claude 的 200K token 上下文窗口允许一次性处理超长文档，而 LlamaIndex 提供了索引构建、增量更新、多源数据融合的完整工具链。核心架构是：多源数据加载 → SentenceSplitter 分块 → Voyage Embedding → 向量存储 → 查询引擎 → Claude 生成答案。生产部署中，增量更新 Pipeline 和向量数据库（Chroma/Qdrant）是保持知识库实时性的关键，而 FaithfulnessEvaluator 是控制幻觉风险的重要工具。

本章评分

4.6 / 5 (3 评分)