Case Study 1: Enterprise Code Review System (Complete Architecture with Plugin + Hook + LSP + CI/CD)
Chapter 78: Building an AI Customer Service System: Intent Recognition, Knowledge Base Q&A, and Human Fallback
78.1 System Architecture Overview
An AI customer service system is one of the most mature and highest-ROI deployment scenarios for Claude. A complete system is not a single "chatbot" but an intelligent pipeline composed of cooperating modules:
User message input
↓
[Pre-processing] Language detection → Message cleaning → Conversation context assembly
↓
[Intent Recognition] Intent classification → Entity extraction → Confidence evaluation
↓
[Routing] Rule-based routing / model-based routing → Determines processing path
↓ ↓ ↓
[FAQ answer] [KB Q&A] [Human escalation]
↓ ↓ ↓
[Post-processing] Response formatting → Brand voice check → Send
↓
[Feedback loop] User rating → Logging → Continuous optimization
This chapter systematically deconstructs each layer of this architecture with production-ready code.
78.2 Intent Recognition
78.2.1 Designing the Intent Taxonomy
Intent classification is the routing core of the entire system. Design considerations:
- Granularity: Intents cannot be too coarse (a single "inquiry" intent can't distinguish product types) or too fine (insufficient coverage)
- Hierarchy: Use a tree structure — broad categories first, then subcategories
- Confidence thresholds: Each intent needs a minimum confidence level before triggering automated handling
from anthropic import Anthropic
import json
client = Anthropic()
# Two-level intent taxonomy: top-level category -> subtypes, plus the minimum
# classifier confidence ("auto_handle_threshold") required before a message in
# that category is handled automatically. The dict is serialized into
# INTENT_CLASSIFIER_SYSTEM, and should_escalate() looks up the threshold by
# category name (the part of "category/subtype" before the slash).
INTENT_TAXONOMY = {
    "account_issues": {
        "description": "Account-related problems",
        "subtypes": {
            "login_problem": "Login/password issues",
            "account_verification": "Account verification",
            "account_suspension": "Account ban/restriction",
            "account_deletion": "Account deletion request"
        },
        "auto_handle_threshold": 0.85
    },
    "billing": {
        "description": "Billing/payment issues",
        "subtypes": {
            "payment_failed": "Payment failure",
            "refund_request": "Refund request",
            "invoice_request": "Invoice request",
            "subscription_inquiry": "Subscription inquiry"
        },
        "auto_handle_threshold": 0.80  # stricter than product/technical intents; NOTE(review): still below account_issues (0.85) - confirm intended
    },
    "product_inquiry": {
        "description": "Product feature questions",
        "subtypes": {
            "feature_question": "Feature usage questions",
            "pricing_inquiry": "Pricing inquiry",
            "compatibility": "Compatibility issues",
            "feature_request": "Feature suggestions"
        },
        "auto_handle_threshold": 0.75
    },
    "technical_support": {
        "description": "Technical problem reports",
        "subtypes": {
            "bug_report": "Bug report",
            "performance_issue": "Performance problem",
            "integration_help": "Integration assistance",
            "api_issue": "API problem"
        },
        "auto_handle_threshold": 0.70
    },
    "complaint": {
        "description": "Complaints/dissatisfaction",
        "subtypes": {
            "service_complaint": "Service complaint",
            "product_complaint": "Product complaint",
            "escalation_request": "Requesting human agent"
        },
        "auto_handle_threshold": 0.0  # never fails the confidence check; complaints reach a human via ESCALATION_TRIGGERS["intent_always_escalate"]
    }
}
# System prompt for the intent classifier. It is an f-string evaluated once at
# import time, so the taxonomy JSON embedded below reflects INTENT_TAXONOMY as
# it was when this module was loaded. Doubled braces ({{ }}) are literal braces
# in the example output, not format fields.
INTENT_CLASSIFIER_SYSTEM = f"""You are a precise customer service intent classifier.
Intent taxonomy:
{json.dumps(INTENT_TAXONOMY, indent=2)}
Classification rules:
1. Identify the primary and secondary intents
2. If the message contains multiple intents, rank by importance (maximum 2)
3. Extract key entities (product names, order IDs, amounts, etc.)
4. Assess the user's emotional state: neutral/frustrated/urgent/angry
Output JSON format:
{{
"primary_intent": "billing/refund_request",
"secondary_intent": null,
"entities": {{"order_id": "ORD-12345", "amount": "$29.99"}},
"emotion": "frustrated",
"confidence": 0.92,
"requires_context": false,
"raw_issue": "User wants a refund"
}}"""
def _extract_json_object(raw_text):
    """Return the JSON object embedded in *raw_text* as a dict, or None.

    Tolerates replies that wrap the object in a Markdown code fence or
    surround it with explanatory text by parsing only the span from the first
    "{" to the last "}".
    """
    if not isinstance(raw_text, str):
        return None
    start = raw_text.find("{")
    end = raw_text.rfind("}")
    if start == -1 or end <= start:
        return None
    try:
        parsed = json.loads(raw_text[start:end + 1])
    except json.JSONDecodeError:
        return None
    return parsed if isinstance(parsed, dict) else None


def classify_intent(user_message: str, conversation_history: list = None) -> dict:
    """Classify a customer message against the intent taxonomy.

    Args:
        user_message: The customer's latest message.
        conversation_history: Optional list of earlier turns, each a dict with
            "user" and "assistant" entries. Only the last three turns are
            included in the request.

    Returns:
        The classifier's JSON object as a dict (fields as requested in
        INTENT_CLASSIFIER_SYSTEM), with "confidence" coerced to a float and
        "primary_intent" guaranteed to be a string. If the reply contains no
        usable JSON object, returns
        {"primary_intent": "unknown", "confidence": 0.0,
         "error": "Classification failed"}.

    Errors raised by the API call itself are not caught here.
    """
    if conversation_history:
        history_text = "\n".join(
            f"User: {turn.get('user', '')}\nAgent: {turn.get('assistant', '')}"
            for turn in conversation_history[-3:]
        )
        content = f"Conversation history:\n{history_text}\n\nCurrent message: {user_message}"
    else:
        content = user_message

    response = client.messages.create(
        model="claude-haiku-4-5",
        max_tokens=300,
        system=INTENT_CLASSIFIER_SYSTEM,
        messages=[{"role": "user", "content": content}],
    )

    try:
        raw_text = response.content[0].text
    except (IndexError, AttributeError):
        # Empty content list or a non-text first block.
        raw_text = None

    result = _extract_json_object(raw_text)
    if result is None:
        return {"primary_intent": "unknown", "confidence": 0.0, "error": "Classification failed"}

    # Downstream code compares confidence numerically and does substring checks
    # on primary_intent, so normalize both here.
    try:
        result["confidence"] = float(result.get("confidence") or 0.0)
    except (TypeError, ValueError):
        result["confidence"] = 0.0
    if not isinstance(result.get("primary_intent"), str):
        result["primary_intent"] = "unknown"
    return result
78.2.2 Emotion Detection and Escalation Triggers
# Configuration for the human-escalation decision.
ESCALATION_TRIGGERS = {
    # Phrases that signal the user is explicitly asking for a human.
    "explicit_request": ["human agent", "real person", "speak to someone", "transfer me"],
    # Emotion label (as produced by the intent classifier) treated as a trigger.
    "emotion_threshold": "angry",
    # Substrings of "primary_intent" that always require a human.
    "intent_always_escalate": ["complaint", "escalation_request"],
    # Number of unresolved attempts at which the conversation is escalated.
    "failed_attempts": 3,
    # Keywords in the user's message that indicate legal/reputational risk.
    "high_risk_keywords": ["lawyer", "legal action", "BBB complaint", "news", "refund denied"]
}
def should_escalate(intent_result, conversation_history, failed_attempts) -> dict:
    """Decide whether the conversation must be handed to a human agent.

    Args:
        intent_result: Classifier output dict; "primary_intent", "emotion" and
            "confidence" are read.
        conversation_history: List of turn dicts; the "user" text of the last
            entry is scanned for explicit-request phrases and high-risk keywords.
        failed_attempts: Number of unresolved attempts so far.

    Returns:
        A dict with:
          "should_escalate": True when at least one trigger fired,
          "reasons": human-readable description of every trigger that fired,
          "priority": "high" when the emotion trigger or a high-risk keyword
              fired, otherwise "normal".
    """
    reasons = []
    # Priority is tracked with a flag rather than by searching the reason
    # strings: a case-sensitive search for "high-risk" never matches the
    # "High-risk keyword detected" reason, so keyword hits stayed "normal".
    high_priority = False

    # "or" guards against an explicit null from the classifier.
    primary = intent_result.get("primary_intent") or ""
    for always_escalate in ESCALATION_TRIGGERS["intent_always_escalate"]:
        if always_escalate in primary:
            reasons.append(f"Intent type requires human: {always_escalate}")

    if intent_result.get("emotion") == ESCALATION_TRIGGERS.get("emotion_threshold", "angry"):
        reasons.append("User is emotionally distressed")
        high_priority = True

    try:
        confidence = float(intent_result.get("confidence") or 0)
    except (TypeError, ValueError):
        # Non-numeric confidence is treated as no confidence at all.
        confidence = 0.0
    intent_category = primary.split("/", 1)[0]
    threshold = INTENT_TAXONOMY.get(intent_category, {}).get("auto_handle_threshold", 0.8)
    if confidence < threshold:
        reasons.append(f"Confidence {confidence:.2f} below threshold {threshold}")

    if failed_attempts >= ESCALATION_TRIGGERS["failed_attempts"]:
        reasons.append(f"Unresolved after {failed_attempts} attempts")

    latest_message = ""
    if conversation_history:
        latest_message = (conversation_history[-1].get("user") or "").lower()

    # An explicit request for a person is honored directly; one reason is enough.
    for phrase in ESCALATION_TRIGGERS.get("explicit_request", []):
        if phrase.lower() in latest_message:
            reasons.append(f"User explicitly requested a human agent: {phrase}")
            break

    for keyword in ESCALATION_TRIGGERS["high_risk_keywords"]:
        if keyword.lower() in latest_message:
            reasons.append(f"High-risk keyword detected: {keyword}")
            high_priority = True

    return {
        "should_escalate": len(reasons) > 0,
        "reasons": reasons,
        "priority": "high" if high_priority else "normal",
    }
78.3 Knowledge Base Q&A
78.3.1 Knowledge Base Architecture
An AI customer service knowledge base typically consists of four content types: FAQ (question-answer pairs, updated weekly), product documentation (updated per release), policy documents (updated monthly), and case history (continuously updated successful resolution cases).
78.3.2 Retrieval-Augmented Q&A
def answer_with_knowledge_base(user_query, knowledge_base, company_name, intent_result) -> dict:
    """Answer a customer question from retrieved knowledge-base documents.

    Calls ``knowledge_base.search(user_query, top_k=5)``; each returned
    document is read through its "type" and "content" entries. When nothing is
    retrieved, no model call is made and the result asks for escalation.
    Otherwise the documents are passed to the model as the only permitted
    source, and the reply is scanned for refusal phrases to decide whether it
    counts as an answer.

    Returns:
        A dict with "answer", "confidence", "source" and "should_escalate";
        the knowledge-base path also includes "referenced_docs" (the types of
        the first three documents).
    """
    hits = knowledge_base.search(user_query, top_k=5)
    if not hits:
        # Nothing to ground an answer on: hand the decision back to the caller.
        return dict(answer=None, confidence=0.0, source="no_knowledge", should_escalate=True)

    kb_context = "\n\n".join(
        f"[{hit['type'].upper()}] {hit['content']}" for hit in hits
    )

    system_lines = [
        f"You are a professional customer service representative for {company_name}.",
        "Answer rules:",
        "1. Answer ONLY based on the provided knowledge base content",
        "2. If the knowledge base has no relevant information, clearly state you cannot answer and recommend human support",
        "3. Do NOT fabricate information or make commitments beyond what the knowledge base says",
        "4. Maintain a professional, warm, and helpful tone",
        "5. When citing policies, reference the specific clause",
        f"User's current emotional state: {intent_result.get('emotion', 'neutral')}",
        "(If the user is emotionally negative, acknowledge their feelings before addressing the issue)",
    ]
    qa_system = "\n".join(system_lines)

    prompt_lines = [
        f"User question: {user_query}",
        "Relevant knowledge base content:",
        f"{kb_context}",
        "Answer the user's question based on the knowledge base. If the content is insufficient, say so directly.",
    ]
    qa_prompt = "\n".join(prompt_lines)

    response = client.messages.create(
        model="claude-opus-4-5",
        max_tokens=500,
        system=qa_system,
        messages=[{"role": "user", "content": qa_prompt}],
    )
    reply = response.content[0].text

    # A reply containing any of these phrases is treated as "could not answer".
    reply_lower = reply.lower()
    refusal_markers = ("cannot answer", "don't have information", "please contact")
    declined = any(marker in reply_lower for marker in refusal_markers)

    return {
        "answer": reply,
        "confidence": 0.2 if declined else 0.85,
        "source": "knowledge_base",
        "referenced_docs": [hit["type"] for hit in hits[:3]],
        "should_escalate": declined,
    }
78.4 Conversation Flow Management
78.4.1 Multi-Turn State Machine
from enum import Enum
class ConversationState(Enum):
    """Lifecycle states of a customer service conversation."""

    # Initial state assigned by CustomerServiceConversation.__init__.
    GREETING = "greeting"
    INTENT_COLLECTION = "intent_collection"
    INFO_GATHERING = "info_gathering"
    RESOLVING = "resolving"
    # Set when the conversation is handed off to a human agent.
    ESCALATING = "escalating"
    RESOLVED = "resolved"
    CLOSED = "closed"
class CustomerServiceConversation:
    """Stateful multi-turn customer service session.

    Each incoming message is classified, checked for escalation, answered from
    the knowledge base when possible, and otherwise answered by a general
    model fallback. Every reply sent to the user is recorded in ``history``.

    ``company_config`` keys read by this class:
        "company_name"     (required for the knowledge-base path)
        "knowledge_base"   (optional; object passed to answer_with_knowledge_base)
        "help_center_url"  (optional; defaults to "help.example.com")
        "support_email"    (optional; defaults to "support@example.com")
        "fallback_model"   (optional; defaults to "claude-haiku-4-5")
    """

    def __init__(self, session_id: str, company_config: dict):
        self.session_id = session_id
        self.config = company_config
        self.state = ConversationState.GREETING
        # Recorded turns: dicts with "user", "assistant", "intent", "source".
        self.history = []
        # One classifier result per processed message, in order.
        self.intent_history = []
        # Incremented each time the general fallback has to answer.
        self.failed_attempts = 0
        # Entities extracted by the classifier, accumulated across turns.
        self.gathered_info = {}
        # Populated by _handle_escalation; None until an escalation happens.
        self.escalation_info = None

    def process_message(self, user_message: str) -> dict:
        """Route one user message and return the reply payload.

        Returns:
            A dict with "response" and "state"; non-escalation paths add
            "requires_followup", the escalation path adds "escalation_data".
        """
        # Step 1: Intent recognition
        intent = classify_intent(user_message, self.history[-5:] if self.history else None)
        self.intent_history.append(intent)
        entities = intent.get("entities")
        if isinstance(entities, dict):
            # Keep extracted order IDs, amounts, etc. for the human handoff.
            self.gathered_info.update(entities)

        # Step 2: Escalation check. The current message is not in history yet,
        # so append a provisional turn; otherwise the keyword scan would look
        # at the previous message instead of this one.
        pending_turn = {"user": user_message}
        escalation_check = should_escalate(
            intent, self.history + [pending_turn], self.failed_attempts
        )
        if escalation_check["should_escalate"]:
            return self._handle_escalation(user_message, escalation_check)

        # Step 3: Clarify if confidence is too low.
        # NOTE: every auto_handle_threshold in INTENT_TAXONOMY is >= 0.70 except
        # "complaint" (which escalates by intent), and unknown categories use
        # 0.8, so should_escalate already fires below 0.6. This branch is a
        # safety net in case those thresholds are lowered.
        if intent.get("confidence", 0) < 0.6:
            return self._clarify_intent(user_message)

        # Step 4: KB lookup and answer
        kb = self.config.get("knowledge_base")
        if kb:
            qa_result = answer_with_knowledge_base(
                user_message, kb, self.config["company_name"], intent
            )
            if qa_result["confidence"] > 0.7:
                response = qa_result["answer"]
                self.state = ConversationState.RESOLVED
                self._record_turn(user_message, response, intent, "knowledge_base")
                return {"response": response, "state": "resolved", "requires_followup": True}

        # Step 5: General LLM fallback
        self.failed_attempts += 1
        return self._general_response(user_message, intent)

    def _clarify_intent(self, user_message: str) -> dict:
        """Ask the user to restate the request when the intent is unclear."""
        self.state = ConversationState.INTENT_COLLECTION
        response = (
            "I want to make sure I understand your request correctly. "
            "Could you tell me a bit more about what you need help with?"
        )
        intent = self.intent_history[-1] if self.intent_history else {}
        self._record_turn(user_message, response, intent, "clarification")
        return {"response": response, "state": "intent_collection", "requires_followup": True}

    def _general_response(self, user_message: str, intent: dict) -> dict:
        """Answer with the model alone when the knowledge base gave no confident answer."""
        self.state = ConversationState.RESOLVING
        company = self.config.get("company_name", "our company")
        system_prompt = (
            f"You are a professional customer service representative for {company}. "
            "No confident knowledge base answer was found for this question. "
            "Help as far as you can with general guidance, do NOT invent policies, "
            "prices, or commitments, and offer to connect the user with a human "
            "agent if you are unsure."
        )
        transcript = [
            f"User: {turn['user']}\nAgent: {turn['assistant']}"
            for turn in self.history[-5:]
        ]
        if transcript:
            history_text = "\n".join(transcript)
            content = f"Conversation history:\n{history_text}\n\nCurrent message: {user_message}"
        else:
            content = user_message
        response = client.messages.create(
            model=self.config.get("fallback_model", "claude-haiku-4-5"),
            max_tokens=500,
            system=system_prompt,
            messages=[{"role": "user", "content": content}],
        )
        reply = response.content[0].text
        self._record_turn(user_message, reply, intent, "general_llm")
        return {"response": reply, "state": "resolving", "requires_followup": True}

    def _handle_escalation(self, user_message: str, escalation_check: dict) -> dict:
        """Switch to the escalating state and build the handoff payload."""
        self.state = ConversationState.ESCALATING
        # Include the triggering message: it has not been recorded yet.
        summary = self._generate_handoff_summary(pending_user_message=user_message)
        wait_time = "2-3 minutes" if escalation_check["priority"] == "high" else "5-10 minutes"
        help_center = self.config.get("help_center_url", "help.example.com")
        support_email = self.config.get("support_email", "support@example.com")
        escalation_message = "\n".join([
            "I understand this situation requires the assistance of a specialized human agent. "
            "I've transferred your conversation history and issue details to our support team.",
            f"Estimated wait time: {wait_time}",
            "While you wait, you can also:",
            f"- Visit our Help Center ({help_center})",
            f"- Email us at {support_email}",
            "Thank you for your patience!",
        ])
        self.escalation_info = {
            "trigger_reasons": escalation_check["reasons"],
            "priority": escalation_check["priority"],
            "conversation_summary": summary,
            "user_info": self.gathered_info,
            "intent_history": [i.get("primary_intent") for i in self.intent_history]
        }
        # Record the turn so the human agent sees the message that triggered
        # the handoff in the conversation history.
        intent = self.intent_history[-1] if self.intent_history else {}
        self._record_turn(user_message, escalation_message, intent, "escalation")
        return {
            "response": escalation_message,
            "state": "escalating",
            "escalation_data": self.escalation_info
        }

    def _generate_handoff_summary(self, pending_user_message: str = None) -> str:
        """Summarize the last five turns (plus an unrecorded message) for the agent.

        Returns "New session, no history" without calling the model when there
        is nothing to summarize.
        """
        turns = [
            f"User: {turn['user']}\nAI: {turn['assistant']}"
            for turn in self.history[-5:]
        ]
        if pending_user_message:
            turns.append(f"User: {pending_user_message}")
        if not turns:
            return "New session, no history"
        history_text = "\n".join(turns)
        summary_prompt = "\n".join([
            "Generate a concise handoff summary (under 150 words) for the following customer service conversation.",
            "Conversation:",
            history_text,
            "Summary should include:",
            "1. The user's primary issue",
            "2. Solutions already attempted",
            "3. Why escalation is needed",
            "4. User's emotional state",
        ])
        response = client.messages.create(
            model="claude-haiku-4-5",
            max_tokens=300,
            messages=[{"role": "user", "content": summary_prompt}]
        )
        return response.content[0].text

    def _record_turn(self, user_msg, assistant_msg, intent, source):
        """Append one completed exchange to the conversation history."""
        self.history.append({
            "user": user_msg,
            "assistant": assistant_msg,
            "intent": intent.get("primary_intent"),
            "source": source
        })
78.5 Human Handoff Protocol
78.5.1 Handoff Data Package
class HandoffPackage:
    """Complete data package for human agent takeover.

    Wraps a conversation object and reads its ``session_id``,
    ``escalation_info``, ``gathered_info``, ``intent_history`` and ``history``
    attributes.
    """

    # The annotation is a string so this class does not require
    # CustomerServiceConversation to exist at definition time.
    def __init__(self, conversation: "CustomerServiceConversation"):
        self.conversation = conversation

    def generate(self) -> dict:
        """Build the handoff payload for the human agent.

        Safe to call before an escalation has been recorded: a missing
        ``escalation_info`` yields priority "normal" and no escalation reasons.

        Returns:
            A dict with "session_id", "timestamp" (local time, ISO 8601),
            "priority", "user_profile", "issue_summary", "recommended_action"
            and "full_conversation".
        """
        from datetime import datetime

        conversation = self.conversation
        # escalation_info stays None until the conversation has escalated.
        escalation = conversation.escalation_info or {}
        intents = conversation.intent_history

        if intents:
            primary_issue = intents[0].get("raw_issue", "Unknown")
        else:
            primary_issue = "Unknown"

        return {
            "session_id": conversation.session_id,
            "timestamp": datetime.now().isoformat(),
            "priority": escalation.get("priority", "normal"),
            "user_profile": {
                "identified_info": conversation.gathered_info,
                "emotion_trajectory": [
                    intent.get("emotion", "neutral") for intent in intents
                ],
                "interaction_count": len(conversation.history)
            },
            "issue_summary": {
                "primary_issue": primary_issue,
                "intent_history": [i.get("primary_intent") for i in intents],
                "attempted_solutions": [
                    turn.get("source") for turn in conversation.history
                ],
                "escalation_reasons": escalation.get("trigger_reasons", [])
            },
            "recommended_action": self._recommend_action(),
            "full_conversation": conversation.history
        }

    def _recommend_action(self) -> str:
        """Suggest a next step for the agent based on the intents seen so far."""
        # "or" guards against a classifier result whose primary_intent is null.
        intents = [
            i.get("primary_intent") or "" for i in self.conversation.intent_history
        ]
        if any("refund" in i for i in intents):
            return "Verify order details and process refund request per policy"
        if any("complaint" in i for i in intents):
            return "Lead with empathy, understand specific grievance, follow complaint escalation process"
        return "Continue diagnosing the issue and provide resolution"
78.6 Key Metrics and Continuous Optimization
# Key service metrics and their targets. Each entry carries a description and
# a target written as a human-readable comparison string (e.g. ">= 0.70"),
# not a numeric value; all targets except the satisfaction score are ratios.
CUSTOMER_SERVICE_METRICS = {
    "deflection_rate": {
        "description": "Percentage AI resolves without human intervention",
        "target": ">= 0.70"
    },
    "escalation_accuracy": {
        "description": "Proportion of escalations confirmed necessary by human agents",
        "target": ">= 0.85"
    },
    "first_contact_resolution": {
        "description": "Single-session resolution rate",
        "target": ">= 0.65"
    },
    "intent_recognition_accuracy": {
        "description": "Intent classification accuracy",
        "target": ">= 0.90"
    },
    # Measured on a 1-5 scale, unlike the ratio-valued metrics above.
    "user_satisfaction_score": {
        "description": "User satisfaction (1-5 scale)",
        "target": ">= 4.2"
    }
}
Summary
The core value of an AI customer service system lies in three things: precise intent recognition to route issues to the correct handling path, knowledge base Q&A to efficiently resolve standardized problems, and a trustworthy escalation mechanism to safely hand off users to human agents at the boundaries of AI capability.
The fundamental design principle is conservative over aggressive: it is better to escalate to human agents one extra time than to trap a user with a legitimate need in a dead-end automated flow. Escalation triggers should be permissive, the escalation experience should be seamless, and the handoff data package should be complete — these three points are the foundation of a trustworthy AI customer service system.