Introduction
AI agents are autonomous systems that can perceive their environment, make decisions, and take actions to achieve goals. Building production-grade agents requires careful architecture, tool integration, error handling, and monitoring. This guide covers agent design patterns, tool integration, decision-making frameworks, and deployment strategies.
Key Statistics:
- AI agent market growing 45% annually
- Agents reduce manual work by 60-80%
- Tool integration complexity: 40% of development time
- Production agent reliability: 99%+ required
Core Concepts
1. Agent
Autonomous system that perceives, decides, and acts.
2. Tool
External capability an agent can use (API, function, service).
3. Perception
Agent’s ability to understand its environment.
4. Decision-Making
Agent’s reasoning process to choose actions.
5. Action
Agent’s ability to affect its environment.
6. Goal
Desired outcome the agent is trying to achieve.
7. State
Agent’s current understanding of the world.
8. Planning
Agent’s process of determining action sequences.
9. Feedback Loop
Agent’s ability to learn from outcomes.
10. Orchestration
Managing multiple agents working together.
Agent Architecture
from enum import Enum
from typing import Optional, List, Dict
from dataclasses import dataclass
class AgentState(Enum):
    """Lifecycle states an agent moves through while pursuing a goal."""
    IDLE = "idle"          # constructed, no goal running yet
    THINKING = "thinking"  # deciding the next action via the LLM
    ACTING = "acting"      # executing a tool call
    WAITING = "waiting"    # blocked on an external result
    COMPLETE = "complete"  # goal achieved or agent declared itself done
    ERROR = "error"        # aborted (e.g. iteration limit reached)
@dataclass
class AgentAction:
    """One tool invocation chosen by the agent."""
    tool: str       # name of the tool to invoke (key into the agent's tools dict)
    input: Dict     # keyword arguments passed to the tool callable
    reasoning: str  # raw LLM response that justified this action
class AIAgent:
    """Production-grade AI agent.

    Runs a bounded think/act loop: ask the model for the next action,
    execute it with a registered tool, record the outcome in memory, and
    stop when the goal looks achieved or the iteration budget is spent.
    """

    def __init__(self, name: str, model_client, tools: Dict):
        """Create an agent.

        Args:
            name: Human-readable agent identifier.
            model_client: LLM client exposing a ``.complete(prompt) -> str`` method.
            tools: Mapping of tool name -> callable invoked with keyword args.
        """
        self.name = name
        self.model = model_client
        self.tools = tools
        self.state = AgentState.IDLE
        # Per-action history; note it persists across run() calls, so
        # repeated runs accumulate context.
        self.memory = []
        self.max_iterations = 10  # hard cap against infinite loops

    def run(self, goal: str) -> str:
        """Run the think/act loop until the goal is achieved or budget runs out."""
        self.state = AgentState.THINKING
        iteration = 0
        while iteration < self.max_iterations:
            iteration += 1
            # Ask the model which tool to use next (None means "done").
            action = self._decide_action(goal)
            if action is None:
                self.state = AgentState.COMPLETE
                return self._get_final_answer(goal)
            # Execute the chosen tool.
            self.state = AgentState.ACTING
            result = self._execute_action(action)
            # Record the step so later decisions can see it.
            self.memory.append({
                'iteration': iteration,
                'action': action,
                'result': result
            })
            # Stop early if the result already satisfies the goal.
            if self._is_goal_achieved(goal, result):
                self.state = AgentState.COMPLETE
                return result
        self.state = AgentState.ERROR
        return "Max iterations reached"

    def _decide_action(self, goal: str) -> Optional[AgentAction]:
        """Ask the LLM for the next action; None signals the goal is done."""
        context = f"Goal: {goal}\n"
        context += f"Available tools: {list(self.tools.keys())}\n"
        context += f"Memory: {self.memory[-3:]}\n"  # last 3 actions only, to bound prompt size
        response = self.model.complete(
            f"{context}\nWhat should I do next? Respond with tool name and input."
        )
        # The model signals completion with a literal "DONE".
        if "DONE" in response:
            return None
        tool_name = self._extract_tool_name(response)
        tool_input = self._extract_tool_input(response)
        return AgentAction(
            tool=tool_name,
            input=tool_input,
            reasoning=response
        )

    def _execute_action(self, action: AgentAction) -> str:
        """Run the action's tool; failures come back as strings, never raise."""
        if action.tool not in self.tools:
            return f"Error: Tool {action.tool} not found"
        try:
            tool = self.tools[action.tool]
            result = tool(**action.input)
            return str(result)
        except Exception as e:
            # Tools are arbitrary callables; report any failure back to the
            # loop instead of crashing the whole run.
            return f"Error executing {action.tool}: {str(e)}"

    def _is_goal_achieved(self, goal: str, result: str) -> bool:
        """Heuristic completion check on the latest tool result."""
        # Simple keyword check — in production, use more sophisticated methods
        # (e.g. an LLM judgment or a structured success flag).
        return "success" in result.lower() or "complete" in result.lower()

    def _get_final_answer(self, goal: str) -> str:
        """Summarize the run into a final answer via the LLM."""
        context = f"Goal: {goal}\n"
        context += f"Actions taken: {len(self.memory)}\n"
        context += f"Results: {[m['result'] for m in self.memory]}\n"
        response = self.model.complete(
            f"{context}\nProvide final answer to the goal."
        )
        return response

    def _extract_tool_name(self, response: str) -> str:
        """Find which registered tool the response mentions.

        Fix: matching is now case-insensitive on both sides. Previously the
        raw tool name was tested against a lowercased response, so any tool
        registered with uppercase letters (e.g. "Search") could never match.
        """
        response_lower = response.lower()
        for tool_name in self.tools.keys():
            if tool_name.lower() in response_lower:
                return tool_name
        return "unknown"

    def _extract_tool_input(self, response: str) -> Dict:
        """Extract tool kwargs from the response — placeholder stub."""
        # TODO: in production, parse structured output (JSON / function calls).
        return {}
# Usage
tools = {
    'search': lambda query: f"Search results for {query}",
    # SECURITY: eval() on agent-supplied text executes arbitrary code —
    # replace with ast.literal_eval or a math parser before real use.
    'calculate': lambda expr: eval(expr),
    'fetch_data': lambda url: f"Data from {url}"
}
# NOTE(review): `model_client` is assumed to be defined elsewhere (an LLM
# client with a .complete(prompt) method) — this snippet does not define it.
agent = AIAgent("ResearchAgent", model_client, tools)
result = agent.run("Find the population of France and calculate 10% of it")
print(result)
Tool Integration
Tool Registry
class ToolRegistry:
    """Registry mapping tool names to callables plus their metadata schemas."""

    def __init__(self):
        self.tools = {}         # name -> callable
        self.tool_schemas = {}  # name -> schema dict with a 'description' key

    def register_tool(self, name: str, func, schema: Dict):
        """Register a callable under `name` together with its schema."""
        self.tools[name] = func
        self.tool_schemas[name] = schema

    def get_tool_descriptions(self) -> str:
        """Return one 'name: description' line per registered tool."""
        return "\n".join(
            f"{name}: {schema['description']}"
            for name, schema in self.tool_schemas.items()
        )

    def call_tool(self, name: str, **kwargs) -> str:
        """Invoke a registered tool, returning its result (or an error) as text."""
        if name not in self.tools:
            return f"Tool {name} not found"
        try:
            return str(self.tools[name](**kwargs))
        except Exception as e:
            return f"Error: {str(e)}"
# Usage
registry = ToolRegistry()
registry.register_tool(
    'weather',
    # Fixed mojibake: the degree sign had been mangled to "ยฐ" by a bad
    # encoding round-trip.
    lambda city: f"Weather in {city}: 72°F",
    {'description': 'Get weather for a city', 'params': ['city']}
)
registry.register_tool(
    'calculator',
    # SECURITY: eval() executes arbitrary code; use ast.literal_eval or a
    # dedicated math parser for untrusted expressions.
    lambda expr: eval(expr),
    {'description': 'Calculate mathematical expressions', 'params': ['expr']}
)
print(registry.get_tool_descriptions())
Best Practices
- Clear Goals: Define specific, measurable goals
- Tool Design: Make tools simple and focused
- Error Handling: Handle tool failures gracefully
- Monitoring: Track agent decisions and actions
- Feedback Loops: Learn from outcomes
- Testing: Test agents thoroughly
- Safety: Implement safety constraints
- Transparency: Log all decisions
- Scalability: Design for multiple agents
- Optimization: Minimize iterations
Advanced Agent Patterns
ReAct (Reasoning + Acting)
class ReActAgent:
    """ReAct pattern agent: interleaves Reasoning (Thought) and Acting steps."""

    def __init__(self, model_client, tools):
        self.model = model_client
        self.tools = tools
        self.thought_history = []  # every Thought parsed from the LLM
        self.action_history = []   # executed actions with their observations

    def run(self, task: str) -> str:
        """Drive the Thought -> Action -> Observation loop (max 10 rounds)."""
        prompt = f"""
Task: {task}
Use the following format:
Thought: What do I need to do?
Action: Which tool to use?
Observation: What did the tool return?
Repeat until you reach the final answer.
"""
        for i in range(10):
            # Get thought and action from the model.
            response = self.model.complete(prompt)
            thought = self._extract_section(response, "Thought")
            self.thought_history.append(thought)
            action = self._extract_section(response, "Action")
            if "Final Answer" in action:
                return self._extract_section(response, "Final Answer")
            # Execute the chosen tool and record the step.
            tool_name, tool_input = self._parse_action(action)
            observation = self._execute_tool(tool_name, tool_input)
            self.action_history.append({
                'tool': tool_name,
                'input': tool_input,
                'observation': observation
            })
            # Feed the observation back so the next round can reason on it.
            prompt += f"\nThought: {thought}\nAction: {action}\nObservation: {observation}\n"
        return "Max iterations reached"

    def _extract_section(self, text: str, section: str) -> str:
        """Return the content of a `Section:` block in the response.

        Fix: LLMs typically write the content inline ("Thought: do X"), but
        the original always returned the *next* line, losing inline content.
        Inline text after the colon now wins; the next line is the fallback
        for the "Section:\\ncontent" layout.
        """
        lines = text.split('\n')
        for i, line in enumerate(lines):
            if section in line:
                _, _, inline = line.partition(':')
                if inline.strip():
                    return inline.strip()
                return lines[i+1] if i+1 < len(lines) else ""
        return ""

    def _parse_action(self, action: str) -> tuple:
        """Parse an Action line into (tool_name, tool_input) — placeholder stub."""
        # TODO: implement real parsing; currently returns a fixed dummy action.
        return "tool_name", {}

    def _execute_tool(self, tool_name: str, tool_input: dict) -> str:
        """Invoke a registered tool, returning its stringified result."""
        if tool_name in self.tools:
            return str(self.tools[tool_name](**tool_input))
        return f"Tool {tool_name} not found"
Multi-Agent Orchestration
class MultiAgentOrchestrator:
    """Route tasks to registered specialist agents and track their work."""

    def __init__(self):
        self.agents = {}      # name -> {'agent', 'specialization', 'tasks_completed'}
        self.task_queue = []  # reserved for future queued dispatch
        self.results = {}     # task text -> result

    def register_agent(self, name: str, agent, specialization: str):
        """Add an agent to the pool under the given specialization label."""
        entry = {
            'agent': agent,
            'specialization': specialization,
            'tasks_completed': 0
        }
        self.agents[name] = entry

    def assign_task(self, task: str, required_specialization: str) -> str:
        """Send the task to the first registered agent whose specialization matches."""
        chosen = next(
            (name for name, info in self.agents.items()
             if required_specialization in info['specialization']),
            None,
        )
        if chosen is None:
            return "No suitable agent found"
        outcome = self.agents[chosen]['agent'].run(task)
        self.agents[chosen]['tasks_completed'] += 1
        self.results[task] = outcome
        return outcome

    def get_agent_stats(self) -> Dict:
        """Map each agent name to how many tasks it has completed."""
        stats = {}
        for name, info in self.agents.items():
            stats[name] = info['tasks_completed']
        return stats
Memory Management
Agent Memory Systems
class AgentMemory:
    """Two-tier agent memory: a bounded recent buffer plus a long-term store."""

    def __init__(self, max_size: int = 100):
        self.short_term = []  # most recent actions, bounded by max_size
        self.long_term = []   # durable facts worth keeping
        self.max_size = max_size

    def add_short_term(self, item: Dict):
        """Append to the recent buffer, evicting the oldest entry on overflow.

        Evicted entries are promoted to long-term memory only when they are
        important; otherwise they are discarded.
        """
        self.short_term.append(item)
        if len(self.short_term) <= self.max_size:
            return
        evicted = self.short_term.pop(0)
        if self._is_important(evicted):
            self.long_term.append(evicted)

    def add_long_term(self, item: Dict):
        """Store an item directly in long-term memory."""
        self.long_term.append(item)

    def recall(self, query: str) -> List[Dict]:
        """Return all memories matching the query, short-term entries first."""
        return [
            entry
            for entry in self.short_term + self.long_term
            if self._matches_query(entry, query)
        ]

    def _is_important(self, item: Dict) -> bool:
        """An item is important when its 'importance' score exceeds 0.5."""
        return item.get('importance', 0) > 0.5

    def _matches_query(self, item: Dict, query: str) -> bool:
        """Naive case-insensitive substring match against the item's repr."""
        return query.lower() in str(item).lower()
Monitoring and Observability
Agent Monitoring
import time
from datetime import datetime
class AgentMonitor:
    """Collect and report aggregate performance metrics for agent runs."""

    def __init__(self):
        self.metrics = {
            'total_runs': 0,
            'successful_runs': 0,
            'failed_runs': 0,
            'total_iterations': 0,
            'total_time': 0,
            'tool_usage': {}
        }

    def record_run(self, agent_name: str, success: bool,
                   iterations: int, duration: float, tools_used: List[str]):
        """Fold one completed run into the aggregate counters."""
        m = self.metrics
        m['total_runs'] += 1
        outcome_key = 'successful_runs' if success else 'failed_runs'
        m[outcome_key] += 1
        m['total_iterations'] += iterations
        m['total_time'] += duration
        usage = m['tool_usage']
        for tool in tools_used:
            usage[tool] = usage.get(tool, 0) + 1

    def get_success_rate(self) -> float:
        """Fraction of runs that succeeded; 0 when nothing recorded yet."""
        runs = self.metrics['total_runs']
        return self.metrics['successful_runs'] / runs if runs else 0

    def get_avg_iterations(self) -> float:
        """Mean iterations per run; 0 when nothing recorded yet."""
        runs = self.metrics['total_runs']
        return self.metrics['total_iterations'] / runs if runs else 0

    def get_avg_time(self) -> float:
        """Mean wall-clock duration per run; 0 when nothing recorded yet."""
        runs = self.metrics['total_runs']
        return self.metrics['total_time'] / runs if runs else 0

    def print_report(self):
        """Print a human-readable summary of the collected metrics."""
        print(f"Agent Performance Report")
        print(f"Total Runs: {self.metrics['total_runs']}")
        print(f"Success Rate: {self.get_success_rate():.2%}")
        print(f"Avg Iterations: {self.get_avg_iterations():.1f}")
        print(f"Avg Time: {self.get_avg_time():.2f}s")
        print(f"Tool Usage: {self.metrics['tool_usage']}")
Common Pitfalls
- Infinite Loops: Set max iterations limit
- Tool Failures: Implement fallback strategies
- Poor Tool Design: Keep tools focused and simple
- No Error Handling: Handle all failure modes
- Lack of Monitoring: Track all decisions
- Memory Leaks: Clean up old memories
- Slow Decisions: Optimize LLM calls
- No Safety Constraints: Implement guardrails
- Poor Goal Definition: Be specific about objectives
- Insufficient Testing: Test edge cases
Production Deployment
Agent Service
from fastapi import FastAPI
from pydantic import BaseModel
app = FastAPI()

class AgentRequest(BaseModel):
    # Request payload: the goal text and which registered agent type to use.
    goal: str
    agent_type: str

class AgentResponse(BaseModel):
    # Response payload: the final answer plus simple run metrics.
    result: str
    iterations: int
    duration: float

@app.post("/agent/run")
async def run_agent(request: AgentRequest) -> AgentResponse:
    """Run agent via API"""
    # NOTE(review): `get_agent` is assumed to be defined elsewhere in the
    # application (an agent factory/lookup); `time` comes from an earlier
    # import in this file — confirm both when wiring this up.
    start_time = time.time()
    # Get agent
    agent = get_agent(request.agent_type)
    # Run agent
    # NOTE(review): agent.run is synchronous — a long run blocks the event
    # loop; consider run_in_executor for production.
    result = agent.run(request.goal)
    # Calculate metrics
    duration = time.time() - start_time
    iterations = len(agent.memory)
    return AgentResponse(
        result=result,
        iterations=iterations,
        duration=duration
    )

@app.get("/agent/status/{agent_id}")
async def get_agent_status(agent_id: str):
    """Get agent status"""
    # Report the agent's current state, how many actions it has recorded,
    # and the most recent memory entry (None when it has not acted yet).
    agent = get_agent(agent_id)
    return {
        'state': agent.state.value,
        'iterations': len(agent.memory),
        'last_action': agent.memory[-1] if agent.memory else None
    }
Real-World Examples
Example 1: Research Agent
# Agent that researches topics
# NOTE(review): search_web, fetch_article, summarize_text and extract_facts
# are assumed to be defined elsewhere — this snippet only wires them up.
research_tools = {
    'search': search_web,
    'fetch_article': fetch_article,
    'summarize': summarize_text,
    'extract_facts': extract_facts
}
research_agent = AIAgent("ResearchAgent", model_client, research_tools)
result = research_agent.run("Research the history of AI and provide key milestones")
Example 2: Code Generation Agent
# Agent that generates code
# NOTE(review): search_documentation, run_tests, format_code and check_syntax
# are assumed to be defined elsewhere — this snippet only wires them up.
code_tools = {
    'search_docs': search_documentation,
    'test_code': run_tests,
    'format_code': format_code,
    'check_syntax': check_syntax
}
code_agent = AIAgent("CodeAgent", model_client, code_tools)
result = code_agent.run("Generate a Python function to calculate Fibonacci numbers")
Example 3: Data Analysis Agent
# Agent that analyzes data
# NOTE(review): load_dataset, query_database, calculate_statistics and
# create_chart are assumed to be defined elsewhere — this snippet only
# wires them up.
analysis_tools = {
    'load_data': load_dataset,
    'query_data': query_database,
    'calculate_stats': calculate_statistics,
    'create_visualization': create_chart
}
analysis_agent = AIAgent("AnalysisAgent", model_client, analysis_tools)
result = analysis_agent.run("Analyze sales data and identify trends")
Conclusion
Building production AI agents requires careful architecture, robust tool integration, comprehensive monitoring, and thorough testing. By implementing the patterns in this guide, you can create autonomous systems that reliably achieve goals while maintaining safety, transparency, and performance.
Key Takeaways:
- Design agents with clear goals and focused tools
- Implement robust error handling and fallbacks
- Monitor all decisions and performance metrics
- Use memory systems for context and learning
- Test thoroughly before production deployment
- Implement safety constraints and guardrails
- Scale with multi-agent orchestration
- Continuously optimize based on metrics
Next Steps:
- Define agent goals and specialization
- Design focused, testable tools
- Implement agent loop with error handling
- Add comprehensive monitoring
- Deploy and iterate based on metrics
Agent Evaluation and Testing
Performance Metrics
class AgentEvaluator:
    """Evaluate agent performance against expected-output test cases."""

    def __init__(self):
        # Metric template; evaluate_agent returns its own result dict.
        self.metrics = {
            'success_rate': 0,
            'avg_iterations': 0,
            'avg_time': 0,
            'tool_accuracy': {},
            'goal_achievement': 0
        }

    def evaluate_agent(self, agent, test_cases: list) -> dict:
        """Run the agent over test cases; return pass/fail counts and averages.

        Fixes vs. the naive version:
        - guards against ZeroDivisionError when `test_cases` is empty;
        - counts only the memory entries *added during each run* — the
          agent's memory persists across runs, so the raw length over-counts.
        """
        results = {
            'passed': 0,
            'failed': 0,
            'avg_iterations': 0,
            'avg_time': 0,
            'errors': []
        }
        total_iterations = 0
        total_time = 0
        for test_case in test_cases:
            goal = test_case['goal']
            expected = test_case['expected']
            memory_before = len(getattr(agent, 'memory', []))
            start_time = time.time()
            result = agent.run(goal)
            elapsed = time.time() - start_time
            # Compare against the expected answer.
            if self._matches_expected(result, expected):
                results['passed'] += 1
            else:
                results['failed'] += 1
                results['errors'].append({
                    'goal': goal,
                    'expected': expected,
                    'got': result
                })
            total_iterations += len(getattr(agent, 'memory', [])) - memory_before
            total_time += elapsed
        total_tests = len(test_cases)
        if total_tests:
            results['success_rate'] = results['passed'] / total_tests
            results['avg_iterations'] = total_iterations / total_tests
            results['avg_time'] = total_time / total_tests
        else:
            # No cases: report a zero rate instead of dividing by zero.
            results['success_rate'] = 0.0
        return results

    def _matches_expected(self, result: str, expected: str) -> bool:
        """Check if result matches expected."""
        # Simple string matching - in production use semantic similarity
        return expected.lower() in result.lower()
Unit Testing Agents
import unittest
class TestAIAgent(unittest.TestCase):
    """Unit tests for AI agents"""

    def setUp(self):
        """Set up test fixtures"""
        # NOTE(review): `mock_model` is assumed to be defined elsewhere (a
        # scripted stub LLM client) — these tests will not run without it.
        self.tools = {
            'search': lambda query: f"Results for {query}",
            'calculate': lambda expr: str(eval(expr)),
            'fetch': lambda url: f"Data from {url}"
        }
        self.agent = AIAgent("TestAgent", mock_model, self.tools)

    def test_agent_initialization(self):
        """Test agent initialization"""
        self.assertEqual(self.agent.name, "TestAgent")
        self.assertEqual(len(self.agent.tools), 3)
        self.assertEqual(self.agent.state, AgentState.IDLE)

    def test_agent_runs_successfully(self):
        """Test agent runs successfully"""
        result = self.agent.run("Test goal")
        self.assertIsNotNone(result)
        self.assertGreater(len(self.agent.memory), 0)

    def test_agent_handles_errors(self):
        """Test agent handles errors gracefully"""
        # NOTE(review): asserting "Error" appears presumes the mock model
        # routes to a missing/failing tool — confirm against the mock script.
        result = self.agent.run("Invalid goal with bad tool")
        self.assertIsNotNone(result)
        self.assertIn("Error", result)

    def test_agent_respects_max_iterations(self):
        """Test agent respects max iterations"""
        self.agent.max_iterations = 5
        result = self.agent.run("Complex goal")
        self.assertLessEqual(len(self.agent.memory), 5)
Real-World Agent Architectures
Customer Support Agent
class CustomerSupportAgent:
    """Wraps an AIAgent with support-specific tools (KB search, tickets, email)."""

    def __init__(self, model_client, knowledge_base):
        self.model = model_client
        self.kb = knowledge_base
        # Bound methods exposed to the inner agent as callable tools.
        self.tools = {
            'search_kb': self._search_knowledge_base,
            'create_ticket': self._create_support_ticket,
            'escalate': self._escalate_to_human,
            'send_email': self._send_email
        }
        self.agent = AIAgent("SupportAgent", model_client, self.tools)

    def handle_customer_query(self, query: str, customer_id: str) -> str:
        """Run the inner agent on the customer's query, tagged with their ID."""
        context = f"Customer ID: {customer_id}\nQuery: {query}"
        return self.agent.run(context)

    def _search_knowledge_base(self, query: str) -> str:
        """Return the top-3 knowledge-base hits joined with newlines."""
        hits = self.kb.search(query, top_k=3)
        return "\n".join(hits)

    def _create_support_ticket(self, issue: str, priority: str = "normal") -> str:
        """Create a support ticket; the ID is derived from the current time."""
        ticket_id = f"TICKET-{int(time.time())}"
        # Placeholder: a real implementation would persist the ticket.
        return f"Ticket created: {ticket_id}"

    def _escalate_to_human(self, reason: str) -> str:
        """Hand the conversation off to a human support agent."""
        # Placeholder: a real implementation would notify a human.
        return "Escalated to human agent"

    def _send_email(self, recipient: str, subject: str, body: str) -> str:
        """Send an email to the recipient."""
        # Placeholder: a real implementation would dispatch the message.
        return f"Email sent to {recipient}"
Data Analysis Agent
class DataAnalysisAgent:
    """Wraps an AIAgent with database, statistics and visualization tools."""

    def __init__(self, model_client, database):
        self.model = model_client
        self.db = database
        # Bound methods exposed to the inner agent as callable tools.
        self.tools = {
            'query_data': self._query_database,
            'calculate_stats': self._calculate_statistics,
            'create_chart': self._create_visualization,
            'generate_report': self._generate_report
        }
        self.agent = AIAgent("AnalysisAgent", model_client, self.tools)

    def analyze_data(self, query: str) -> dict:
        """Run the inner agent on an analysis query and summarize its work."""
        analysis = self.agent.run(query)
        history = self.agent.memory
        return {
            'analysis': analysis,
            'steps': len(history),
            'tools_used': [entry['action'].tool for entry in history]
        }

    def _query_database(self, sql: str) -> str:
        """Execute SQL against the backing database; return results as text."""
        # NOTE(review): executes raw SQL produced by the agent — confirm the
        # database layer sandboxes/parameterizes this before production use.
        return str(self.db.execute(sql))

    def _calculate_statistics(self, data: str) -> str:
        """Compute mean/median/std for a JSON-encoded list of numbers."""
        import json
        values = json.loads(data)
        count = len(values)
        summary = {
            'mean': sum(values) / count,
            'median': sorted(values)[count // 2],  # upper median for even counts
            'std': self._calculate_std(values)
        }
        return str(summary)

    def _create_visualization(self, data: str, chart_type: str) -> str:
        """Render a chart of the requested type (placeholder)."""
        return f"Chart created: {chart_type}"

    def _generate_report(self, analysis: str) -> str:
        """Produce a report from the analysis (placeholder)."""
        return "Report generated"

    def _calculate_std(self, data: list) -> float:
        """Population standard deviation (divides by N, not N-1)."""
        mean = sum(data) / len(data)
        return (sum((x - mean) ** 2 for x in data) / len(data)) ** 0.5
Deployment Considerations
Scaling Agents
from concurrent.futures import ThreadPoolExecutor
import asyncio
class ScalableAgentPool:
    """Manage a fixed pool of agents and fan requests out across them."""

    def __init__(self, num_agents: int = 5):
        # NOTE(review): relies on module-level `model_client` and `tools`
        # existing at construction time — confirm against the module setup.
        self.agents = [
            AIAgent(f"Agent-{i}", model_client, tools)
            for i in range(num_agents)
        ]
        self.executor = ThreadPoolExecutor(max_workers=num_agents)
        self.queue = asyncio.Queue()
        # Round-robin cursor. Fixes the original selection expression
        # `self.agents[len(self.agents) % len(self.agents)]`, which is
        # always index 0 — every request went to the same agent.
        self._next_index = 0

    async def process_requests(self, requests: list) -> list:
        """Run all requests concurrently, one pooled agent per request."""
        tasks = []
        for request in requests:
            agent = self._get_available_agent()
            task = asyncio.create_task(
                self._run_agent_async(agent, request)
            )
            tasks.append(task)
        results = await asyncio.gather(*tasks)
        return results

    def _get_available_agent(self) -> AIAgent:
        """Return the next agent in round-robin order."""
        agent = self.agents[self._next_index % len(self.agents)]
        self._next_index += 1
        return agent

    async def _run_agent_async(self, agent: AIAgent, request: str) -> str:
        """Run blocking agent.run in the thread pool without blocking the loop."""
        loop = asyncio.get_event_loop()
        result = await loop.run_in_executor(
            self.executor,
            agent.run,
            request
        )
        return result
Agent Persistence
import json
import os
import pickle
class AgentPersistence:
    """Save and restore agent state as JSON files on disk."""

    def __init__(self, storage_path: str = "./agents"):
        self.storage_path = storage_path

    def save_agent(self, agent: AIAgent, agent_id: str):
        """Serialize the agent's state to <storage_path>/<agent_id>.json.

        Fixes: creates the storage directory on demand (previously the write
        failed with FileNotFoundError on a fresh path), and memory entries
        that are not JSON-serializable (e.g. AgentAction dataclasses) are
        stringified via `default=str` — a deliberately lossy fallback so
        saving never raises TypeError.
        """
        os.makedirs(self.storage_path, exist_ok=True)
        state = {
            'name': agent.name,
            'memory': agent.memory,
            'state': agent.state.value,
            'tools': list(agent.tools.keys())
        }
        filepath = f"{self.storage_path}/{agent_id}.json"
        with open(filepath, 'w') as f:
            json.dump(state, f, default=str)

    def load_agent(self, agent_id: str) -> dict:
        """Load and return the raw state dict for `agent_id`."""
        filepath = f"{self.storage_path}/{agent_id}.json"
        with open(filepath, 'r') as f:
            state = json.load(f)
        return state

    def restore_agent(self, agent_id: str, model_client, tools) -> AIAgent:
        """Rebuild an AIAgent from saved state.

        Fix: the saved 'state' field holds the enum *value* (e.g. "idle"),
        so it must be looked up with AgentState(value). The original used
        AgentState[value], which indexes by member *name* ("IDLE") and
        raised KeyError for every saved file.
        """
        state = self.load_agent(agent_id)
        agent = AIAgent(state['name'], model_client, tools)
        agent.memory = state['memory']
        agent.state = AgentState(state['state'])
        return agent
Conclusion
Building production AI agents requires careful architecture, robust tool integration, comprehensive monitoring, and thorough testing. By implementing the patterns in this guide, you can create autonomous systems that reliably achieve goals while maintaining safety, transparency, and performance.
Key Takeaways:
- Design agents with clear goals and focused tools
- Implement robust error handling and fallbacks
- Monitor all decisions and performance metrics
- Use memory systems for context and learning
- Test thoroughly before production deployment
- Implement safety constraints and guardrails
- Scale with multi-agent orchestration
- Continuously optimize based on metrics
- Persist agent state for recovery
- Evaluate performance systematically
Next Steps:
- Define agent goals and specialization
- Design focused, testable tools
- Implement agent loop with error handling
- Add comprehensive monitoring
- Deploy and iterate based on metrics
Comments