Skip to main content
⚡ Calmops

LLM API Comparison: OpenAI vs Anthropic vs Open Source

Introduction

Choosing the right LLM API is critical for cost, performance, and capability. With options ranging from OpenAI to Anthropic to open-source models, understanding the tradeoffs is essential.

Key Statistics:

  • OpenAI: 70% market share
  • Anthropic: Growing 300% annually
  • Open-source: 40% cost savings potential
  • Average API cost: $1-15 per 1M tokens

Comparison Matrix

| Provider  | Model       | Context | Input ($/1M) | Output ($/1M) | Speed     |
|-----------|-------------|---------|--------------|---------------|-----------|
| OpenAI    | GPT-4o      | 128K    | $5.00        | $15.00        | Fast      |
| OpenAI    | GPT-4 Turbo | 128K    | $10.00       | $30.00        | Fast      |
| OpenAI    | GPT-3.5     | 16K     | $0.50        | $1.50         | Very fast |
| Anthropic | Claude 3.5  | 200K    | $3.00        | $15.00        | Fast      |
| Anthropic | Claude 3    | 200K    | $15.00       | $75.00        | Fast      |
| Anthropic | Claude 2.1  | 200K    | $8.00        | $24.00        | Medium    |
| Meta      | Llama 3.1   | 128K    | $0.00*       | $0.00*        | Varies    |
| Mistral   | Mixtral     | 32K     | $0.00*       | $0.00*        | Varies    |

\* No per-token API fee when self-hosted; you pay infrastructure costs instead.

OpenAI API

#!/usr/bin/env python3
"""OpenAI API integration."""

from openai import OpenAI

class OpenAIClient:
    """Thin wrapper around the OpenAI Python SDK.

    Normalizes responses into plain dicts/generators so callers do not
    depend on SDK response object types.
    """

    def __init__(self, api_key):
        # The SDK client manages auth headers, retries, and connection reuse.
        self.client = OpenAI(api_key=api_key)

    def chat(self, model='gpt-4o', messages=None, **kwargs):
        """Run a (non-streaming) chat completion.

        Args:
            model: Model identifier (default ``'gpt-4o'``).
            messages: List of ``{'role': ..., 'content': ...}`` dicts;
                required by the API.
            **kwargs: Passed through to the API (temperature, max_tokens, ...).

        Returns:
            dict with ``'content'`` (assistant reply text), ``'model'``
            (model actually used), and a ``'usage'`` sub-dict of
            prompt/completion/total token counts.
        """
        response = self.client.chat.completions.create(
            model=model,
            messages=messages,
            **kwargs
        )

        return {
            'content': response.choices[0].message.content,
            'model': response.model,
            'usage': {
                'prompt_tokens': response.usage.prompt_tokens,
                'completion_tokens': response.usage.completion_tokens,
                'total_tokens': response.usage.total_tokens
            }
        }

    def stream_chat(self, model='gpt-4o', messages=None, **kwargs):
        """Yield content deltas from a streaming chat completion.

        Fix: some stream chunks carry an empty ``choices`` list (e.g. the
        trailing usage chunk when ``stream_options={'include_usage': True}``
        is set); the previous code indexed ``choices[0]`` unconditionally
        and would raise IndexError on such chunks.
        """
        response = self.client.chat.completions.create(
            model=model,
            messages=messages,
            stream=True,
            **kwargs
        )

        for chunk in response:
            # Skip chunks with no choices, and role-only/empty deltas.
            if chunk.choices and chunk.choices[0].delta.content:
                yield chunk.choices[0].delta.content

    def embed(self, text, model='text-embedding-3-small'):
        """Return the embedding vector (list of floats) for *text*."""
        response = self.client.embeddings.create(
            model=model,
            input=text
        )
        return response.data[0].embedding

    def get_image(self, prompt, model='dall-e-3', size='1024x1024'):
        """Generate a single image and return its hosted URL.

        NOTE(review): URLs returned by the images API are short-lived;
        callers should download promptly if persistence is needed.
        """
        response = self.client.images.generate(
            model=model,
            prompt=prompt,
            size=size,
            n=1
        )
        return response.data[0].url

Anthropic API

#!/usr/bin/env python3
"""Anthropic Claude API integration."""

import anthropic

class AnthropicClient:
    """Thin wrapper around the Anthropic Messages API.

    Normalizes responses into plain dicts/generators so callers do not
    depend on SDK response object types.
    """

    def __init__(self, api_key):
        self.client = anthropic.Anthropic(api_key=api_key)

    def message(self, model='claude-3-5-sonnet-20241022',
                messages=None, max_tokens=1024, **kwargs):
        """Create a (non-streaming) Claude message.

        Args:
            model: Claude model identifier.
            messages: List of ``{'role': ..., 'content': ...}`` dicts;
                required by the API.
            max_tokens: Output token cap (required by the Messages API).
            **kwargs: Passed through (system, temperature, ...).

        Returns:
            dict with ``'content'`` (first text block), ``'model'``, and a
            ``'usage'`` sub-dict of input/output token counts.
        """
        response = self.client.messages.create(
            model=model,
            messages=messages,
            max_tokens=max_tokens,
            **kwargs
        )

        return {
            # NOTE(review): only the first content block is returned; tool-use
            # responses may contain additional blocks.
            'content': response.content[0].text,
            'model': response.model,
            'usage': {
                'input_tokens': response.usage.input_tokens,
                'output_tokens': response.usage.output_tokens
            }
        }

    def stream_message(self, model='claude-3-5-sonnet-20241022',
                     messages=None, max_tokens=1024):
        """Yield text deltas from a streaming Claude message."""
        with self.client.messages.stream(
            model=model,
            messages=messages,
            max_tokens=max_tokens
        ) as stream:
            for text in stream.text_stream:
                yield text

    def count_tokens(self, text, model='claude-3-5-sonnet-20241022'):
        """Return the input-token count for *text* under *model*.

        Fix: the previous version ignored the ``model`` parameter and used
        the deprecated client-level ``count_tokens`` helper; the Token
        Counting endpoint (``messages.count_tokens``) is model-aware.
        """
        result = self.client.messages.count_tokens(
            model=model,
            messages=[{'role': 'user', 'content': text}]
        )
        return result.input_tokens

Open-Source Models

Self-Hosted Llama

#!/usr/bin/env python3
"""Llama self-hosted inference."""

from llama_cpp import Llama

class LlamaClient:
    """Local inference wrapper for Llama models via llama.cpp."""

    def __init__(self, model_path, n_ctx=4096, n_gpu_layers=0):
        # n_gpu_layers=0 keeps inference entirely on CPU by default.
        self.llm = Llama(
            model_path=model_path,
            n_ctx=n_ctx,
            n_gpu_layers=n_gpu_layers,
            verbose=False
        )

    def chat(self, messages, temperature=0.7, max_tokens=1024):
        """Run a chat completion against the local model.

        Returns a dict with 'content', 'model', and token 'usage' counts,
        mirroring the shape of the hosted-API wrappers.
        """
        # The raw completion API takes a flat prompt string, so render the
        # message list into the Llama 3 chat template first.
        rendered = self.format_messages(messages)

        completion = self.llm(
            rendered,
            temperature=temperature,
            max_tokens=max_tokens,
            # Stop at the template's turn-boundary tokens.
            stop=['<|eot_id|>', '<|start_header_id|>']
        )

        choice = completion['choices'][0]
        usage = completion['usage']
        return {
            'content': choice['text'],
            'model': 'llama',
            'usage': {
                'prompt_tokens': usage['prompt_tokens'],
                'completion_tokens': usage['completion_tokens'],
                'total_tokens': usage['total_tokens']
            }
        }

    def format_messages(self, messages):
        """Render a message list into the Llama 3 chat prompt template."""
        segments = ["<|begin_of_text|>"]
        for entry in messages:
            segments.append(
                f"<|start_header_id|>{entry['role']}<|end_header_id|>\n\n"
                f"{entry['content']}<|eot_id|>"
            )
        # Open the assistant turn so generation continues from here.
        segments.append("<|start_header_id|>assistant<|end_header_id|>\n\n")
        return "".join(segments)

Using Ollama

# Install Ollama via the official installer script
curl -fsSL https://ollama.com/install.sh | sh

# Pull model weights locally (downloaded once, cached afterwards)
ollama pull llama3.1
ollama pull mistral
ollama pull codellama

# Run API — starts the local HTTP server the Python client talks to
ollama serve
import ollama

class OllamaClient:
    """Client for a locally running Ollama server.

    NOTE(review): ``base_url`` is stored but the module-level ``ollama``
    functions use their own default host; pass a configured
    ``ollama.Client(host=...)`` if a non-default URL must be honored.
    """

    def __init__(self, base_url='http://localhost:11434'):
        self.base_url = base_url

    def chat(self, model='llama3.1', messages=None, **kwargs):
        """Run a chat completion.

        Args:
            model: Local model name (default ``'llama3.1'``).
            messages: List of ``{'role': ..., 'content': ...}`` dicts.
            **kwargs: Passed through to ``ollama.chat``.

        Returns:
            dict with ``'content'`` and ``'model'``.
        """
        response = ollama.chat(
            model=model,
            messages=messages,
            **kwargs
        )

        return {
            'content': response['message']['content'],
            'model': response['model']
        }

    def embed(self, text, model='nomic-embed-text'):
        """Return the embedding vector for *text*.

        Fix: the original signature ``(self, model='...', text)`` was a
        SyntaxError (non-default parameter after a default one); *text* is
        now the first, required parameter.
        """
        response = ollama.embeddings(
            model=model,
            prompt=text
        )

        return response['embedding']

Cost Comparison

#!/usr/bin/env python3
"""Cost comparison calculator."""

def compare_costs(monthly_tokens):
    """Estimate monthly LLM spend in USD for several providers.

    Fix: the original duplicated each provider's rates between
    ``'prompt'``/``'completion'`` fields and constants hardcoded inside
    ``calc`` lambdas (a drift hazard, with inconsistent dict shapes per
    provider); costs are now derived from a single rate table.

    Args:
        monthly_tokens: dict with ``'prompt'`` and ``'completion'`` keys,
            each a token volume expressed in millions of tokens per month.

    Returns:
        dict mapping provider name -> estimated monthly cost in USD.
    """
    prompt = monthly_tokens['prompt']
    completion = monthly_tokens['completion']

    # (input_rate, output_rate) in USD per 1M tokens. Self-hosted Llama
    # uses one blended rate approximating amortized GPU cost
    # (8x A100, ~$0.50 per 1M tokens either direction).
    rates = {
        'OpenAI GPT-4o': (5.0, 15.0),
        'OpenAI GPT-3.5': (0.5, 1.5),
        'Anthropic Claude 3.5': (3.0, 15.0),
        'Self-hosted Llama 3.1': (0.50, 0.50),
    }

    return {
        name: prompt * in_rate + completion * out_rate
        for name, (in_rate, out_rate) in rates.items()
    }

When to Choose

| Use case                  | Recommended          |
|---------------------------|----------------------|
| General chat, best quality| OpenAI GPT-4o        |
| Long context needed       | Anthropic Claude     |
| Cost-sensitive            | Open-source          |
| Coding tasks              | OpenAI Codex / Claude|
| On-premise required       | Llama / Mistral      |
| Enterprise compliance     | Anthropic / OpenAI   |
| Low latency               | OpenAI / Anthropic   |

External Resources


Comments