Introduction
PostgreSQL has emerged as a powerful platform for AI applications, particularly with the pgvector extension enabling vector similarity search. Combined with its existing features for JSON, full-text search, and reliability, PostgreSQL provides an excellent foundation for building AI-powered applications in 2026.
pgvector: Vector Similarity Search
Installation
# From source
git clone https://github.com/pgvector/pgvector.git
cd pgvector
make
make install  # may require root privileges (sudo) to write into the PostgreSQL lib dir
# Or use Docker with pgvector (preinstalled extension, PostgreSQL 17 base image)
docker run -d --name pgvector \
-e POSTGRES_PASSWORD=password \
-e POSTGRES_DB=vector_db \
-p 5432:5432 \
pgvector/pgvector:pg17
Enable Extension
-- Enable pgvector (the extension binaries must already be installed on the server)
CREATE EXTENSION vector;
-- Verify installation
SELECT * FROM pg_extension WHERE extname = 'vector';
-- Check available functions (psql meta-command, not SQL)
\df *vector*
Creating and Managing Vector Data
Vector Data Types
-- Create table with vector column
CREATE TABLE documents (
    id SERIAL PRIMARY KEY,
    title VARCHAR(500),
    content TEXT,
    embedding VECTOR(1536), -- OpenAI ada-002
    metadata JSONB,
    created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
-- Vector dimensions for different models:
-- OpenAI ada-002: 1536
-- OpenAI text-embedding-3-small: 1536
-- OpenAI text-embedding-3-large: 3072
-- sentence-transformers all-MiniLM-L6-v2: 384
-- sentence-transformers all-mpnet-base-v2: 768
-- Alternative: half precision (~50% storage; requires pgvector >= 0.7.0).
-- NOTE: VECTOR(n) is always single precision -- the half-precision type
-- is HALFVEC(n).
CREATE TABLE documents_half (
    id SERIAL PRIMARY KEY,
    embedding HALFVEC(1536) -- 2-byte floats instead of 4-byte
);
Inserting Vectors
-- Insert a single vector (text literal cast to vector)
-- NOTE(review): the documents table above declares VECTOR(1536); these
-- 5-dimensional literals are illustrative only and would be rejected by
-- that column -- real inserts must match the declared dimension.
INSERT INTO documents (title, content, embedding)
VALUES (
'PostgreSQL Tutorial',
'Learn PostgreSQL from basics to advanced',
'[0.1, 0.2, 0.3, 0.4, 0.5]'::vector
);
-- Insert multiple vectors in a single statement (fewer round trips)
INSERT INTO documents (title, content, embedding)
VALUES
('Redis Guide', 'In-memory data structure store', '[0.2, 0.1, 0.4, 0.3, 0.5]'::vector),
('MongoDB Basics', 'NoSQL document database', '[0.3, 0.2, 0.1, 0.5, 0.4]'::vector);
-- Generate embeddings programmatically
-- Using Python with OpenAI
Python: Generate and Store Embeddings
import psycopg2
import openai
# Connect to PostgreSQL
# NOTE(review): credentials are hard-coded for the example; in real
# deployments load them from the environment or a config/secret store.
conn = psycopg2.connect(
host="localhost",
database="vector_db",
user="postgres",
password="password"
)
# Generate embedding
def get_embedding(text):
    """Return the embedding vector (list of floats) for `text`.

    NOTE(review): this uses the legacy `openai.Embedding` interface
    (openai < 1.0); newer SDKs use `OpenAI().embeddings.create(...)` as in
    the RAGPipeline class below -- confirm the installed SDK version.
    """
    response = openai.Embedding.create(
        model="text-embedding-3-small",
        input=text
    )
    return response['data'][0]['embedding']
# Store document with embedding
def index_document(title, content, metadata=None):
    """Embed `content` and insert one row into documents; return the new id.

    Fixes vs. original: metadata is JSON-serialized first (psycopg2 cannot
    adapt a plain dict to jsonb without a wrapper), and the RETURNING row is
    fetched before the cursor is closed by the `with` block.
    """
    import json

    embedding = get_embedding(content)
    with conn.cursor() as cur:
        cur.execute("""
            INSERT INTO documents (title, content, embedding, metadata)
            VALUES (%s, %s, %s::vector, %s)
            RETURNING id
        """, (title, content,
              str(embedding),  # pgvector accepts the '[x, y, ...]' text form
              json.dumps(metadata) if metadata is not None else None))
        new_id = cur.fetchone()[0]
    conn.commit()
    return new_id
# Batch index documents
def index_documents_batch(documents):
    """Embed and insert many documents inside one transaction.

    Each item is a dict with 'title', 'content' and optional 'metadata'.
    All rows are committed together at the end (all-or-nothing).
    """
    import json

    with conn.cursor() as cur:
        for doc in documents:
            embedding = get_embedding(doc['content'])
            metadata = doc.get('metadata')
            cur.execute("""
                INSERT INTO documents (title, content, embedding, metadata)
                VALUES (%s, %s, %s::vector, %s)
            """, (doc['title'], doc['content'],
                  str(embedding),  # pgvector '[x, y, ...]' text form
                  json.dumps(metadata) if metadata is not None else None))
    conn.commit()
Vector Indexes
Index Types
-- HNSW Index (Hierarchical Navigable Small World)
-- Better query speed/recall, but slower build and more memory than IVFFlat
CREATE INDEX idx_documents_embedding_hnsw
ON documents
USING HNSW (embedding vector_cosine_ops)
WITH (m = 16, ef_construction = 64);
-- Parameters:
-- m: Number of connections per layer (default 16)
-- ef_construction: Search width during build (default 64)
-- Higher values = better recall, slower build, more memory
-- IVFFlat Index (Inverted File with Flat vectors)
-- Faster build and less memory, but slower/less accurate queries;
-- build it AFTER loading data, since the cluster centers are learned
-- from the rows present at build time
CREATE INDEX idx_documents_embedding_ivf
ON documents
USING IVFFlat (embedding vector_cosine_ops)
WITH (lists = 100);
-- Parameters:
-- lists: Number of clusters (pgvector suggests rows/1000 up to ~1M rows,
--        sqrt(rows) beyond that)
-- Higher values = better recall, slower build
Index Comparison
| Index Type | Build Time | Query Speed | Memory | Best For |
|---|---|---|---|---|
| HNSW | Slower | Very Fast | High | Real-time applications |
| IVFFlat | Fast | Fast | Medium | Large datasets, fast (re)builds |
| None (raw) | N/A | Slow (exact) | None | Small datasets, testing |
Index Best Practices
-- Choose distance metric -- the index operator class must match the
-- operator used at query time, or the index will not be used:
-- vector_cosine_ops: Cosine distance (recommended) -- pairs with <=>
-- vector_l2_ops: Euclidean distance -- pairs with <->
-- vector_ip_ops: Inner product -- pairs with <#>
-- Create index after inserting data (required for good IVFFlat clustering)
-- Partial index for filtered searches (only indexes matching rows)
CREATE INDEX idx_documents_embedding_active
ON documents USING HNSW (embedding vector_cosine_ops)
WHERE status = 'active';
-- Check index usage (look for an Index Scan on the HNSW/IVFFlat index)
EXPLAIN ANALYZE
SELECT * FROM documents
ORDER BY embedding <=> '[0.1,0.2,0.3]'::vector
LIMIT 5;
Similarity Search
Vector Search Queries
-- Cosine similarity (most common)
-- <=> returns cosine DISTANCE; 1 - distance converts it to similarity
SELECT id, title,
       1 - (embedding <=> $query_embedding) AS similarity
FROM documents
ORDER BY embedding <=> $query_embedding
LIMIT 5;
-- Euclidean distance (smaller = more similar)
SELECT id, title,
       embedding <-> $query_embedding AS distance
FROM documents
ORDER BY embedding <-> $query_embedding
LIMIT 5;
-- Inner product: <#> returns the NEGATIVE inner product (so ascending
-- order still puts the most similar rows first); negate the value to
-- report the true inner-product similarity
SELECT id, title,
       -(embedding <#> $query_embedding) AS similarity
FROM documents
ORDER BY embedding <#> $query_embedding
LIMIT 5;
Filtered Vector Search
-- Add filters to vector search
-- (filters are applied while scanning the vector index; heavy filtering
-- can reduce recall -- consider a partial index for hot filters)
SELECT id, title,
1 - (embedding <=> $query_embedding) AS similarity
FROM documents
WHERE status = 'published'
AND created_at > '2025-01-01'
ORDER BY embedding <=> $query_embedding
LIMIT 10;
-- Use pre-filter for small result sets
-- (the inner query restricts rows first; the outer ORDER BY similarity
-- cannot use the vector index, so this suits small filtered sets only)
SELECT * FROM (
SELECT id, title,
1 - (embedding <=> $query_embedding) AS similarity
FROM documents
WHERE category = 'tutorial'
) AS filtered
ORDER BY similarity DESC
LIMIT 5;
-- Post-filter with complex conditions
-- (LATERAL names the similarity expression once so it can be reused)
SELECT id, title, similarity
FROM documents,
LATERAL (
SELECT 1 - (embedding <=> $query_embedding) AS similarity
) AS sim
WHERE (metadata->>'author') = 'John'
ORDER BY similarity DESC
LIMIT 5;
RAG Pipeline Implementation
Complete RAG Architecture
from openai import OpenAI
import psycopg2
from sentence_transformers import SentenceTransformer
class RAGPipeline:
    """Retrieval-augmented generation over PostgreSQL/pgvector documents."""

    def __init__(self, db_conn, openai_key):
        self.conn = db_conn
        self.client = OpenAI(api_key=openai_key)
        # 384-dim embeddings; the documents.embedding column must match
        self.embedding_model = SentenceTransformer('all-MiniLM-L6-v2')

    def generate_embedding(self, text):
        """Generate an embedding (list of floats) using sentence-transformers."""
        return self.embedding_model.encode(text).tolist()

    def retrieve_context(self, query, top_k=5, filters=None):
        """Retrieve the top_k most similar documents, optionally filtered.

        Security fix vs. original: metadata filter keys and the LIMIT are
        bound as query parameters instead of being interpolated into the
        SQL string (SQL injection risk).
        """
        # pgvector accepts the '[x, y, ...]' text form for vector input
        query_embedding = str(self.generate_embedding(query))
        sql = """
            SELECT id, title, content, metadata,
                   1 - (embedding <=> %s::vector) AS similarity
            FROM documents
            WHERE 1=1
        """
        params = [query_embedding]
        if filters:
            for key, value in filters.items():
                # ->> takes its key as a text argument, so it can be bound
                sql += " AND metadata->>%s = %s"
                params.extend([key, value])
        sql += " ORDER BY embedding <=> %s::vector LIMIT %s"
        params.extend([query_embedding, top_k])
        with self.conn.cursor() as cur:
            cur.execute(sql, params)
            return cur.fetchall()

    def generate_response(self, query, context_documents):
        """Generate an answer grounded in the retrieved documents."""
        # doc[2] is the content column (see retrieve_context SELECT list)
        context = "\n\n".join(
            f"Document {i+1}: {doc[2]}"
            for i, doc in enumerate(context_documents)
        )
        prompt = f"""Based on the following context, answer the question.
Context:
{context}
Question: {query}
Answer:"""
        response = self.client.chat.completions.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "You are a helpful assistant."},
                {"role": "user", "content": prompt}
            ]
        )
        return response.choices[0].message.content

    def rag_query(self, query, top_k=5, filters=None):
        """Complete RAG round trip: retrieve context, then generate."""
        docs = self.retrieve_context(query, top_k, filters)
        answer = self.generate_response(query, docs)
        return {
            'answer': answer,
            'sources': [
                # row layout: (id, title, content, metadata, similarity)
                {'id': d[0], 'title': d[1], 'content': d[2][:200],
                 'similarity': float(d[4])}
                for d in docs
            ]
        }
RAG Pipeline Usage
# Initialize pipeline
rag = RAGPipeline(conn, openai_key="sk-...")

# Documents to index
documents = [
    {
        'title': 'PostgreSQL Basics',
        'content': 'PostgreSQL is a powerful, open source object-relational database...',
        'metadata': {'category': 'database', 'author': 'John'}
    },
    {
        'title': 'Advanced SQL',
        'content': 'Learn advanced SQL techniques including window functions...',
        'metadata': {'category': 'database', 'author': 'Jane'}
    }
]

# Index documents
import json

for doc in documents:
    embedding = rag.generate_embedding(doc['content'])
    with conn.cursor() as cur:
        cur.execute("""
            INSERT INTO documents (title, content, embedding, metadata)
            VALUES (%s, %s, %s::vector, %s)
        """, (doc['title'], doc['content'],
              str(embedding),  # pgvector '[x, y, ...]' text form
              # psycopg2 cannot adapt a plain dict to jsonb; serialize first
              json.dumps(doc['metadata'])))
# Commit once for the whole batch (all-or-nothing)
conn.commit()

# Query with RAG
result = rag.rag_query(
    query="What is PostgreSQL?",
    top_k=3,
    filters={'category': 'database'}
)
print(result['answer'])
print("Sources:", result['sources'])
Hybrid Search
Combining Vector and Full-Text Search
-- Create full-text search index
-- COALESCE added: with a NULL title, `title || ' ' || content` would make
-- the whole generated column NULL and the row would never match FTS
ALTER TABLE documents ADD COLUMN search_vector tsvector
    GENERATED ALWAYS AS (
        to_tsvector('english', COALESCE(title, '') || ' ' || COALESCE(content, ''))
    ) STORED;
CREATE INDEX idx_documents_search ON documents USING GIN (search_vector);
-- Hybrid search with weighted ranking: 70% vector similarity + 30% FTS rank
-- (parentheses fixed -- the original expression was unbalanced)
SELECT
    id,
    title,
    (
        0.7 * (1 - (embedding <=> $query_embedding)) +
        0.3 * ts_rank(search_vector, query_fts)
    ) AS combined_rank
FROM documents,
     to_tsquery('english', 'postgresql') AS query_fts
WHERE search_vector @@ query_fts
   OR (embedding <=> $query_embedding) < 0.5
ORDER BY combined_rank DESC
LIMIT 10;
Python Hybrid Search
def hybrid_search(query, top_k=10):
    """Combine full-text rank and vector similarity with 50/50 weighting.

    Fixes vs. original: the unused `fts_query` local is removed, and the
    ranking expression moved into a subquery -- Postgres does not allow an
    output-column alias (fts_rank) inside an ORDER BY *expression*.
    """
    # pgvector accepts the '[x, y, ...]' text form for vector input
    query_embedding = str(get_embedding(query))
    with conn.cursor() as cur:
        cur.execute("""
            SELECT id, title, content, fts_rank, vector_rank
            FROM (
                SELECT
                    id, title, content,
                    ts_rank(to_tsvector('english', title || ' ' || content),
                            plainto_tsquery('english', %s)) AS fts_rank,
                    1 - (embedding <=> %s::vector) AS vector_rank
                FROM documents
                WHERE to_tsvector('english', title || ' ' || content)
                      @@ plainto_tsquery('english', %s)
                   OR embedding <=> %s::vector < 0.8
            ) AS ranked
            ORDER BY (0.5 * fts_rank + 0.5 * vector_rank) DESC
            LIMIT %s
        """, (query, query_embedding, query, query_embedding, top_k))
        return cur.fetchall()
Session Management for LLM Applications
class LLMSessionManager:
    """Persist multi-turn chat history in the chat_sessions table."""

    def __init__(self, conn):
        self.conn = conn

    @staticmethod
    def _build_message(role, content):
        """Return the single-message list to append, stamped with UTC time.

        (The original called an undefined `now()`; datetime is used here.)
        """
        from datetime import datetime, timezone
        return [{
            "role": role,
            "content": content,
            "timestamp": datetime.now(timezone.utc).isoformat(),
        }]

    def create_session(self, user_id):
        """Create a new chat session for user_id; return its session_id."""
        with self.conn.cursor() as cur:
            cur.execute("""
                INSERT INTO chat_sessions (user_id, messages)
                VALUES (%s, '[]'::jsonb)
                RETURNING session_id
            """, (user_id,))
            # fetch before the cursor is closed by the `with` block
            session_id = cur.fetchone()[0]
        self.conn.commit()
        return session_id

    def add_message(self, session_id, role, content):
        """Append one message to the session's JSONB message array."""
        import json
        message = self._build_message(role, content)
        with self.conn.cursor() as cur:
            cur.execute("""
                UPDATE chat_sessions
                SET messages = messages || %s::jsonb,
                    updated_at = NOW()
                WHERE session_id = %s
            """, (json.dumps(message), session_id))  # dicts must be serialized for jsonb
        self.conn.commit()

    def get_context(self, session_id, max_tokens=4000):
        """Return recent conversation messages for the session.

        NOTE(review): max_tokens is currently unused; the window is a fixed
        20-message tail -- a real token budget would need a tokenizer.
        """
        with self.conn.cursor() as cur:
            cur.execute("""
                SELECT messages
                FROM chat_sessions
                WHERE session_id = %s
            """, (session_id,))
            result = cur.fetchone()
        if not result:
            return []
        return result[0][-20:]

    def get_recent_sessions(self, user_id, limit=10):
        """Return the user's most recently updated sessions."""
        with self.conn.cursor() as cur:
            cur.execute("""
                SELECT session_id, created_at, messages
                FROM chat_sessions
                WHERE user_id = %s
                ORDER BY updated_at DESC
                LIMIT %s
            """, (user_id, limit))
            return cur.fetchall()
# Create session table
# NOTE(review): bare string used as inline DDL documentation (never
# executed); run this once before using LLMSessionManager. It references
# a users(id) table that must already exist.
"""
CREATE TABLE chat_sessions (
session_id SERIAL PRIMARY KEY,
user_id INTEGER REFERENCES users(id),
messages JSONB DEFAULT '[]',
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
"""
Feature Store for ML
Storing Precomputed Features
class FeatureStore:
    """Read/write precomputed ML features stored as JSONB in ml_features."""

    def __init__(self, conn):
        self.conn = conn

    @staticmethod
    def _as_dict(value):
        """Decode a jsonb column value defensively.

        psycopg2 typically returns jsonb columns already parsed into
        dict/list, in which case json.loads would raise -- only parse when
        the driver hands back raw text/bytes.
        """
        import json
        if isinstance(value, (str, bytes, bytearray)):
            return json.loads(value)
        return value

    def store_features(self, entity_type, entity_id, features):
        """Upsert the features dict for (entity_type, entity_id).

        (The original did `import jsonb`, which is not a real module and
        raised ImportError; features are serialized with json instead.)
        """
        import json
        with self.conn.cursor() as cur:
            cur.execute("""
                INSERT INTO ml_features (entity_type, entity_id, features)
                VALUES (%s, %s, %s)
                ON CONFLICT (entity_type, entity_id)
                DO UPDATE SET features = EXCLUDED.features,
                              updated_at = NOW()
            """, (entity_type, entity_id, json.dumps(features)))
        self.conn.commit()

    def get_features(self, entity_type, entity_id):
        """Retrieve the features for one entity, or None if absent."""
        with self.conn.cursor() as cur:
            cur.execute("""
                SELECT features
                FROM ml_features
                WHERE entity_type = %s AND entity_id = %s
            """, (entity_type, entity_id))
            result = cur.fetchone()
        return self._as_dict(result[0]) if result else None

    def batch_get(self, entity_type, entity_ids):
        """Batch retrieve features; returns {entity_id: features}."""
        with self.conn.cursor() as cur:
            cur.execute("""
                SELECT entity_id, features
                FROM ml_features
                WHERE entity_type = %s AND entity_id = ANY(%s)
            """, (entity_type, list(entity_ids)))  # ANY(%s) needs a list
            return {row[0]: self._as_dict(row[1]) for row in cur.fetchall()}
# Create feature store table
# NOTE(review): UNIQUE(entity_type, entity_id) already creates a backing
# index, so idx_ml_features_entity below is redundant -- confirm before
# keeping both. Bare string used as inline DDL documentation (never executed).
"""
CREATE TABLE ml_features (
id SERIAL PRIMARY KEY,
entity_type VARCHAR(50) NOT NULL,
entity_id BIGINT NOT NULL,
features JSONB NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
UNIQUE(entity_type, entity_id)
);
CREATE INDEX idx_ml_features_entity ON ml_features (entity_type, entity_id);
"""
Semantic Caching
class SemanticCache:
    """Cache LLM responses keyed by semantic (embedding) similarity."""

    def __init__(self, conn, ttl=3600):
        self.conn = conn
        # TTL in seconds; entries older than this are ignored on lookup.
        # (The original stored ttl but never used it.)
        self.ttl = ttl

    def get_cached_response(self, query_embedding, threshold=0.95):
        """Return (response, similarity) for the closest cached query with
        cosine similarity >= threshold and age <= ttl, else None.
        """
        with self.conn.cursor() as cur:
            cur.execute("""
                SELECT response, 1 - (query_embedding <=> %s) AS similarity
                FROM query_cache
                WHERE query_embedding <=> %s < %s
                  AND created_at > NOW() - make_interval(secs => %s)
                ORDER BY query_embedding <=> %s
                LIMIT 1
            """, (query_embedding, query_embedding, 1 - threshold,
                  self.ttl, query_embedding))
            return cur.fetchone()

    def cache_response(self, query, query_embedding, response):
        """Insert a query/response pair into the cache."""
        with self.conn.cursor() as cur:
            cur.execute("""
                INSERT INTO query_cache (query, query_embedding, response)
                VALUES (%s, %s, %s)
            """, (query, query_embedding, response))
        self.conn.commit()
# Create cache table
# NOTE(review): bare string used as inline DDL documentation (never
# executed). Expired rows are only filtered at read time; add a periodic
# DELETE (or pg_cron job) on created_at to actually reclaim space.
"""
CREATE TABLE query_cache (
id SERIAL PRIMARY KEY,
query TEXT NOT NULL,
query_embedding VECTOR(1536),
response TEXT NOT NULL,
created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
);
CREATE INDEX idx_query_cache_embedding
ON query_cache
USING HNSW (query_embedding vector_cosine_ops);
"""
Best Practices
Performance Optimization
-- 1. Use appropriate index type
-- HNSW: best query speed/recall (slower build, more memory)
-- IVFFlat: faster build, good for very large datasets
-- 2. Batch inserts (one multi-row statement beats row-by-row round trips)
INSERT INTO documents (title, content, embedding)
SELECT title, content, embedding FROM temp_table;
-- 3. Monitor query performance (confirm the vector index is actually used)
EXPLAIN ANALYZE
SELECT * FROM documents
ORDER BY embedding <=> $embedding
LIMIT 10;
-- 4. Partition large tables
-- NOTE(review): the partition key (created_at) must be declared among the
-- columns (elided here as "-- columns") and included in any PRIMARY
-- KEY/UNIQUE constraint on a partitioned table.
CREATE TABLE documents (
id SERIAL,
embedding VECTOR(1536),
-- columns
) PARTITION BY RANGE (created_at);
-- 5. Use connection pooling
-- PgBouncer for better performance
Security
-- 1. Enable row-level security
ALTER TABLE documents ENABLE ROW LEVEL SECURITY;
-- 2. Create policies
-- NOTE(review): current_user_id() is not a PostgreSQL built-in -- it must
-- be an application-defined function (e.g. reading a session setting).
CREATE POLICY document_access_policy ON documents
FOR SELECT USING (owner_id = current_user_id());
-- 3. Use separate users for different access levels
CREATE ROLE app_readonly;
GRANT SELECT ON documents TO app_readonly;
-- 4. Encrypt sensitive data
-- Use pgcrypto extension
CREATE EXTENSION pgcrypto;
-- Encrypt before storing. (The original mixed a driver placeholder (%s)
-- into raw SQL, which is not valid SQL; bind real values from the client.)
INSERT INTO documents (content, encrypted_embedding)
VALUES ('sensitive', pgp_sym_encrypt('embedding-data', 'key'));
Resources
Conclusion
PostgreSQL with pgvector provides a powerful, cost-effective platform for AI applications. From vector similarity search to complete RAG pipelines, session management to feature stores, PostgreSQL can handle the data requirements of modern AI applications while leveraging its existing reliability and ecosystem.
In the final article, we’ll explore real-world production use cases for PostgreSQL across different industries and application types.
Comments