Introduction
The emergence of sqlite-vec has transformed SQLite from a traditional database into a viable vector database for AI applications. This development enables local-first AI applications, privacy-preserving embeddings storage, and cost-effective RAG (Retrieval-Augmented Generation) pipelines.
In 2026, SQLite’s vector search capabilities are being adopted for applications ranging from semantic search to recommendation systems, all running locally without external database dependencies.
Understanding Vector Search
What are Embeddings?
Embeddings are numerical representations of data (text, images, audio) that capture semantic meaning. Similar items have similar embedding vectors.
import numpy as np

# Simple embedding example.
# In reality, use sentence-transformers, OpenAI, etc.
texts = ["cat", "dog", "computer", "keyboard"]

# Hand-crafted 3-dimensional "embeddings" (real models produce hundreds of dims).
embeddings = {
    "cat": [0.1, 0.2, 0.9],       # Similar to dog
    "dog": [0.1, 0.2, 0.85],      # Similar to cat
    "computer": [0.8, 0.9, 0.1],  # Similar to keyboard
    "keyboard": [0.8, 0.85, 0.1]  # Similar to computer
}


def cosine_similarity(a, b):
    """Return the cosine of the angle between vectors a and b.

    1.0 means identical direction, 0.0 means orthogonal (unrelated).
    """
    return np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))


# BUG FIX: the second value is ~0.31 for these vectors, not ~0.17
# (0.35 / (sqrt(0.86) * sqrt(1.46)) = 0.312).
print(cosine_similarity(embeddings["cat"], embeddings["dog"]))       # ~1.00
print(cosine_similarity(embeddings["cat"], embeddings["computer"]))  # ~0.31
Vector Search in a Database
-- Traditional database: exact match on a column value
SELECT * FROM documents WHERE title = 'Python Tutorial';

-- Vector database: semantic similarity.
-- Find the documents whose embeddings are nearest to the query embedding;
-- `distance` is produced by the virtual table during the MATCH query.
SELECT id, title, distance
FROM document_vectors
WHERE embedding MATCH query_embedding
LIMIT 5;
Setting Up sqlite-vec
Installation
# Clone repository
git clone https://github.com/asg017/sqlite-vec.git
cd sqlite-vec

# Build the loadable extension
make

# This creates a platform-specific shared library:
#   vec0.dylib (macOS)
#   vec0.so    (Linux)
#   vec0.dll   (Windows)
Loading the Extension
import sqlite3

# Open (or create) the database and allow loading run-time extensions.
conn = sqlite3.connect('vectors.db')
conn.enable_load_extension(True)

# Load the sqlite-vec extension (vec0.so / vec0.dylib / vec0.dll).
conn.load_extension('./vec0')

# Verify the extension is available.
# BUG FIX: sqlite-vec exposes vec_version(), not vec0_version().
result = conn.execute("SELECT vec_version()").fetchone()
print(f"sqlite-vec version: {result[0]}")

# List existing tables to confirm the connection works.
tables = conn.execute(
    "SELECT name FROM sqlite_master WHERE type='table'"
).fetchall()
print("Tables:", tables)
Alternative: Using Pre-built Binaries
# Download pre-built binaries
import urllib.request
import platform
import os
def get_vec_extension():
    """Download the pre-built sqlite-vec loadable library for this platform.

    Returns:
        Local path to the extension file. The download happens only once;
        subsequent calls reuse the cached file.

    Raises:
        NotImplementedError: on Windows or on architectures with no
            pre-built x86_64 binary linked below.
    """
    system = platform.system()
    arch = platform.machine()  # e.g. 'x86_64', 'arm64'
    if system == 'Linux':
        url = "https://github.com/asg017/sqlite-vec/releases/latest/download/vec0-linux-x86_64.so"
        ext = ".so"
    elif system == 'Darwin':
        url = "https://github.com/asg017/sqlite-vec/releases/latest/download/vec0-darwin-x86_64.dylib"
        ext = ".dylib"
    else:
        raise NotImplementedError("Windows not supported")
    # BUG FIX: the original computed `arch` but never used it, so e.g. an
    # Apple-silicon Mac would silently fetch an incompatible x86_64 binary.
    if arch not in ('x86_64', 'AMD64'):
        raise NotImplementedError(f"No pre-built binary for {system}/{arch}; build from source")
    path = f"vec0{ext}"
    if not os.path.exists(path):
        urllib.request.urlretrieve(url, path)
    return path


vec_path = get_vec_extension()
print(f"Loaded: {vec_path}")
Creating Vector Tables
Basic Vector Table
-- Load the sqlite-vec extension into this connection
SELECT load_extension('./vec0');

-- Create a virtual table for embeddings.
-- The float[N] declaration fixes the vector dimension; every inserted
-- embedding must have exactly N components.
CREATE VIRTUAL TABLE article_vectors USING vec0(
    id INTEGER PRIMARY KEY,
    embedding float[768], -- 768-dimensional
    title TEXT NOT NULL,
    content TEXT
);

-- Or with automatic schema
CREATE VIRTUAL TABLE doc_vectors USING vec0(
    id INTEGER PRIMARY KEY,
    embedding float[1536],
    metadata JSON
);
Supported Vector Types
-- Float vectors (most common)
embedding float[768]

-- Integer vectors (quantized; 1 byte per component, 4x smaller than float32)
embedding int8[768]

-- Binary vectors (binary embeddings; 1 bit per component)
embedding bit[768]
Schema Details
-- Check table schema (sqlite3 CLI dot-command)
.schema article_vectors
-- Output:
-- CREATE VIRTUAL TABLE article_vectors USING vec0(
--   id INTEGER PRIMARY KEY,
--   embedding float[768],
--   title TEXT NOT NULL,
--   content TEXT
-- );

-- Get table info (custom pragma)
PRAGMA table_info(article_vectors);
Inserting Vectors
Python Insert Example
import sqlite3
import numpy as np
def create_vector_table(conn):
    """Create the vec0 virtual table for articles (embedding plus metadata).

    Idempotent thanks to IF NOT EXISTS; the float[384] dimension matches
    the embedding model used at insert time.
    """
    ddl = """
        CREATE VIRTUAL TABLE IF NOT EXISTS articles USING vec0(
            id INTEGER PRIMARY KEY,
            embedding float[384],
            title TEXT,
            content TEXT,
            author TEXT,
            created_at TEXT DEFAULT CURRENT_TIMESTAMP
        )
    """
    conn.execute(ddl)
def insert_article(conn, title, content, embedding):
    """Store one article row; the embedding array is serialized to raw bytes."""
    sql = "INSERT INTO articles (title, content, embedding) VALUES (?, ?, ?)"
    params = (title, content, embedding.tobytes())
    conn.execute(sql, params)
# Example usage: open the DB, load sqlite-vec, create the table.
conn = sqlite3.connect('knowledge.db')
conn.enable_load_extension(True)
conn.load_extension('./vec0')
create_vector_table(conn)

# Generate embeddings with sentence-transformers
# (all-MiniLM-L6-v2 outputs 384-dim vectors, matching the float[384] column).
from sentence_transformers import SentenceTransformer
model = SentenceTransformer('all-MiniLM-L6-v2')

articles = [
    ("Python Basics", "Python is a high-level programming language..."),
    ("JavaScript Guide", "JavaScript is a versatile scripting language..."),
    ("Database Design", "Good database design is crucial for performance..."),
]

# Encode each article body and store it alongside its embedding.
for title, content in articles:
    embedding = model.encode(content)
    insert_article(conn, title, content, embedding)

conn.commit()
print("Articles inserted with embeddings")
Bulk Insert
import sqlite3
import numpy as np
def bulk_insert_vectors(conn, items, batch_size=100):
    """Bulk insert vectors, committing once per batch.

    Args:
        conn: sqlite3 connection (the `vectors` table must already exist).
        items: sequence of dicts with 'data' (str) and 'embedding'
            (numpy array, serialized via .tobytes()).
        batch_size: rows per transaction.

    BUG FIX: the original wrapped everything in a single transaction (making
    batch_size meaningless) and ended with a dead, empty BEGIN/COMMIT pair.
    """
    for i in range(0, len(items), batch_size):
        batch = items[i:i + batch_size]
        # executemany runs the prepared statement at C speed for the batch.
        conn.executemany(
            "INSERT INTO vectors (data, embedding) VALUES (?, ?)",
            [(item['data'], item['embedding'].tobytes()) for item in batch]
        )
        # Commit per batch: a failure loses at most one batch and the
        # journal stays bounded.
        conn.commit()
# Usage (conn must already have the `vectors` table created)
vectors = [
    {'data': 'Document 1', 'embedding': np.random.randn(384)},
    {'data': 'Document 2', 'embedding': np.random.randn(384)},
    # ... more items
]
bulk_insert_vectors(conn, vectors)
Vector Similarity Search
Basic KNN Search
-- Find 5 most similar documents
SELECT
id,
title,
distance
FROM articles
WHERE embedding MATCH ?
LIMIT 5;
-- Using k parameter
SELECT id, distance
FROM articles
WHERE embedding MATCH ? AND k = 10;
Python Search Implementation
import sqlite3
import numpy as np
from sentence_transformers import SentenceTransformer
def semantic_search(conn, query, top_k=5):
    """Search for similar articles using a semantic query.

    Args:
        conn: sqlite3 connection with the vec0 extension loaded.
        query: natural-language search string.
        top_k: maximum number of results to return.

    Returns:
        List of (id, title, content, distance) tuples, nearest first.
    """
    # BUG FIX: the model is 'all-MiniLM-L6-v2' — no "v6" release exists.
    # It must match the model used at insert time (384 dims).
    # NOTE: constructing the model per call is slow; cache it in real code.
    model = SentenceTransformer('all-MiniLM-L6-v2')
    query_embedding = model.encode(query)
    results = conn.execute("""
        SELECT
            id,
            title,
            content,
            distance
        FROM articles
        WHERE embedding MATCH ?
        LIMIT ?
    """, (query_embedding.tobytes(), top_k)).fetchall()
    return results
# Usage: print the top matches with their distances (lower = closer)
results = semantic_search(conn, "programming language tutorial")
for row in results:
    print(f"ID: {row[0]}, Title: {row[1]}, Distance: {row[2]:.4f}")
    print(f"Content: {row[3][:100]}...")
    print("---")
Advanced Search
-- Filtered search: vector match combined with a metadata predicate
SELECT id, title, distance
FROM articles
WHERE embedding MATCH ?
AND author = 'John Doe'
LIMIT 10;

-- Distance threshold: fetch k candidates, keep only the close ones
SELECT id, title, distance
FROM articles
WHERE embedding MATCH ?
AND k = 20
AND distance < 0.5;

-- Combined with metadata search (LIKE filters on text columns)
SELECT id, title, distance
FROM articles
WHERE embedding MATCH ?
AND (author LIKE '%John%' OR title LIKE '%Python%')
LIMIT 10;
Building RAG Pipelines
RAG Architecture with SQLite
import sqlite3
import numpy as np
from sentence_transformers import SentenceTransformer
import openai
class SQLiteRAG:
    """RAG pipeline using SQLite for vector storage.

    Chunks documents, embeds each chunk with sentence-transformers, stores
    the vectors in a vec0 virtual table, and answers questions by feeding
    retrieved chunks to an LLM.
    """

    def __init__(self, db_path, model_name='all-MiniLM-L6-v2'):
        # NOTE(review): requires the sqlite-vec loadable extension at ./vec0;
        # construction fails if the file is missing.
        self.conn = sqlite3.connect(db_path)
        self.conn.enable_load_extension(True)
        self.conn.load_extension('./vec0')
        self.model = SentenceTransformer(model_name)
        self._init_database()

    def _init_database(self):
        """Initialize database tables (idempotent via IF NOT EXISTS)."""
        # float[384] matches the default all-MiniLM-L6-v2 output dimension.
        self.conn.execute("""
            CREATE VIRTUAL TABLE IF NOT EXISTS chunks USING vec0(
                id INTEGER PRIMARY KEY,
                chunk_text TEXT,
                embedding float[384],
                source TEXT,
                chunk_index INTEGER
            )
        """)
        self.conn.execute("""
            CREATE TABLE IF NOT EXISTS metadata (
                id INTEGER PRIMARY KEY,
                source TEXT,
                total_chunks INTEGER,
                created_at TEXT DEFAULT CURRENT_TIMESTAMP
            )
        """)

    def ingest_document(self, text, source, chunk_size=500, overlap=50):
        """Split document into chunks and store each with its embedding.

        Args:
            text: full document text.
            source: identifier stored alongside each chunk (e.g. a filename).
            chunk_size: characters per chunk.
            overlap: characters shared by consecutive chunks (must be < chunk_size).
        """
        chunks = self._chunk_text(text, chunk_size, overlap)
        for i, chunk in enumerate(chunks):
            embedding = self.model.encode(chunk)
            self.conn.execute(
                """
                INSERT INTO chunks (chunk_text, embedding, source, chunk_index)
                VALUES (?, ?, ?, ?)
                """,
                (chunk, embedding.tobytes(), source, i)
            )
        self.conn.execute(
            "INSERT INTO metadata (source, total_chunks) VALUES (?, ?)",
            (source, len(chunks))
        )
        self.conn.commit()

    def _chunk_text(self, text, size, overlap):
        """Split text into overlapping character chunks.

        NOTE(review): if overlap >= size, `start` never advances and this
        loops forever — callers must keep overlap < size.
        """
        chunks = []
        start = 0
        while start < len(text):
            end = start + size
            chunks.append(text[start:end])
            # Step back by `overlap` so adjacent chunks share context.
            start = end - overlap
        return chunks

    def retrieve(self, query, top_k=5):
        """Return the top_k (chunk_text, source, distance) rows for a query."""
        query_embedding = self.model.encode(query)
        results = self.conn.execute("""
            SELECT
                chunk_text,
                source,
                distance
            FROM chunks
            WHERE embedding MATCH ?
            LIMIT ?
        """, (query_embedding.tobytes(), top_k)).fetchall()
        return results

    def answer(self, question, context_limit=3000):
        """Answer a question using retrieved context and an LLM.

        Args:
            question: user question.
            context_limit: maximum characters of retrieved context included
                in the prompt.
        """
        # Retrieve more candidates than we may use; the budget below trims.
        chunks = self.retrieve(question, top_k=10)
        # Concatenate chunks until the context budget is exhausted.
        context = ""
        for chunk in chunks:
            if len(context) + len(chunk[0]) > context_limit:
                break
            context += chunk[0] + "\n\n"
        prompt = f"""Based on the following context, answer the question.
Context:
{context}
Question: {question}
Answer:"""
        # NOTE(review): openai.ChatCompletion is the pre-1.0 SDK interface;
        # openai>=1.0 uses client.chat.completions.create — confirm SDK version.
        response = openai.ChatCompletion.create(
            model="gpt-4",
            messages=[{"role": "user", "content": prompt}],
            temperature=0
        )
        return response.choices[0].message.content
# Usage
rag = SQLiteRAG('knowledge.db')

# Ingest documents (chunked and embedded on the fly)
with open('document.txt') as f:
    rag.ingest_document(f.read(), 'document.txt')

# Ask questions answered from the retrieved context
answer = rag.answer("What is Python?")
print(answer)
Hybrid Search
-- Combine vector and keyword search.
-- NOTE(review): bm25() only works on an FTS5 table; `chunks` here is a vec0
-- virtual table, so this query assumes a parallel FTS5 index over the same
-- rows — confirm the schema before using.
WITH vector_results AS (
    SELECT id, chunk_text, distance as vec_dist
    FROM chunks
    WHERE embedding MATCH ?
    LIMIT 10
)
SELECT
    v.id,
    v.chunk_text,
    v.vec_dist,
    bm25(chunks, ?) as keyword_score
FROM vector_results v
JOIN chunks c ON v.id = c.id
ORDER BY v.vec_dist + keyword_score
LIMIT 10;
Local AI Applications
Personal AI Assistant
import sqlite3
import numpy as np
from sentence_transformers import SentenceTransformer
import json
class PersonalKnowledgeBase:
    """Local knowledge base with semantic search over stored 'memories'."""

    def __init__(self, db_path):
        """Open the database, load sqlite-vec, and prepare the schema."""
        self.conn = sqlite3.connect(db_path)
        self.conn.enable_load_extension(True)
        self.conn.load_extension('./vec0')
        # BUG FIX: the model name is 'all-MiniLM-L6-v2' — no "v6" release
        # exists. Its 384-dim output matches the float[384] column below.
        self.model = SentenceTransformer('all-MiniLM-L6-v2')
        self._setup()

    def _setup(self):
        """Create tables if they don't exist (idempotent)."""
        self.conn.executescript("""
            CREATE VIRTUAL TABLE IF NOT EXISTS memories USING vec0(
                id INTEGER PRIMARY KEY,
                content TEXT,
                embedding float[384],
                tags TEXT,
                created_at TEXT DEFAULT CURRENT_TIMESTAMP
            );
            CREATE TABLE IF NOT EXISTS settings (
                key TEXT PRIMARY KEY,
                value TEXT
            );
        """)
        self.conn.commit()

    def add_memory(self, content, tags=None):
        """Add a memory; tags (a list) are stored as a JSON array string."""
        embedding = self.model.encode(content)
        self.conn.execute(
            "INSERT INTO memories (content, embedding, tags) VALUES (?, ?, ?)",
            (content, embedding.tobytes(), json.dumps(tags or []))
        )
        self.conn.commit()

    def search_memories(self, query, limit=5):
        """Search memories semantically.

        Returns a list of dicts with content, tags (decoded list),
        distance (lower = closer), and created_at.
        """
        query_embedding = self.model.encode(query)
        results = self.conn.execute("""
            SELECT
                content,
                tags,
                distance,
                created_at
            FROM memories
            WHERE embedding MATCH ?
            LIMIT ?
        """, (query_embedding.tobytes(), limit)).fetchall()
        return [{
            'content': r[0],
            'tags': json.loads(r[1]),
            'distance': r[2],
            'created_at': r[3]
        } for r in results]

    def get_stats(self):
        """Return knowledge base statistics (currently just the row count)."""
        count = self.conn.execute(
            "SELECT COUNT(*) FROM memories"
        ).fetchone()[0]
        return {'total_memories': count}
# Usage
kb = PersonalKnowledgeBase('my_knowledge.db')

# Add memories (tags are optional lists of strings)
kb.add_memory(
    "I love programming in Python and have been using it for 5 years",
    tags=['programming', 'python', 'hobbies']
)
kb.add_memory(
    "My favorite books are science fiction novels by Isaac Asimov",
    tags=['books', 'hobbies', 'scifi']
)

# Search semantically (lower distance = closer match)
results = kb.search_memories("What do I like to code in?")
for r in results:
    print(f"Content: {r['content']}")
    print(f"Distance: {r['distance']:.4f}")
    print(f"Tags: {r['tags']}")
    print("---")
Document Q&A System
class DocumentQASystem:
    """Simple document Q&A with SQLite vectors."""

    def __init__(self, db_path):
        """Open the database, load sqlite-vec, and create the docs table."""
        self.conn = sqlite3.connect(db_path)
        self.conn.enable_load_extension(True)
        self.conn.load_extension('./vec0')
        self._model = None  # lazily-created shared embedding model
        self._init_tables()

    def _get_model(self):
        """Create the embedding model once and reuse it (loading is expensive;
        the original rebuilt it on every call)."""
        from sentence_transformers import SentenceTransformer
        if self._model is None:
            # BUG FIX: the model name is 'all-MiniLM-L6-v2', not 'v6'.
            self._model = SentenceTransformer('all-MiniLM-L6-v2')
        return self._model

    def _init_tables(self):
        """Initialize tables.

        BUG FIX: the original also ran CREATE INDEX on `docs`, but SQLite
        rejects indexes on virtual tables ("virtual tables may not be
        indexed"), so that statement was removed.
        """
        self.conn.executescript("""
            CREATE VIRTUAL TABLE IF NOT EXISTS docs USING vec0(
                id INTEGER PRIMARY KEY,
                title TEXT,
                chunk_text TEXT,
                embedding float[384],
                doc_name TEXT
            );
        """)

    def load_document(self, doc_name, text, chunk_size=300):
        """Chunk the document and insert each chunk with its embedding."""
        model = self._get_model()
        # Fixed-size, non-overlapping character chunks.
        chunks = [text[i:i+chunk_size] for i in range(0, len(text), chunk_size)]
        for i, chunk in enumerate(chunks):
            embedding = model.encode(chunk)
            self.conn.execute(
                """
                INSERT INTO docs (title, chunk_text, embedding, doc_name)
                VALUES (?, ?, ?, ?)
                """,
                (f"{doc_name} - Part {i+1}", chunk, embedding.tobytes(), doc_name)
            )
        self.conn.commit()

    def query(self, question, top_k=3):
        """Return the top_k (chunk_text, doc_name, distance) rows for a question."""
        model = self._get_model()
        query_embedding = model.encode(question)
        results = self.conn.execute("""
            SELECT chunk_text, doc_name, distance
            FROM docs
            WHERE embedding MATCH ?
            LIMIT ?
        """, (query_embedding.tobytes(), top_k)).fetchall()
        return results
Performance Optimization
Indexing Strategies
-- Use appropriate vector dimensions:
-- smaller dimensions = faster search, less accuracy.

-- Create metadata indexes.
-- NOTE(review): SQLite cannot index virtual tables — these statements
-- require `docs` to be a regular table (or keep metadata in a side table).
CREATE INDEX idx_source ON docs(source);
CREATE INDEX idx_created ON docs(created_at);

-- Refresh table statistics for the query planner
ANALYZE docs;
Batch Processing
def efficient_batch_search(conn, queries, embedding_model):
    """Search several queries, encoding all of them up front.

    Args:
        conn: sqlite3 connection with a vec0 `docs` table.
        queries: list of query strings.
        embedding_model: object exposing .encode(str) -> numpy array.

    Returns:
        One (id, distance) row (or None) per query, in input order.
    """
    # BUG FIX: the original referenced an undefined name `model`;
    # the parameter is called `embedding_model`.
    query_embeddings = [embedding_model.encode(q) for q in queries]
    results = []
    for q, emb in zip(queries, query_embeddings):
        result = conn.execute(
            "SELECT id, distance FROM docs WHERE embedding MATCH ? LIMIT 1",
            (emb.tobytes(),)
        ).fetchone()
        results.append(result)
    return results
# Or use batch query (limited support)
def batch_search(conn, queries, model):
    """Search multiple queries in one call (if supported).

    Placeholder: some sqlite-vec versions support batched queries; this
    stub intentionally does nothing and returns None.
    """
    # Some vec versions support batch
    pass
Memory Management
-- Adjust the page cache for vector workloads
-- (a negative value sets the size in KiB rather than pages)
PRAGMA cache_size = -256000; -- 256MB

-- Use WAL mode so readers are not blocked during writes
PRAGMA journal_mode = WAL;

-- Consider memory-mapped I/O for large databases
PRAGMA mmap_size = 1073741824; -- 1GB
Deployment Considerations
Mobile Deployment
// iOS using SQLite.swift + custom vec integration
import SQLite
// Limitations:
// - Must compile vec extension for iOS
// - Limited vector dimensions recommended
// - Consider quantized vectors (int8)
// Android with SQLCipher + vec
// Similar considerations as iOS
// Pre-compile .so for ARM64/ARMv7
// Use smaller embedding models
Edge Computing
# Raspberry Pi / Edge deployment:
# use a small model (all-MiniLM-L6-v2, 384 dims) and int8-quantized vectors.
from sentence_transformers import SentenceTransformer
import numpy as np

# BUG FIX: the model is 'all-MiniLM-L6-v2' — there is no "v6" release.
model = SentenceTransformer('all-MiniLM-L6-v2')
embedding = model.encode("query")

# Quantize to int8. Clip to [-1, 1] first: components outside that range
# would overflow/wrap when cast to int8.
quantized = np.clip(embedding, -1.0, 1.0)
quantized = (quantized * 127).astype(np.int8)

# Store as blob
conn.execute(
    "INSERT INTO vectors (embedding) VALUES (?)",
    (quantized.tobytes(),)
)
Best Practices
Embedding Model Selection
# Recommended models for different use cases.
# The vector column dimension must match the chosen model's output size.

# Small & fast (384 dims)
model = SentenceTransformer('all-MiniLM-L6-v2')

# Better quality (768 dims)
model = SentenceTransformer('all-mpnet-base-v2')

# For specific domains:
# - bge-small-en (good general purpose)
# - bge-base-en (higher quality)
# - e5-small-v2 (good for retrieval)
Data Management
-- Regular cleanup of old embeddings
DELETE FROM vectors
WHERE created_at < datetime('now', '-90 days');

-- Update stale vectors in place
UPDATE vectors
SET embedding = ?,
    updated_at = CURRENT_TIMESTAMP
WHERE id = ?;

-- Monitor vector table size (page_count x page_size = bytes on disk)
PRAGMA page_count;
PRAGMA page_size;
Alternatives and Complementary Tools
# SQLite + other vector stores
# For larger scale:
# - ChromaDB (dedicated vector DB)
# - Pinecone (cloud)
# - Weaviate (open source)
# - Milvus (open source)
# For SQLite specifically:
# - sqlite-vec (main)
# - sqlite-vss (older, deprecated)
# - pgvector (PostgreSQL)
# When to use SQLite vectors:
# ✅ Local/edge applications
# ✅ Small to medium datasets (<100k vectors)
# ✅ Single-user applications
# ✅ Simple deployment requirements
Resources
Conclusion
SQLite with sqlite-vec enables powerful local AI applications that were previously impossible without external vector databases. From personal knowledge bases to RAG pipelines, SQLite provides a lightweight, privacy-preserving solution for vector storage and retrieval.
In the next article, we’ll explore production use cases for SQLite, including real-world patterns and implementation strategies.
Comments