Fine-tuning Large Language Models: Adapting LLMs for Specific Tasks
Fine-tuning adapts pre-trained LLMs to specific tasks or domains. This guide covers efficient fine-tuning methods and practical implementation.
Fine-tuning Fundamentals
Why Fine-tune?
# Pre-trained model limitations
# - General knowledge, not domain-specific
# - May not follow specific instructions
# - Lacks task-specific patterns
# Fine-tuning benefits
# - Adapt to specific domain
# - Improve performance on target task
# - Reduce hallucinations
# - Better instruction following
Full Fine-tuning vs Parameter-Efficient Methods
# Full fine-tuning: Update all parameters
# Pros: Best performance
# Cons: High memory, slow, requires large dataset
# LoRA (Low-Rank Adaptation): Update small matrices
# Pros: Memory efficient, fast, works with small datasets
# Cons: Slightly lower performance
# QLoRA: Quantized LoRA
# Pros: Very memory efficient, runs on consumer GPUs
# Cons: Slower training
# Prefix tuning: Add learnable prefix
# Pros: Efficient, modular
# Cons: Limited flexibility
Full Fine-tuning
Fine-tuning with Hugging Face
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    DataCollatorForLanguageModeling,
    TrainingArguments,
    Trainer,
)
from datasets import load_dataset
import torch

# Load model and tokenizer.
model_name = "gpt2"
tokenizer = AutoTokenizer.from_pretrained(model_name)
# GPT-2 ships without a pad token; reuse EOS so uneven batches can be padded.
tokenizer.pad_token = tokenizer.eos_token
model = AutoModelForCausalLM.from_pretrained(model_name)

# Prepare dataset: the 'text' loader yields one example per line of each file.
dataset = load_dataset('text', data_files={'train': 'train.txt', 'validation': 'val.txt'})

def tokenize_function(examples):
    """Tokenize a batch of raw text examples, truncating to the model context."""
    return tokenizer(examples['text'], truncation=True, max_length=512)

tokenized_dataset = dataset.map(tokenize_function, batched=True)

# The collator pads each batch dynamically and copies input_ids into labels
# (mlm=False selects the causal-LM objective). Without it the Trainer
# receives no 'labels' field and cannot compute a loss.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Training arguments.
training_args = TrainingArguments(
    output_dir='./results',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    per_device_eval_batch_size=8,
    warmup_steps=500,
    weight_decay=0.01,
    logging_dir='./logs',
    logging_steps=100,
    evaluation_strategy='epoch',
    save_strategy='epoch',       # must match evaluation_strategy for the line below
    load_best_model_at_end=True,
)

# Trainer wires model, data, and hyperparameters together.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset['train'],
    eval_dataset=tokenized_dataset['validation'],
    data_collator=data_collator,
)

# Train.
trainer.train()

# Save model and tokenizer together so the output directory is self-contained.
model.save_pretrained('./fine_tuned_model')
tokenizer.save_pretrained('./fine_tuned_model')
Fine-tuning for Instruction Following
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    DataCollatorForLanguageModeling,
    TrainingArguments,
    Trainer,
)
from datasets import Dataset
import json

# Load instruction-following dataset: one JSON object per line (JSONL).
with open('instructions.jsonl', 'r') as f:
    data = [json.loads(line) for line in f]

# Format each record as a single prompt/response training string.
texts = [
    f"Instruction: {item['instruction']}\nResponse: {item['response']}"
    for item in data
]
dataset = Dataset.from_dict({'text': texts})

# Tokenize.
tokenizer = AutoTokenizer.from_pretrained('gpt2')
tokenizer.pad_token = tokenizer.eos_token  # GPT-2 has no pad token by default

def tokenize_function(examples):
    """Tokenize a batch of formatted instruction strings."""
    # No static padding here: the collator below pads per batch, which is
    # cheaper than padding every example to max_length.
    return tokenizer(examples['text'], truncation=True, max_length=512)

tokenized_dataset = dataset.map(tokenize_function, batched=True)

# Pads each batch and mirrors input_ids into labels (causal-LM objective);
# without a collator the Trainer has no labels to compute a loss from.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Train.
model = AutoModelForCausalLM.from_pretrained('gpt2')
training_args = TrainingArguments(
    output_dir='./instruction_model',
    num_train_epochs=5,
    per_device_train_batch_size=4,
    learning_rate=2e-5,
)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,
    data_collator=data_collator,
)
trainer.train()
Parameter-Efficient Fine-tuning
LoRA (Low-Rank Adaptation)
from peft import get_peft_model, LoraConfig, TaskType
from transformers import AutoModelForCausalLM, AutoTokenizer

# Load the base model and tokenizer to be adapted.
model_name = "meta-llama/Llama-2-7b"
model = AutoModelForCausalLM.from_pretrained(model_name)
tokenizer = AutoTokenizer.from_pretrained(model_name)

# LoRA injects small trainable low-rank matrices into the selected
# projection layers while the base weights stay frozen.
lora_config = LoraConfig(
    task_type=TaskType.CAUSAL_LM,
    r=8,                                  # rank of the low-rank update
    lora_alpha=32,                        # scaling applied to the update
    lora_dropout=0.1,
    bias="none",                          # leave bias terms untouched
    target_modules=["q_proj", "v_proj"],  # which modules receive adapters
)

# Wrap the base model with the adapters.
model = get_peft_model(model, lora_config)

# Show how few parameters actually train, e.g.:
# trainable params: 4,194,304 || all params: 6,738,415,616 || trainable%: 0.06
model.print_trainable_parameters()

# Fine-tune with LoRA.
from transformers import TrainingArguments, Trainer

training_args = TrainingArguments(
    output_dir='./lora_model',
    num_train_epochs=3,
    per_device_train_batch_size=8,
    learning_rate=1e-4,   # adapters tolerate a higher LR than full fine-tuning
)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset,  # assumes a tokenized dataset prepared earlier
)
trainer.train()

# Persist only the small adapter weights, not the full base model.
model.save_pretrained('./lora_weights')
QLoRA (Quantized LoRA)
from peft import prepare_model_for_kbit_training, get_peft_model, LoraConfig, TaskType
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
import torch

# 4-bit NF4 quantization with double quantization; computation in bfloat16.
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_use_double_quant=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

# Load the base model already quantized, placed automatically across
# available devices.
model_name = "meta-llama/Llama-2-7b"
model = AutoModelForCausalLM.from_pretrained(
    model_name,
    quantization_config=bnb_config,
    device_map="auto",
)

# Prepare the quantized model for training (PEFT helper for k-bit models).
model = prepare_model_for_kbit_training(model)

# Adapter configuration: higher rank than the plain-LoRA example and all
# four attention projections targeted.
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.CAUSAL_LM,
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj"],
)

# Attach the LoRA adapters to the quantized base.
model = get_peft_model(model, lora_config)

# Training then proceeds as with plain LoRA, at a fraction of the memory
# cost of full fine-tuning.
Instruction Fine-tuning
Creating Instruction Datasets
import json

# Format 1: plain instruction/response pairs.
instruction_data = [
    {
        "instruction": "What is machine learning?",
        "response": "Machine learning is a subset of AI that enables systems to learn from data."
    },
    {
        "instruction": "Explain neural networks",
        "response": "Neural networks are computational models inspired by biological neurons..."
    }
]

# Format 2: instruction plus a separate input the model should operate on.
instruction_input_output = [
    {
        "instruction": "Classify sentiment",
        "input": "This product is amazing!",
        "output": "Positive"
    }
]

# Format 3: multi-turn conversation transcripts.
conversation_data = [
    {
        "messages": [
            {"role": "user", "content": "What is Python?"},
            {"role": "assistant", "content": "Python is a programming language..."},
            {"role": "user", "content": "What are its uses?"},
            {"role": "assistant", "content": "Python is used for..."}
        ]
    }
]

# Persist format 1 as JSONL: one JSON object per line.
with open('instructions.jsonl', 'w') as f:
    f.writelines(json.dumps(record) + '\n' for record in instruction_data)
Fine-tuning for Chat
from transformers import (
    AutoTokenizer,
    AutoModelForCausalLM,
    DataCollatorForLanguageModeling,
    TrainingArguments,
    Trainer,
)
from datasets import load_dataset

# Load chat dataset: one {"messages": [...]} object per line.
dataset = load_dataset('json', data_files='chat_data.jsonl')

# Tokenize conversations.
# NOTE(review): the hub id for this model is usually
# 'meta-llama/Llama-2-7b-chat-hf' — verify against the Hugging Face Hub.
tokenizer = AutoTokenizer.from_pretrained('meta-llama/Llama-2-7b-chat')
if tokenizer.pad_token is None:
    # Llama tokenizers ship without a pad token; needed for batch padding.
    tokenizer.pad_token = tokenizer.eos_token

def format_chat(examples):
    """Flatten each message list into a 'User:/Assistant:' transcript, then tokenize."""
    formatted = []
    for messages in examples['messages']:
        text = ""
        for msg in messages:
            if msg['role'] == 'user':
                text += f"User: {msg['content']}\n"
            else:
                text += f"Assistant: {msg['content']}\n"
        formatted.append(text)
    return tokenizer(formatted, truncation=True, max_length=512)

tokenized_dataset = dataset.map(format_chat, batched=True)

# Pads each batch and mirrors input_ids into labels for the causal-LM loss;
# without it the Trainer cannot batch variable-length examples or compute loss.
data_collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)

# Train.
model = AutoModelForCausalLM.from_pretrained('meta-llama/Llama-2-7b-chat')
training_args = TrainingArguments(
    output_dir='./chat_model',
    num_train_epochs=3,
    per_device_train_batch_size=4,
    learning_rate=2e-5,
)
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=tokenized_dataset['train'],
    data_collator=data_collator,
)
trainer.train()
Evaluation and Testing
from transformers import pipeline, AutoTokenizer, AutoModelForSequenceClassification
import torch

# --- Qualitative check: sample generations from the fine-tuned model ---
model_path = './fine_tuned_model'
generator = pipeline('text-generation', model=model_path)

test_prompts = [
    "Machine learning is",
    "The future of AI",
    "Python is used for"
]
for prompt in test_prompts:
    result = generator(prompt, max_length=50, num_return_sequences=1)
    print(f"Prompt: {prompt}")
    print(f"Generated: {result[0]['generated_text']}\n")

# --- Quantitative check: accuracy/F1 on a labeled benchmark ---
from datasets import load_dataset
from torch.utils.data import DataLoader
from sklearn.metrics import accuracy_score, f1_score

# Use the validation split: the GLUE MRPC *test* split ships with label -1
# (unlabeled), so metrics computed on it are meaningless.
test_dataset = load_dataset('glue', 'mrpc', split='validation')

# A text-generation model has no class logits; classification metrics need a
# model with a sequence-classification head fine-tuned for this task.
# NOTE(review): assumes model_path contains such a checkpoint — confirm.
clf_tokenizer = AutoTokenizer.from_pretrained(model_path)
clf_model = AutoModelForSequenceClassification.from_pretrained(model_path)
clf_model.eval()  # disable dropout for deterministic evaluation

def collate(batch):
    """Tokenize a batch of MRPC sentence pairs; raw strings can't be fed to the model."""
    enc = clf_tokenizer(
        [ex['sentence1'] for ex in batch],
        [ex['sentence2'] for ex in batch],
        truncation=True,
        padding=True,
        return_tensors='pt',
    )
    enc['labels'] = torch.tensor([ex['label'] for ex in batch])
    return enc

predictions = []
labels = []
for batch in DataLoader(test_dataset, batch_size=8, collate_fn=collate):
    gold = batch.pop('labels')  # keep labels out of the forward pass inputs
    with torch.no_grad():
        outputs = clf_model(**batch)
    preds = torch.argmax(outputs.logits, dim=-1)
    predictions.extend(preds.cpu().numpy())
    labels.extend(gold.numpy())

accuracy = accuracy_score(labels, predictions)
f1 = f1_score(labels, predictions, average='weighted')
print(f"Accuracy: {accuracy:.4f}")
print(f"F1 Score: {f1:.4f}")
Best Practices
- Start with small learning rate: 1e-5 to 5e-5 for fine-tuning
- Use validation set: Monitor overfitting
- Gradient accumulation: Simulate larger batch sizes
- Warmup steps: Gradually increase learning rate
- Early stopping: Stop when validation loss plateaus
- Save checkpoints: Save best model during training
- Use LoRA for efficiency: Especially for large models
Common Pitfalls
Bad Practice:
# Don't: Use high learning rate
# (0.01 is orders of magnitude above typical fine-tuning rates and will
# catastrophically overwrite the pre-trained weights.)
training_args = TrainingArguments(learning_rate=0.01)
# Don't: No validation
# (Without eval_dataset there is no way to detect overfitting during training.)
trainer = Trainer(model=model, args=args, train_dataset=train_data)
# Don't: Train on full dataset without splitting
# NOTE(review): `.fit(...)` is Keras-style, not a transformers API — shown
# here only as an anti-pattern illustration.
model.fit(all_data, epochs=100)
Good Practice:
# Do: Use appropriate learning rate
# (2e-5 is within the commonly recommended 1e-5 to 5e-5 range for fine-tuning.)
training_args = TrainingArguments(learning_rate=2e-5)
# Do: Monitor validation
# (Passing eval_dataset lets the Trainer report validation loss and enables
# early stopping / best-model selection.)
trainer = Trainer(
    model=model,
    args=args,
    train_dataset=train_data,
    eval_dataset=val_data,
)
# Do: Split data properly
# (requires: from sklearn.model_selection import train_test_split)
train_data, val_data = train_test_split(data, test_size=0.2)
Conclusion
Fine-tuning adapts pre-trained LLMs to specific tasks efficiently. Use parameter-efficient methods like LoRA for resource-constrained environments. Carefully prepare datasets, monitor training, and evaluate on appropriate benchmarks to build effective domain-specific models.
Comments