TensorFlow & Keras: Building Neural Networks with Python
TensorFlow is Google’s open-source machine learning framework, and Keras is its high-level API that simplifies neural network development. Together, they provide a powerful platform for building and deploying deep learning models.
Getting Started with TensorFlow and Keras
Installation
# Install TensorFlow (the Keras API ships bundled inside the tensorflow package)
pip install tensorflow
# Verify the installation by importing the package and printing its version
python -c "import tensorflow as tf; print(tf.__version__)"
Basic Concepts
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
import numpy as np
# TensorFlow's core data structure is the tensor: an immutable
# multi-dimensional array, similar to a NumPy ndarray but optimized
# for accelerator (GPU) execution.

# One example tensor for each rank, 0 through 3.
scalar = tf.constant(5)
vector = tf.constant([1, 2, 3])
matrix = tf.constant([[1, 2], [3, 4]])
cube = tf.constant([[[1, 2], [3, 4]], [[5, 6], [7, 8]]])

for label, tensor in (("Scalar", scalar),
                      ("Vector", vector),
                      ("Matrix", matrix),
                      ("3D tensor", cube)):
    print(f"{label} shape: {tensor.shape}")

# Element-wise and matrix arithmetic on two float32 tensors.
lhs = tf.constant([[1, 2], [3, 4]], dtype=tf.float32)
rhs = tf.constant([[5, 6], [7, 8]], dtype=tf.float32)
print(f"Addition: {tf.add(lhs, rhs)}")
print(f"Matrix multiplication: {tf.matmul(lhs, rhs)}")
print(f"Element-wise multiplication: {tf.multiply(lhs, rhs)}")
Building Models with Keras
Sequential API (Simple Models)
The Sequential API is ideal for linear stacks of layers.
from tensorflow.keras import Sequential, layers
from tensorflow.keras.datasets import mnist
from tensorflow.keras.utils import to_categorical
# Fetch the MNIST handwritten-digit dataset (downloaded on first use).
(X_train, y_train), (X_test, y_test) = mnist.load_data()

# Scale pixel intensities from [0, 255] into [0, 1].
X_train, X_test = (split.astype('float32') / 255.0
                   for split in (X_train, X_test))

# Flatten each 28x28 image into a 784-dimensional vector so it can
# feed a dense network.
X_train_flat = X_train.reshape(-1, 28 * 28)
X_test_flat = X_test.reshape(-1, 28 * 28)

# Convert integer digit labels into one-hot vectors over 10 classes.
y_train_encoded = to_categorical(y_train, 10)
y_test_encoded = to_categorical(y_test, 10)
# Linear stack of layers: three ReLU hidden layers with dropout for
# regularization, then a 10-way softmax output for digit classes.
model = Sequential([
    layers.Dense(128, activation='relu', input_shape=(784,)),
    layers.Dropout(0.2),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.2),
    layers.Dense(32, activation='relu'),
    layers.Dense(10, activation='softmax'),
])

# Categorical cross-entropy matches the one-hot encoded labels.
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Print per-layer output shapes and parameter counts.
model.summary()

# Train for 10 epochs, holding out 20% of the training data as a
# validation set to monitor generalization.
history = model.fit(X_train_flat, y_train_encoded,
                    epochs=10,
                    batch_size=32,
                    validation_split=0.2,
                    verbose=1)

# Final evaluation on data the model never saw during training.
test_loss, test_accuracy = model.evaluate(X_test_flat, y_test_encoded)
print(f"Test Accuracy: {test_accuracy:.4f}")

# Predict class probabilities for five test images; argmax picks the class.
predictions = model.predict(X_test_flat[:5])
print(f"Predicted classes: {np.argmax(predictions, axis=1)}")
Functional API (Complex Models)
The Functional API allows building more complex architectures with multiple inputs/outputs.
from tensorflow.keras import Input, Model, layers
# Functional-style graph: a single input feeds a shared trunk that
# branches into two independent softmax heads.
inputs = Input(shape=(784,))

# Shared trunk of dense + dropout layers.
trunk = layers.Dense(128, activation='relu')(inputs)
trunk = layers.Dropout(0.2)(trunk)
trunk = layers.Dense(64, activation='relu')(trunk)
trunk = layers.Dropout(0.2)(trunk)

# Two classification heads; the auxiliary head is down-weighted below.
output_main = layers.Dense(10, activation='softmax', name='main_output')(trunk)
output_aux = layers.Dense(10, activation='softmax', name='aux_output')(trunk)

# A Model ties the graph together from inputs to both outputs.
model = Model(inputs=inputs, outputs=[output_main, output_aux])
model.compile(optimizer='adam',
              loss=['categorical_crossentropy', 'categorical_crossentropy'],
              loss_weights=[1.0, 0.5],  # aux head contributes half as much
              metrics=['accuracy'])

# Training requires one target array per output:
# model.fit(X_train_flat, [y_train_encoded, y_train_encoded], epochs=10)
Custom Models with Subclassing
For maximum flexibility, subclass `keras.Model`: define your layers in `__init__` and the forward pass in `call()`.
class CustomModel(keras.Model):
    """Subclassed model: three dense layers with dropout in between.

    Layers are created in ``__init__`` and wired together in ``call``;
    dropout is active only when ``training=True`` is passed through.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.dense1 = layers.Dense(128, activation='relu')
        self.dropout1 = layers.Dropout(0.2)
        self.dense2 = layers.Dense(64, activation='relu')
        self.dropout2 = layers.Dropout(0.2)
        self.dense3 = layers.Dense(num_classes, activation='softmax')

    def call(self, inputs, training=False):
        """Forward pass; `training` toggles the dropout layers."""
        hidden = self.dropout1(self.dense1(inputs), training=training)
        hidden = self.dropout2(self.dense2(hidden), training=training)
        return self.dense3(hidden)
# A subclassed model is instantiated and compiled like any other.
custom_model = CustomModel()
custom_model.compile(optimizer='adam',
                     loss='categorical_crossentropy',
                     metrics=['accuracy'])
# custom_model.fit(X_train_flat, y_train_encoded, epochs=10)
Advanced Training Techniques
Custom Training Loop
For fine-grained control over training:
@tf.function  # trace into a static graph for faster execution
def train_step(model, x, y, loss_fn, optimizer):
    """Run one gradient-descent step on a single batch; return its loss."""
    # Record the forward pass so gradients can be computed.
    with tf.GradientTape() as tape:
        logits = model(x, training=True)
        loss_value = loss_fn(y, logits)
    weights = model.trainable_weights
    grads = tape.gradient(loss_value, weights)
    optimizer.apply_gradients(zip(grads, weights))
    return loss_value
# Custom training loop around train_step.
model = Sequential([
    layers.Dense(128, activation='relu', input_shape=(784,)),
    layers.Dense(10, activation='softmax'),
])
optimizer = keras.optimizers.Adam(learning_rate=0.001)
loss_fn = keras.losses.CategoricalCrossentropy()

# Build the input pipeline ONCE, outside the epoch loop.  The original
# rebuilt from_tensor_slices().shuffle().batch() every epoch, paying the
# pipeline-construction cost repeatedly.  Dataset.shuffle reshuffles on
# every iteration by default (reshuffle_each_iteration=True), so each
# epoch still sees the data in a fresh order.
dataset = (
    tf.data.Dataset.from_tensor_slices((X_train_flat, y_train_encoded))
    .shuffle(10000)
    .batch(32)
)

epochs = 5
for epoch in range(epochs):
    print(f"Epoch {epoch + 1}/{epochs}")
    for step, (x_batch, y_batch) in enumerate(dataset):
        loss = train_step(model, x_batch, y_batch, loss_fn, optimizer)
        if step % 100 == 0:  # log periodically, not every batch
            print(f"Step {step}, Loss: {loss:.4f}")
Learning Rate Scheduling
Adjust learning rate during training for better convergence:
# Exponential decay schedule: multiply the learning rate by 0.96 every
# 10,000 optimizer steps; staircase=True applies the decay in discrete
# jumps rather than continuously.
initial_learning_rate = 0.1
lr_schedule = keras.optimizers.schedules.ExponentialDecay(
    initial_learning_rate,
    decay_steps=10000,
    decay_rate=0.96,
    staircase=True,
)
optimizer = keras.optimizers.Adam(learning_rate=lr_schedule)


# Step decay via a callback: keep the rate unchanged for the first 10
# epochs, then shrink it by a factor of e^-0.1 each epoch.
def lr_scheduler(epoch, lr):
    if epoch < 10:
        return lr
    return lr * tf.math.exp(-0.1)


lr_callback = keras.callbacks.LearningRateScheduler(lr_scheduler)
# model.fit(X_train, y_train, callbacks=[lr_callback], epochs=20)
Callbacks for Monitoring
# Stop when validation loss hasn't improved for 3 epochs, and restore
# the weights from the best epoch seen.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

# Persist the best model (by validation accuracy) to disk during training.
model_checkpoint = keras.callbacks.ModelCheckpoint(
    'best_model.h5',
    monitor='val_accuracy',
    save_best_only=True,
)

# Halve the learning rate whenever validation loss plateaus for 2 epochs.
reduce_lr = keras.callbacks.ReduceLROnPlateau(
    monitor='val_loss',
    factor=0.5,
    patience=2,
    min_lr=1e-7,
)


class CustomCallback(keras.callbacks.Callback):
    """Stop training as soon as training accuracy exceeds 95%."""

    def on_epoch_end(self, epoch, logs=None):
        # `logs` is documented as possibly None/empty, and the metric key
        # may be absent -- the original `logs['accuracy']` would raise a
        # TypeError/KeyError in that case.  Guard before comparing.
        if logs and logs.get('accuracy', 0.0) > 0.95:
            print(f"\nReached 95% accuracy at epoch {epoch}")
            self.model.stop_training = True


# model.fit(X_train, y_train,
#           callbacks=[early_stopping, model_checkpoint, reduce_lr, CustomCallback()],
#           epochs=100)
Convolutional Neural Networks (CNN)
# Convolutional feature extractor (Conv -> Pool stacks) followed by a
# small dense classifier head.
cnn_model = Sequential([
    layers.Conv2D(32, (3, 3), activation='relu', input_shape=(28, 28, 1)),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.Flatten(),
    layers.Dense(64, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(10, activation='softmax'),
])

# Sparse loss: takes the integer labels directly, no one-hot needed.
cnn_model.compile(optimizer='adam',
                  loss='sparse_categorical_crossentropy',
                  metrics=['accuracy'])

# Conv2D expects a trailing channels axis: (height, width, channels).
X_train_cnn = X_train.reshape(-1, 28, 28, 1)
X_test_cnn = X_test.reshape(-1, 28, 28, 1)
# cnn_model.fit(X_train_cnn, y_train, epochs=10, validation_split=0.2)
Transfer Learning
Leverage pre-trained models for faster training:
from tensorflow.keras.applications import VGG16
from tensorflow.keras.preprocessing import image
# Start from VGG16 trained on ImageNet, dropping its classifier head
# (include_top=False) so we can attach our own.
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(224, 224, 3))

# Phase 1: freeze the entire convolutional base so only the new head
# is trained at first.
base_model.trainable = False

model = Sequential([
    base_model,
    layers.Flatten(),
    layers.Dense(256, activation='relu'),
    layers.Dropout(0.5),
    layers.Dense(10, activation='softmax'),
])
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=['accuracy'])

# Phase 2 (fine-tuning): unfreeze only the last four layers of the base.
base_model.trainable = True
for layer in base_model.layers[:-4]:
    layer.trainable = False

# Recompile with a much smaller learning rate so fine-tuning nudges,
# rather than destroys, the pre-trained features.
model.compile(optimizer=keras.optimizers.Adam(learning_rate=1e-5),
              loss='categorical_crossentropy',
              metrics=['accuracy'])
Model Saving and Loading
# Full model (architecture + weights + optimizer state), two formats.
model.save('my_model.h5')  # single-file HDF5 format
model.save('my_model')     # SavedModel directory format
# NOTE(review): newer Keras 3 releases expect a `.keras` extension for
# `save()` -- confirm against the installed TensorFlow version.

# Restore a full model from disk.
loaded_model = keras.models.load_model('my_model.h5')

# Weights only -- the architecture must be rebuilt separately.
model.save_weights('model_weights.h5')
new_model = Sequential([...])  # placeholder: rebuild the same layer stack
new_model.load_weights('model_weights.h5')

# Architecture only, serialized as a JSON string.
model_json = model.to_json()
with open('model_architecture.json', 'w') as f:
    f.write(model_json)

# Rebuild an (untrained) model from the saved JSON architecture.
with open('model_architecture.json', 'r') as f:
    loaded_model = keras.models.model_from_json(f.read())
Best Practices
- Data Preprocessing: Normalize inputs, handle missing values
- Batch Normalization: Stabilizes training, allows higher learning rates
- Dropout: Prevents overfitting in deep networks
- Validation Monitoring: Always use validation set to detect overfitting
- Checkpointing: Save best model during training
- Learning Rate Tuning: Start with default, adjust based on training curves
- Regularization: Use L1/L2 regularization for weight constraints
Common Pitfalls
Bad Practice:
# Don't: Train without validation -- overfitting goes completely undetected
model.fit(X_train, y_train, epochs=100)
# Don't: Use raw, unscaled data -- large input magnitudes destabilize training
model.fit(X_raw, y) # X_raw has values in [0, 10000]
# Don't: Ignore class imbalance -- the model learns to favor the majority class
model.fit(X_imbalanced, y_imbalanced)
Good Practice:
# Do: Monitor validation loss and stop training when it stops improving
model.fit(X_train, y_train,
validation_split=0.2,
callbacks=[early_stopping])
# Do: Normalize data into [0, 1] before training
X_normalized = X / 255.0
# Do: Handle class imbalance by weighting the loss per class
class_weights = {0: 1.0, 1: 5.0} # Weight minority class higher
model.fit(X, y, class_weight=class_weights)
Conclusion
TensorFlow and Keras provide a comprehensive ecosystem for deep learning. The Sequential API handles most use cases, while the Functional API and subclassing offer flexibility for complex architectures. Master the fundamentals, use callbacks for monitoring, and always validate on separate data. The combination of TensorFlow’s power and Keras’s simplicity makes building neural networks accessible to developers of all levels.
Comments