Skip to main content
⚡ Calmops

OpenTelemetry Complete Guide: Universal Observability

Introduction

OpenTelemetry (OTel) is an open-source observability framework that provides vendor-neutral APIs, SDKs, and tools for collecting telemetry data. This guide covers everything you need to implement unified observability.

Understanding OpenTelemetry

What is OpenTelemetry?

OpenTelemetry provides:

  • Vendor-neutral APIs
  • Language SDKs
  • Collector components
  • Protocol standards
graph LR
    %% Left-to-right data flow: applications -> OpenTelemetry -> backends.
    %% Group 1: the instrumented applications that produce telemetry.
    subgraph "Applications"
        App1[App 1]
        App2[App 2]
        App3[App 3]
    end
    
    %% Group 2: the OpenTelemetry pieces sitting between apps and backends.
    subgraph "OpenTelemetry"
        SDK[SDKs]
        Collector[Collector]
    end
    
    %% Group 3: the destinations the Collector forwards to.
    subgraph "Backends"
        Jaeger[Jaeger]
        Prometheus[Prometheus]
        Grafana[Grafana]
    end
    
    %% Every application feeds the SDK node, which feeds the Collector.
    App1 --> SDK
    App2 --> SDK
    App3 --> SDK
    SDK --> Collector
    %% The Collector fans out to each backend.
    Collector --> Jaeger
    Collector --> Prometheus
    Collector --> Grafana

Key Concepts

Concept Description
Trace Request path through system
Span Single operation in trace
Metric Numeric measurement
Log Discrete event
Attribute Key-value metadata

Setting Up

Installation

# Install packages
# Core API, Node SDK, auto-instrumentation bundle and the OTLP/gRPC trace exporter.
npm install @opentelemetry/api
npm install @opentelemetry/sdk-node
npm install @opentelemetry/auto-instrumentations-node
npm install @opentelemetry/exporter-trace-otlp-grpc
# Packages imported directly by later snippets in this guide. Install them
# explicitly rather than relying on them being hoisted as transitive
# dependencies (strict package managers such as pnpm will not expose them).
npm install @opentelemetry/instrumentation-http
npm install @opentelemetry/instrumentation-express
npm install @opentelemetry/sdk-trace-base
npm install @opentelemetry/semantic-conventions

Basic Configuration

// tracing.ts
// Bootstraps the OpenTelemetry Node SDK. Load this file before any other
// application module so the HTTP and Express instrumentations can patch those
// libraries before they are first imported.
import { NodeSDK } from '@opentelemetry/sdk-node';
import { OTLPTraceExporter } from '@opentelemetry/exporter-trace-otlp-grpc';
import { HttpInstrumentation } from '@opentelemetry/instrumentation-http';
import { ExpressInstrumentation } from '@opentelemetry/instrumentation-express';

const sdk = new NodeSDK({
  traceExporter: new OTLPTraceExporter({
    // The scheme is significant: `http://` selects a plaintext gRPC channel,
    // which is what a local collector listening on 4317 expects. A bare
    // `host:port` may be interpreted as a TLS endpoint and fail to connect.
    url: 'http://localhost:4317',
  }),
  instrumentations: [
    new HttpInstrumentation(),
    new ExpressInstrumentation(),
  ],
});

sdk.start();

// Flush buffered spans before the process exits; otherwise the final batch is
// dropped when the container or process receives SIGTERM.
process.on('SIGTERM', () => {
  sdk
    .shutdown()
    .catch((error: unknown) => {
      console.error('Error shutting down OpenTelemetry SDK', error);
    })
    .finally(() => process.exit(0));
});

Tracing

Creating Spans

import { trace, SpanKind, SpanStatusCode } from '@opentelemetry/api';

const tracer = trace.getTracer('my-service');

/**
 * Runs the order pipeline (validate, charge, ship) inside a `processOrder` span.
 *
 * The span is made active for the duration of the callback, so spans started
 * by the awaited steps become its children.
 *
 * @param orderId - Identifier of the order; recorded as the `order.id` attribute.
 * @throws Rethrows whatever any pipeline step throws, after recording it on the span.
 */
async function processOrder(orderId: string): Promise<void> {
  // INTERNAL is the default span kind; it is spelled out here to show where
  // SERVER / CLIENT / PRODUCER / CONSUMER would be set.
  return tracer.startActiveSpan(
    'processOrder',
    { kind: SpanKind.INTERNAL },
    async (span) => {
      try {
        // Add attributes
        span.setAttribute('order.id', orderId);
        span.setAttribute('customer.region', 'us-east');

        // Nested spans
        await validateOrder(orderId);
        await chargePayment(orderId);
        await shipOrder(orderId);

        span.setAttribute('order.status', 'completed');
      } catch (error: unknown) {
        // `catch` variables are `unknown` under strict mode; normalise before
        // handing the value to the span API.
        const exception = error instanceof Error ? error : String(error);
        span.recordException(exception);
        // Span status is the standard failure signal that backends use to flag
        // a span as failed, rather than an ad-hoc `error` attribute.
        span.setStatus({
          code: SpanStatusCode.ERROR,
          message: error instanceof Error ? error.message : String(error),
        });
        throw error;
      } finally {
        // Always end the span, on success or failure, or it is never exported.
        span.end();
      }
    },
  );
}

Context Propagation

import { context, propagation, ROOT_CONTEXT } from '@opentelemetry/api';

// Inject context (sender)
// Serialise the *active* context (the one holding the current span) into the
// carrier, e.g. outgoing HTTP headers. Injecting ROOT_CONTEXT would write
// nothing, because the root context carries no span.
const carrier: Record<string, string> = {};
propagation.inject(context.active(), carrier);

// Extract context (receiver)
// Start from the empty root context and layer the remote span context found
// in the carrier on top of it.
const ctx = propagation.extract(ROOT_CONTEXT, carrier);

// Use `ctx` as the parent for work done on behalf of the caller, for example:
//   context.with(ctx, () => tracer.startActiveSpan('handle', (span) => { ... }));

Metrics

Creating Metrics

import { metrics } from '@opentelemetry/api';

// All instruments below are created from this one meter.
const meter = metrics.getMeter('my-service');

// Counter: a value that only ever goes up.
const requestCounterOptions = { description: 'Total HTTP requests' };
const requestCounter = meter.createCounter('http.requests', requestCounterOptions);

// Histogram: a distribution of recorded values, here request latency in ms.
const requestDurationOptions = { description: 'HTTP request duration', unit: 'ms' };
const requestDuration = meter.createHistogram('http.duration', requestDurationOptions);

// UpDownCounter: a value that can rise and fall.
const activeConnectionsOptions = { description: 'Active connections' };
const activeConnections = meter.createUpDownCounter(
  'connections.active',
  activeConnectionsOptions,
);

// Record one request and one latency sample, each tagged with attributes.
const requestAttributes = { method: 'GET', status: 200 };
requestCounter.add(1, requestAttributes);

const durationAttributes = { endpoint: '/api/users' };
requestDuration.record(45.6, durationAttributes);

Logs

Structured Logging

// The Logs API ships in its own package, not in '@opentelemetry/api':
//   npm install @opentelemetry/api-logs
// The severity enum is named `SeverityNumber`.
import { logs, SeverityNumber } from '@opentelemetry/api-logs';

// NOTE: until a LoggerProvider is registered (for example by the SDK setup),
// this returns a no-op logger and emitted records are discarded.
const logger = logs.getLogger('my-service');

logger.emit({
  severityNumber: SeverityNumber.INFO,
  severityText: 'INFO',
  body: 'Request processed',
  // Structured fields travel as attributes instead of being formatted into
  // the message body, so backends can index and filter on them.
  attributes: {
    'order.id': '12345',
    'customer.id': '67890',
    'duration.ms': 45,
  },
});

Collector

Configuration

# otel-collector-config.yaml
# NOTE: the prometheus receiver used below ships in the collector "contrib"
# distribution, not in the core image.
receivers:
  # Applications push traces, metrics and logs here over OTLP.
  otlp:
    protocols:
      grpc:
        endpoint: 0.0.0.0:4317
      http:
        endpoint: 0.0.0.0:4318

  # Scrapes the collector's own internal metrics. A scrape job needs at least
  # one target; without static_configs the job collects nothing.
  prometheus:
    config:
      scrape_configs:
        - job_name: 'otel-collector'
          scrape_interval: 10s
          static_configs:
            # 8888 is the collector's default internal-telemetry port;
            # adjust if service.telemetry is customised.
            - targets: ['localhost:8888']

processors:
  # Refuses data when memory runs high. Must run first in every pipeline so
  # back-pressure is applied before any other processor buffers data.
  memory_limiter:
    check_interval: 1s
    limit_mib: 400

  # Groups telemetry into batches to cut down on outgoing requests.
  batch:
    timeout: 5s
    send_batch_size: 1000

exporters:
  # The dedicated `jaeger` exporter has been removed from current collector
  # releases. Jaeger accepts OTLP natively, so send traces to its OTLP/gRPC port.
  otlp/jaeger:
    endpoint: jaeger:4317
    tls:
      insecure: true

  # Exposes collected metrics for a Prometheus server to scrape.
  prometheus:
    endpoint: 0.0.0.0:8889

  # Generic OTLP backend. TLS is expected unless `tls.insecure` is set.
  otlp:
    endpoint: otlp-backend:4317

service:
  pipelines:
    traces:
      receivers: [otlp]
      processors: [memory_limiter, batch]
      exporters: [otlp/jaeger, otlp]
    metrics:
      receivers: [otlp, prometheus]
      processors: [memory_limiter, batch]
      exporters: [prometheus, otlp]
    logs:
      receivers: [otlp]
      processors: [memory_limiter, batch]
      exporters: [otlp]

Docker Compose

# docker-compose.yml
# NOTE: `latest` tags are convenient for a demo; pin explicit versions for
# anything long-lived so upgrades are deliberate.
services:
  otel-collector:
    # Use the contrib distribution: it reads its config from
    # /etc/otelcol-contrib/config.yaml (the path mounted below) and bundles the
    # prometheus receiver. The core image looks in /etc/otelcol/ instead, so
    # the mounted file would be silently ignored.
    image: otel/opentelemetry-collector-contrib:latest
    volumes:
      - ./otel-collector-config.yaml:/etc/otelcol-contrib/config.yaml
    ports:
      - "4317:4317"   # OTLP gRPC
      - "4318:4318"   # OTLP HTTP
      - "8889:8889"   # Prometheus exporter endpoint

  jaeger:
    image: jaegertracing/all-in-one:latest
    ports:
      - "16686:16686" # Jaeger UI

  prometheus:
    image: prom/prometheus:latest
    volumes:
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
    ports:
      - "9090:9090"   # Prometheus UI

Instrumentation

Auto-Instrumentation

// node-auto-init.js
// Preloaded with `node --require`, which loads CommonJS modules, so this file
// uses `require()` rather than ESM `import` syntax.
'use strict';

const { NodeSDK } = require('@opentelemetry/sdk-node');
const { getNodeAutoInstrumentations } = require('@opentelemetry/auto-instrumentations-node');
const { OTLPTraceExporter } = require('@opentelemetry/exporter-trace-otlp-grpc');

const sdk = new NodeSDK({
  // No options: the exporter uses its defaults, which can be overridden with
  // the standard OTEL_EXPORTER_OTLP_* environment variables.
  traceExporter: new OTLPTraceExporter(),
  // Registers the bundled set of instrumentations for common Node.js libraries.
  instrumentations: [getNodeAutoInstrumentations()],
});

sdk.start();
# Run with auto-instrumentation
node --require ./node-auto-init.js app.js

Framework Support

// Express
import { ExpressInstrumentation } from '@opentelemetry/instrumentation-express';

// Fastify
import { FastifyInstrumentation } from '@opentelemetry/instrumentation-fastify';

// HTTP
import { HttpInstrumentation } from '@opentelemetry/instrumentation-http';

// MongoDB
import { MongoDBInstrumentation } from '@opentelemetry/instrumentation-mongodb';

// Redis
import { RedisInstrumentation } from '@opentelemetry/instrumentation-redis';

// PostgreSQL
import { PgInstrumentation } from '@opentelemetry/instrumentation-pg';

Best Practices

1. Use Semantic Conventions

// Use standard attribute names
// The constants come from the semantic-conventions package:
//   npm install @opentelemetry/semantic-conventions
// NOTE(review): newer releases of this package mark `SemanticAttributes` as
// deprecated in favour of individually exported constants — confirm against
// the installed version.
import { SemanticAttributes } from '@opentelemetry/semantic-conventions';

// HTTP attributes
span.setAttribute(SemanticAttributes.HTTP_METHOD, 'GET');
span.setAttribute(SemanticAttributes.HTTP_URL, 'https://api.example.com');
span.setAttribute(SemanticAttributes.HTTP_STATUS_CODE, 200);
// Database attributes
span.setAttribute(SemanticAttributes.DB_SYSTEM, 'postgresql');
span.setAttribute(SemanticAttributes.DB_STATEMENT, 'SELECT * FROM users');

2. Sample Wisely

// The exported sampler classes carry a `Sampler` suffix.
import { AlwaysOnSampler, TraceIdRatioBasedSampler } from '@opentelemetry/sdk-trace-base';

// Always sample for debugging
const alwaysSample = new AlwaysOnSampler();

// Sample 10% in production
// The decision is derived from the trace ID, so every service using the same
// ratio keeps or drops the same traces. Wrap it in ParentBasedSampler when
// child spans should follow the decision already made upstream.
const percentageSampler = new TraceIdRatioBasedSampler(0.1);

3. Add Context

// Don't forget to add context to errors
import { SpanStatusCode } from '@opentelemetry/api';

try {
  await riskyOperation();
} catch (error: unknown) {
  // `catch` variables are `unknown` under strict mode; normalise before
  // handing the value to the span API.
  span.recordException(error instanceof Error ? error : String(error));
  // Mark the span as failed through its status, the standard error signal,
  // rather than an ad-hoc `error` attribute.
  span.setStatus({
    code: SpanStatusCode.ERROR,
    message: error instanceof Error ? error.message : String(error),
  });
  throw error;
}

Conclusion

OpenTelemetry provides:

  • Vendor-neutral observability
  • Unified telemetry collection
  • Standard semantic conventions
  • Extensive ecosystem

Perfect for: Cloud-native applications, microservices, multi-cloud deployments.


External Resources

Comments