🚀

Module 8: Kafka Monitoring & Optimization

55 minutes · 2 examples · Advanced

Hands-on Examples

Interactive examples to reinforce your learning

Complete Monitoring Setup

Full monitoring stack with Prometheus, Grafana, and Kafka UI

Code Example
# Complete Monitoring Setup

## docker-compose.monitoring.yml
# Local, single-broker Kafka stack with Kafka UI, a JMX exporter,
# Prometheus and Grafana. Intended for development only: JMX, the
# Prometheus admin API and Grafana are all exposed without real
# authentication.
version: '3.8'

services:
  zookeeper:
    # Pinned together with the broker image so both stay on a release
    # line that still supports ZooKeeper mode.
    image: confluentinc/cp-zookeeper:7.6.1
    environment:
      ZOOKEEPER_CLIENT_PORT: 2181
      ZOOKEEPER_TICK_TIME: 2000

  kafka:
    # Pinned to 7.x: this broker runs in ZooKeeper mode
    # (KAFKA_ZOOKEEPER_CONNECT), which Kafka 4.0 / cp-kafka 8.x removed,
    # so a floating `latest` tag can stop working without any change here.
    image: confluentinc/cp-kafka:7.6.1
    depends_on:
      - zookeeper
    ports:
      - "9092:9092"
      - "9999:9999"
    environment:
      KAFKA_BROKER_ID: 1
      KAFKA_ZOOKEEPER_CONNECT: zookeeper:2181
      # Two listeners are required:
      #   PLAINTEXT      (kafka:29092)    - other containers, e.g. kafka-ui
      #   PLAINTEXT_HOST (localhost:9092) - clients running on the Docker host
      # Advertising only localhost:9092 makes the broker unreachable from
      # other containers, because they resolve "localhost" to themselves.
      KAFKA_LISTENERS: PLAINTEXT://0.0.0.0:29092,PLAINTEXT_HOST://0.0.0.0:9092
      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://kafka:29092,PLAINTEXT_HOST://localhost:9092
      KAFKA_LISTENER_SECURITY_PROTOCOL_MAP: PLAINTEXT:PLAINTEXT,PLAINTEXT_HOST:PLAINTEXT
      KAFKA_INTER_BROKER_LISTENER_NAME: PLAINTEXT
      # Single broker: the default replication factor of 3 for the
      # __consumer_offsets topic can never be satisfied, which prevents
      # consumer groups from being created.
      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 1
      KAFKA_JMX_PORT: 9999
      # The RMI hostname is handed back to JMX clients after the initial
      # handshake, so it must be a name the client can resolve. The
      # jmx-exporter container reaches the broker as "kafka"; with
      # "localhost" it would be redirected to itself. JMX tools on the
      # Docker host need "kafka" to resolve (e.g. via /etc/hosts) to use
      # the published port 9999.
      KAFKA_JMX_HOSTNAME: kafka
      KAFKA_JMX_OPTS: >-
        -Dcom.sun.management.jmxremote
        -Dcom.sun.management.jmxremote.authenticate=false
        -Dcom.sun.management.jmxremote.ssl=false
        -Dcom.sun.management.jmxremote.port=9999
        -Dcom.sun.management.jmxremote.rmi.port=9999
        -Djava.rmi.server.hostname=kafka

  kafka-ui:
    image: provectuslabs/kafka-ui:latest
    depends_on:
      - kafka
    ports:
      - "8080:8080"
    environment:
      KAFKA_CLUSTERS_0_NAME: local
      # Must match a listener the broker advertises on the Compose network.
      KAFKA_CLUSTERS_0_BOOTSTRAPSERVERS: kafka:29092

  jmx-exporter:
    # NOTE(review): confirm that this image listens on port 5555 and reads
    # the KAFKA_JMX_* variables below; if it does not, the Prometheus
    # target jmx-exporter:5555 will stay down.
    image: solsson/kafka-prometheus-jmx-exporter@sha256:6f82e2b0464f50da8104acd7363a9ddd122f5f6e2d78a8b1bfe0f7d3e90e7c0a
    ports:
      - "5555:5555"
    environment:
      KAFKA_JMX_HOSTNAME: kafka
      KAFKA_JMX_PORT: 9999

  prometheus:
    image: prom/prometheus:latest
    ports:
      - "9090:9090"
    volumes:
      # Relative paths: run Compose from the directory holding these files.
      - ./prometheus.yml:/etc/prometheus/prometheus.yml
      - ./alerts.yml:/etc/prometheus/alerts.yml
    command:
      - '--config.file=/etc/prometheus/prometheus.yml'
      - '--storage.tsdb.path=/prometheus'
      - '--web.console.libraries=/etc/prometheus/console_libraries'
      - '--web.console.templates=/etc/prometheus/consoles'
      # The next two flags enable unauthenticated HTTP endpoints for
      # reload/shutdown and for TSDB administration. Keep them for local
      # use only.
      - '--web.enable-lifecycle'
      - '--web.enable-admin-api'

  grafana:
    image: grafana/grafana:latest
    ports:
      - "3000:3000"
    environment:
      # Default credentials (admin/admin); change before exposing Grafana
      # beyond localhost.
      GF_SECURITY_ADMIN_PASSWORD: admin
      GF_INSTALL_PLUGINS: grafana-clock-panel,grafana-simple-json-datasource
    volumes:
      - grafana-storage:/var/lib/grafana
      - ./grafana/dashboards:/etc/grafana/provisioning/dashboards
      - ./grafana/datasources:/etc/grafana/provisioning/datasources

volumes:
  # Named volume so Grafana state survives container re-creation.
  grafana-storage:

## prometheus.yml
# Prometheus server configuration: global timing defaults, the alert
# rule file to load, and the scrape targets on the Compose network.
global:
  scrape_interval: 15s
  evaluation_interval: 15s

# Resolved relative to the directory containing this config file.
rule_files:
  - "alerts.yml"

scrape_configs:
  # Prometheus scraping its own metrics endpoint.
  - job_name: 'prometheus'
    static_configs:
      - targets: ['localhost:9090']

  # Broker metrics via the JMX exporter; scraped more often than the
  # global default.
  - job_name: 'kafka-jmx'
    static_configs:
      - targets: ['jmx-exporter:5555']
    scrape_interval: 5s

  # Kafka UI is a Spring Boot application: its Prometheus metrics are
  # served by the Actuator endpoint, not the default /metrics path.
  # NOTE(review): confirm the endpoint is enabled in the deployed
  # Kafka UI version.
  - job_name: 'kafka-ui'
    metrics_path: /actuator/prometheus
    static_configs:
      - targets: ['kafka-ui:8080']

## alerts.yml
# Alerting rules evaluated by Prometheus for the Kafka stack.
groups:
  - name: kafka-alerts
    rules:
      # Fires when the kafka-jmx scrape target has been unreachable
      # for one minute.
      - alert: KafkaDown
        expr: 'up{job="kafka-jmx"} == 0'
        for: 1m
        labels:
          severity: critical
        annotations:
          summary: 'Kafka broker is down'
          description: 'Kafka broker has been down for more than 1 minute.'

      # NOTE(review): the metric names in the two rules below depend on
      # the exporter's rule configuration, which is not shown here.
      # Confirm they exist in Prometheus before relying on these alerts.

      # Fires when summed consumer lag stays above 10000 for five minutes.
      - alert: HighConsumerLag
        expr: 'kafka_consumer_lag_sum > 10000'
        for: 5m
        labels:
          severity: warning
        annotations:
          summary: 'High consumer lag detected'
          description: 'Consumer lag is {{ $value }} messages.'

      # Fires when any partition stays under-replicated for two minutes.
      - alert: UnderReplicatedPartitions
        expr: 'kafka_cluster_partition_under_replicated_partitions > 0'
        for: 2m
        labels:
          severity: critical
        annotations:
          summary: 'Under-replicated partitions detected'
          description: '{{ $value }} partitions are under-replicated.'

## Start monitoring stack:
# Run from the directory that contains docker-compose.monitoring.yml,
# prometheus.yml, alerts.yml and the grafana/ folder: the Compose file
# bind-mounts them by relative path. -d starts the containers detached.
docker-compose -f docker-compose.monitoring.yml up -d

## Access points:
# Kafka UI: http://localhost:8080
# Prometheus: http://localhost:9090
# Grafana: http://localhost:3000 (admin/admin)

Expected Output:

Complete monitoring stack with Kafka UI, Prometheus metrics collection, and Grafana dashboards.

Explanation:

This setup provides comprehensive monitoring with real-time metrics, alerting, and visualization for Kafka clusters.