Spark UI Proxying#

Instructions for proxying data engine UIs when they are deployed on two cnodes.

The proxy rewrites links in the Spark UI so that they can be reached via hostnames configured on the client machine. For example, a link that the master UI renders as http://172.200.202.21:9293 is rewritten to http://spark.worker1, which the client resolves through its /etc/hosts entries.

The proxy needs network access to the Spark cluster.

             +----------------------------------+
             |          Client Machine          |
             |----------------------------------|
             |     /etc/hosts configuration     |
             |  - spark.master  -> 10.143.15.111|
             |  - spark.worker1 -> 10.143.15.111|
             |  - spark.worker2 -> 10.143.15.111|
             +----------------------------------+
                               |
                               |
                          HTTP Requests
                               |
                               v
                     +-------------------+
                     |    Nginx Proxy    |
                     |-------------------|
                     | IP: 10.143.15.111 |
                     |  - nginx.conf     |
                     |  - Maps Spark UI  |
                     +-------------------+
                               |
               +---------------+-------------------+
               |                                   |
               v                                   v
   +-------------------------+           +------------------------+
   |   Spark Master Node     |           |   Spark Worker Nodes   |
    |-------------------------|           |------------------------|
   | IP:                     |           | IPs:                   |
   |  - 172.200.202.20:9292  |           |  - 172.200.202.21:9293 |
    |  - 172.200.202.20:18080 |           |  - 172.200.202.22:9293 |
    +-------------------------+           +------------------------+

docker-compose.yml#

services:
  nginx:
    image: nginx:stable
    container_name: nginx_proxy
    ports:
      - "80:80"
      - "18080:18080"
    volumes:
      - ./nginx.conf:/etc/nginx/nginx.conf:ro
    networks:
      - spark_network
networks:
  spark_network:
    driver: bridge
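
To bring up the proxy and verify that the mounted configuration parses (assuming Docker Compose v2 and the container name above):

docker compose up -d
docker exec nginx_proxy nginx -t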

nginx.conf#

The IPs, ports, hostnames, and worker ID prefix are hard-coded in the map blocks at the top of the file and will need to be modified to work on another cluster.

events {
    worker_connections 1024;
}

http {
    # Master node variables
    map $host $master_ip { default "172.200.202.20"; }
    map $host $master_port { default "9292"; }
    map $host $master_host { default "spark.master"; }

    # Worker 1 variables
    map $host $worker1_ip { default "172.200.202.21"; }
    map $host $worker1_port { default "9293"; }
    map $host $worker1_host { default "spark.worker1"; }
    map $host $worker1_old_name { default "cosmo-arrow-cb2-cn-1"; }

    # Worker 2 variables
    map $host $worker2_ip { default "172.200.202.22"; }
    map $host $worker2_port { default "9293"; }
    map $host $worker2_host { default "spark.worker2"; }
    map $host $worker2_old_name { default "cosmo-arrow-cb2-cn-2"; }

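    # Spark standalone worker IDs have the form worker-<registration timestamp>-<host>-<port>;
    # the timestamp below changes whenever the workers re-register.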
    map $host $worker_prefix { default "worker-20250115194059-"; }

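    # Note: this upstream is not referenced by the server blocks below;
    # requests are proxied directly to the per-node variables defined above.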
    upstream spark_nodes {
        server 172.200.202.20:9292; # Master
        server 172.200.202.21:9293; # Worker 1
        server 172.200.202.22:9293; # Worker 2
    }

    include /etc/nginx/mime.types;
    default_type application/octet-stream;

    server {
        listen 80;
        server_name spark.master;

        location / {
            proxy_pass http://$master_ip:$master_port;
            proxy_set_header Host $host;
            proxy_set_header Accept-Encoding "";

            sub_filter_types text/html text/css text/xml text/javascript application/javascript;
            sub_filter_once off;

            # URLs
            sub_filter "http://$master_ip:$master_port" "http://$master_host";
            sub_filter "http://$worker1_ip:$worker1_port" "http://$worker1_host";
            sub_filter "http://$worker2_ip:$worker2_port" "http://$worker2_host";

            # Worker IDs
            sub_filter "${worker_prefix}${worker1_ip}-" "${worker_prefix}${worker1_host}-";
            sub_filter "${worker_prefix}${worker2_ip}-" "${worker_prefix}${worker2_host}-";

            # IPs
            sub_filter $master_ip $master_host;
            sub_filter $worker1_ip $worker1_host;
            sub_filter $worker2_ip $worker2_host;

            # Old names
            sub_filter "${worker1_old_name}:${worker1_port}" $worker1_host;
            sub_filter "${worker2_old_name}:${worker2_port}" $worker2_host;
            sub_filter "${worker1_old_name}:${master_port}" $master_host;
            sub_filter "${worker2_old_name}:${master_port}" $master_host;
        }
    }

    server {
        listen 18080;
        server_name spark.master;

        location / {
            proxy_pass http://$master_ip:18080;
            proxy_set_header Host $host;
            proxy_set_header Accept-Encoding "";

            sub_filter_types text/html text/css text/xml text/javascript application/javascript;
            sub_filter_once off;

            # URLs
            sub_filter "http://$master_ip:$master_port" "http://$master_host";
            sub_filter "http://$worker1_ip:$worker1_port" "http://$worker1_host";
            sub_filter "http://$worker2_ip:$worker2_port" "http://$worker2_host";

            # Worker IDs
            sub_filter "${worker_prefix}${worker1_ip}-" "${worker_prefix}${worker1_host}-";
            sub_filter "${worker_prefix}${worker2_ip}-" "${worker_prefix}${worker2_host}-";

            # IPs
            sub_filter $master_ip $master_host;
            sub_filter $worker1_ip $worker1_host;
            sub_filter $worker2_ip $worker2_host;

            # Old names
            sub_filter "${worker1_old_name}:${worker1_port}" $worker1_host;
            sub_filter "${worker2_old_name}:${worker2_port}" $worker2_host;
            sub_filter "${worker1_old_name}:${master_port}" $master_host;
            sub_filter "${worker2_old_name}:${master_port}" $master_host;
        }
    }

    server {
        listen 80;
        server_name spark.worker1;

        location / {
            proxy_pass http://$worker1_ip:$worker1_port;
            proxy_set_header Host $host;
            proxy_set_header Accept-Encoding "";

            sub_filter_types text/html text/css text/xml text/javascript application/javascript;
            sub_filter_once off;

            # URLs
            sub_filter "http://$master_ip:$master_port" "http://$master_host";
            sub_filter "http://$worker1_ip:$worker1_port" "http://$worker1_host";
            sub_filter "http://$worker2_ip:$worker2_port" "http://$worker2_host";

            # Worker IDs
            sub_filter "${worker_prefix}${worker1_ip}-" "${worker_prefix}${worker1_host}-";
            sub_filter "${worker_prefix}${worker2_ip}-" "${worker_prefix}${worker2_host}-";

            # IPs
            sub_filter $master_ip $master_host;
            sub_filter $worker1_ip $worker1_host;
            sub_filter $worker2_ip $worker2_host;

            # Old names
            sub_filter "${worker1_old_name}:${worker1_port}" $worker1_host;
            sub_filter "${worker2_old_name}:${worker2_port}" $worker2_host;
            sub_filter "${worker1_old_name}:${master_port}" $master_host;
            sub_filter "${worker2_old_name}:${master_port}" $master_host;
        }
    }

    server {
        listen 80;
        server_name spark.worker2;

        location / {
            proxy_pass http://$worker2_ip:$worker2_port;
            proxy_set_header Host $host;
            proxy_set_header Accept-Encoding "";

            sub_filter_types text/html text/css text/xml text/javascript application/javascript;
            sub_filter_once off;

            # URLs
            sub_filter "http://$master_ip:$master_port" "http://$master_host";
            sub_filter "http://$worker1_ip:$worker1_port" "http://$worker1_host";
            sub_filter "http://$worker2_ip:$worker2_port" "http://$worker2_host";

            # Worker IDs
            sub_filter "${worker_prefix}${worker1_ip}-" "${worker_prefix}${worker1_host}-";
            sub_filter "${worker_prefix}${worker2_ip}-" "${worker_prefix}${worker2_host}-";

            # IPs
            sub_filter $master_ip $master_host;
            sub_filter $worker1_ip $worker1_host;
            sub_filter $worker2_ip $worker2_host;

            # Old names
            sub_filter "${worker1_old_name}:${worker1_port}" $worker1_host;
            sub_filter "${worker2_old_name}:${worker2_port}" $worker2_host;
            sub_filter "${worker1_old_name}:${master_port}" $master_host;
            sub_filter "${worker2_old_name}:${master_port}" $master_host;
        }
    }
}
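
After editing nginx.conf on the host, the proxy can pick up the changes without restarting the container:

docker exec nginx_proxy nginx -t && docker exec nginx_proxy nginx -s reload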

Client machine /etc/hosts#

10.143.15.111 spark.master
10.143.15.111 spark.worker1
10.143.15.111 spark.worker2
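
Hostname resolution can be checked from the client before testing the proxy:

ping -c 1 spark.master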

Test#

curl http://spark.master
curl http://spark.master:18080
curl http://spark.worker1
curl http://spark.worker2
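
If the rewriting works, responses should reference the configured hostnames rather than raw cluster IPs. A quick check (assuming at least one worker is registered with the master; the second count should be 0):

curl -s http://spark.master | grep -c "spark.worker"
curl -s http://spark.master | grep -c "172.200.202"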