Connectivity Test

Connectivity Test

IMPORTANT

If you are running your Spark driver inside Docker:

  • Ensure your driver host IP is reachable from the Spark cluster

  • Ensure you have forwarded the following ports (the driver port and the block manager port configured below):

    • 2222:2222

    • 7777:7777

# Driver side: the IP of the driver host, plus the two ports that are passed
# to the session as `spark.driver.port` and `spark.driver.blockManager.port`.
SPARK_DRIVER_HOST = "10.143.15.111"
SPARK_DRIVER_PORT = 2222
SPARK_BLOCK_MANAGER_PORT = 7777

# Cluster side: the IP and port of the Spark cluster master.
SPARK_MASTER_HOST = "172.200.202.20"
SPARK_MASTER_PORT = 2424
from pyspark.sql import SparkSession

# Create (or reuse) the Spark session used for the connectivity test.

# Master URL assembled from the cluster master's host and port.
_master_url = f"spark://{SPARK_MASTER_HOST}:{SPARK_MASTER_PORT}"

# Session options, applied to the builder in the order listed.
_session_options = {
    # Address and ports configured for the driver and its block manager.
    # NOTE(review): when the driver runs inside Docker, `spark.driver.bindAddress`
    # may also need to be set -- confirm against your container network setup.
    "spark.driver.host": SPARK_DRIVER_HOST,
    "spark.driver.port": SPARK_DRIVER_PORT,
    "spark.driver.blockManager.port": SPARK_BLOCK_MANAGER_PORT,
    # Session time zone for Spark SQL.
    "spark.sql.session.timeZone": "UTC",
    # Memory settings for the driver and for each executor.
    "spark.driver.memory": "8g",
    "spark.executor.memory": "1g",
}

_builder = SparkSession.builder.appName("SparkConnectionTest").master(_master_url)
for _option, _setting in _session_options.items():
    _builder = _builder.config(_option, _setting)

spark = _builder.getOrCreate()

# Print the `version` attribute of the session; reaching this line means
# `getOrCreate()` returned without raising.
print(spark.version)
3.4.1
# Free up resources: stop the Spark session once the test is done.
spark.stop()