# Basic Workload Test
# Driver side: the IP of the driver host, plus the fixed ports handed to
# "spark.driver.port" and "spark.driver.blockManager.port" further down.
SPARK_DRIVER_HOST: str = "10.143.15.111"
SPARK_DRIVER_PORT: int = 2222
SPARK_BLOCK_MANAGER_PORT: int = 7777

# Cluster side: the IP and port of the Spark cluster master.
SPARK_MASTER_HOST: str = "172.200.202.20"
SPARK_MASTER_PORT: int = 2424
from pyspark.sql import SparkSession
# Master URL of the form spark://<host>:<port>, built from the master settings.
_master_url = "spark://{}:{}".format(SPARK_MASTER_HOST, SPARK_MASTER_PORT)

# Settings are applied in the order listed: driver networking first (host and
# fixed ports), then the session time zone, then the memory sizes.
_session_settings = (
    ("spark.driver.host", SPARK_DRIVER_HOST),
    ("spark.driver.port", SPARK_DRIVER_PORT),
    ("spark.driver.blockManager.port", SPARK_BLOCK_MANAGER_PORT),
    ("spark.sql.session.timeZone", "UTC"),
    ("spark.driver.memory", "8g"),
    ("spark.executor.memory", "1g"),
)

_builder = SparkSession.builder.appName("SparkConnectionTest").master(_master_url)
for _conf_key, _conf_value in _session_settings:
    _builder = _builder.config(_conf_key, _conf_value)

# Reuses an already-active session if there is one, otherwise creates it.
spark = _builder.getOrCreate()
# Smoke test: print the Spark version, then run a trivial query end to end.
# The work is wrapped in try/finally so the session is stopped even when
# the version lookup, the query, or the collect raises.
try:
    print(spark.version)
    # Test the connection by running a simple operation
    test_df = spark.sql("SELECT 1 as test")
    # collect() forces the query to actually execute and return its rows.
    test_df.collect()
finally:
    # free up resources
    spark.stop()