Basic Workload Test

# Basic Workload Test

# Driver settings: IP address of the driver host, plus the ports used for
# the driver itself and for its block manager.
SPARK_DRIVER_HOST = "10.143.15.111"
SPARK_DRIVER_PORT = 2222
SPARK_BLOCK_MANAGER_PORT = 7777

# Cluster settings: IP address and port of the Spark cluster master.
SPARK_MASTER_HOST = "172.200.202.20"
SPARK_MASTER_PORT = 2424
from pyspark.sql import SparkSession

# Start from a builder that carries the application name and the URL of the
# standalone master.
_session_builder = SparkSession.builder.appName("SparkConnectionTest").master(
    f"spark://{SPARK_MASTER_HOST}:{SPARK_MASTER_PORT}"
)

# Session options, applied one by one in the order listed here.
_session_options = (
    ("spark.driver.host", SPARK_DRIVER_HOST),
    ("spark.driver.port", SPARK_DRIVER_PORT),
    ("spark.driver.blockManager.port", SPARK_BLOCK_MANAGER_PORT),
    ("spark.sql.session.timeZone", "UTC"),
    ("spark.driver.memory", "8g"),
    ("spark.executor.memory", "1g"),
)
for _option_key, _option_value in _session_options:
    _session_builder = _session_builder.config(_option_key, _option_value)

# Create the session, or reuse one that already exists.
spark = _session_builder.getOrCreate()

# Connection smoke test: report the Spark version, then run a trivial query
# and collect its result so that work is actually sent through the session.
try:
    print(spark.version)
    test_df = spark.sql("SELECT 1 as test")
    test_df.collect()
finally:
    # Free up resources. This runs even when the query above raises, so a
    # failed test does not leave the session running.
    spark.stop()