test.csv
key,a,b,c
a,1,,-1
a,2,,
a,3,,4
test.py
"""Group test.csv by `key` and print per-column max/min aggregates.

Run with: spark-submit test.py
"""
from pyspark.sql import SparkSession
from pyspark.sql import functions as F

spark = SparkSession \
    .builder \
    .appName("spark-app") \
    .getOrCreate()
spark.sparkContext.setLogLevel("WARN")

# inferSchema=True: without it every column is read as string, so
# F.max/F.min would compare lexicographically (e.g. "10" < "2"), which is
# wrong for numeric data. Empty cells become NULL and are skipped by the
# aggregates, so an all-empty column (b) yields None per group.
df = spark.read.csv("test.csv", header=True, inferSchema=True)

# Build the aggregate list from the column names instead of hard-coding
# six calls; output column names (max(a), min(a), ...) are unchanged.
value_cols = ["a", "b", "c"]
res = df.groupBy("key").agg(
    *[F.max(c) for c in value_cols],
    *[F.min(c) for c in value_cols],
)
print(res.toPandas())

spark.stop()  # release the session's cluster resources
spark-submit test.py
key max(a) max(b) max(c) min(a) min(b) min(c)
0 a 3 None 4 1 None -1