I have what seems like a simple problem, but I keep banging my head against the wall with no success. I am essentially trying to do the same thing as this post, except that I
I think you have to convert the vector column to an array column (for example with a UDF, as below) before you can aggregate it element by element.
from pyspark.ml.linalg import Vectors, VectorUDT
from pyspark.sql import functions as F
from pyspark.sql import types as T
def vec2array(v):
    """Convert a vector value into a plain list of Python floats.

    Args:
        v: Value taken from the vector column. It is passed through
            ``Vectors.dense`` first, so any input that call accepts is
            handled the same way.

    Returns:
        A list holding one ``float`` per element, or ``None`` when ``v`` is
        ``None`` so that a null row stays null instead of being converted.
    """
    if v is None:
        # Nothing to convert for a null row; hand the null back unchanged.
        return None
    # Densify first so the value is expanded to its full length, then turn
    # the underlying array into built-in floats in one step.
    return Vectors.dense(v).toArray().tolist()
# Wrap vec2array as a Spark UDF. The element type is DoubleType rather than
# FloatType so the Python floats returned by vec2array are not narrowed to
# 32-bit single precision before they are summed.
vec2array_udf = F.udf(vec2array, T.ArrayType(T.DoubleType()))
# Replace the vector column with its array form so elements can be indexed.
df = df.withColumn('Vec', vec2array_udf('Vec'))
# Number of positions to sum, read from the first row's array.
# NOTE(review): this fails if df is empty or the first 'Vec' is null, and it
# presumes the first row's length is representative -- confirm with real data.
n = len(df.select('Vec').first()[0])
# One SUM per array position, reassembled into a single array column "sum".
bla = df.agg(F.array(*[F.sum(F.col("Vec")[i]) for i in range(n)]).alias("sum"))