Snippet 1
# crossJoin returns the cartesian product with another DataFrame: every row
# of `df` is paired with every row of the DataFrame passed in.
df.select("age", "name").collect()
# [Row(age=2, name='Alice'), Row(age=5, name='Bob')]
df2.select("name", "height").collect()
# [Row(name='Tom', height=80), Row(name='Bob', height=85)]
# Only the "height" column of df2 is joined, so the 2 rows of df combined
# with the 2 heights give 4 result rows.
df.crossJoin(df2.select("height")).select(
"age", "name", "height").collect()
# [Row(age=2, name='Alice', height=80), Row(age=2, name='Alice', height=85),
#  Row(age=5, name='Bob', height=80), Row(age=5, name='Bob', height=85)]
Snippet 2
# Cartesian product of two lists as a DataFrame: MultiIndex.from_product
# enumerates every (a, b) pairing, and reset_index() turns the two index
# levels back into ordinary columns named "a" and "b".
a = [1, 2, 3]
b = ["a", "b", "c"]
index = pd.MultiIndex.from_product(
    [a, b],
    names=["a", "b"],
)
pd.DataFrame(index=index).reset_index()
Copyright © Code Fetcher 2020