1.union 并集（合并两个 RDD，不去重）
// union demo: two small RDDs that overlap on the elements 3 and 4.
val rdd1 = context.parallelize(List(1, 2, 3, 4))
val rdd2 = context.parallelize(List(3, 4, 5, 6))
println(s"rdd1.partitions:${rdd1.partitions.length}")
// union concatenates both inputs; identical elements appear multiple times
// (call .distinct() afterwards if duplicates must be removed).
val unionRdd = rdd1.union(rdd2)
// collect() brings the (tiny) result back to the driver before printing.
// A bare rdd.foreach(println) runs on the executors, so on a cluster the
// output would land in the executors' stdout instead of the driver console.
unionRdd.collect().foreach(println)
println(s"unionRdd.partitions:${unionRdd.partitions.length}")
打印结果（节选）:unionRdd.partitions:2（union 不做 shuffle，结果的分区数是两个 RDD 分区数之和）
2.cartesian 笛卡尔积
// cartesian demo: pairs every element of rdd1 with every element of rdd2.
val rdd1 = context.parallelize(List(1, 2, 3, 4))
val rdd2 = context.parallelize(List(3, 4, 5, 6))
// Result is an RDD of (a, b) tuples, 4 x 4 = 16 pairs for these inputs.
val cartesianRdd = rdd1.cartesian(rdd2)
// collect() first so the pairs are printed on the driver; a bare
// rdd.foreach(println) prints on the executors when run on a cluster.
cartesianRdd.collect().foreach(println)
打印结果:(1,3)
(1,4)
(1,5)
(1,6)
(2,3)
(2,4)
(2,5)
(2,6)
(3,3)
(3,4)
(3,5)
(3,6)
(4,3)
(4,4)
(4,5)
(4,6)
3.intersection 交集
// intersection demo: keeps only the elements present in both RDDs.
val rdd1 = context.parallelize(List(1, 2, 3, 4))
val rdd2 = context.parallelize(List(3, 4, 5, 6))
// The result contains no duplicates; intersection shuffles internally, so
// the elements are not guaranteed to come back in input order.
val intersectionRdd = rdd1.intersection(rdd2)
// collect() first so the elements are printed on the driver; a bare
// rdd.foreach(println) prints on the executors when run on a cluster.
intersectionRdd.collect().foreach(println)
打印结果（元素顺序取决于分区，不保证有序）:
4
3
4.subtract 差集
// subtract demo: elements of rdd1 that do not appear in rdd2.
val rdd1 = context.parallelize(List(1, 2, 3, 4))
val rdd2 = context.parallelize(List(3, 4, 5, 6))
// The operation is not symmetric: rdd2.subtract(rdd1) would instead keep
// the elements that are only in rdd2.
val subtractRdd = rdd1.subtract(rdd2)
// collect() first so the elements are printed on the driver; a bare
// rdd.foreach(println) prints on the executors when run on a cluster.
subtractRdd.collect().foreach(println)
打印结果:
1
2
5.cogroup 协同分组（按 key 把两个 RDD 中的 value 分别聚合）
// cogroup demo: two key/value RDDs whose key sets only partly overlap
// ("k2" exists only in kv2, and "k1" has two values in kv1).
val kv1: RDD[(String, Int)] = context.parallelize(List(("k1", 1), ("k1", 2), ("k3", 3), ("k4", 4)))
val kv2: RDD[(String, Int)] = context.parallelize(List(("k1", 10), ("k2", 12), ("k3", 13), ("k4", 14)))
// For every key found in either RDD, cogroup yields
// (key, (values from kv1, values from kv2)); a side that lacks the key
// contributes an empty collection.
val cogroupRdd = kv1.cogroup(kv2)
// collect() first so the groups are printed on the driver; a bare
// rdd.foreach(println) prints on the executors when run on a cluster.
cogroupRdd.collect().foreach(println)
打印结果:
(k3,(CompactBuffer(3),CompactBuffer(13)))
(k2,(CompactBuffer(),CompactBuffer(12)))
(k1,(CompactBuffer(1, 2),CompactBuffer(10)))
(k4,(CompactBuffer(4),CompactBuffer(14)))