join(otherStream, [numTasks])

join(otherStream, [numTasks])

When called on two DStreams of (K, V) and (K, W) pairs, return a new DStream of (K, (V, W)) pairs with all pairs of elements for each key.
1
// Demonstrates DStream.join: joining a DStream of (K, V) pairs with a DStream of (K, W) pairs
// yields a DStream of (K, (V, W)) pairs. Intended to be pasted into spark-shell.
import org.apache.spark._
import org.apache.spark.SparkContext._
import org.apache.spark.streaming._
import org.apache.spark.streaming.StreamingContext._
import org.apache.log4j.{Level, Logger}

val conf = new SparkConf()
  .setMaster("local[2]")
  .setAppName("NetworkWordCount")

// `sc` is not defined in this snippet; presumably it is the SparkContext that spark-shell
// creates on startup. It is stopped here before a StreamingContext is built from `conf`.
sc.stop()
val ssc = new StreamingContext(conf, Seconds(1))

import org.apache.spark.rdd.RDD
import scala.collection.mutable.Queue

// A queue-backed input stream of (Int, Int) pairs; the queue is left empty because the
// example only shows the static type produced by `join`.
val rddQueue = new Queue[RDD[(Int, Int)]]()
val inputStream = ssc.queueStream(rddQueue)

// Joining the stream with itself: (Int, Int) join (Int, Int) => (Int, (Int, Int)).
inputStream.join(inputStream)

/*
REPL output (the object hash after '@' was lost in the original page and is shown as <hash>;
the concrete class name is presumably TransformedDStream -- confirm against your Spark version):

res4: org.apache.spark.streaming.dstream.DStream[(Int, (Int, Int))] = org.apache.spark.streaming.dstream.TransformedDStream@<hash>
*/
Copied!
​
Last modified 1yr ago
Copy link