join(otherStream, [numTasks])

When called on two DStreams of (K, V) and (K, W) pairs, return a new DStream of (K, (V, W)) pairs with all pairs of elements for each key.

import org.apache.spark._
import org.apache.spark.SparkContext._
import org.apache.spark.streaming._
import org.apache.spark.streaming.StreamingContext._
import org.apache.log4j.{Level, Logger}

// Spark configuration: master "local[2]", application name "NetworkWordCount".
val conf = new SparkConf()
  .setMaster("local[2]")
  .setAppName("NetworkWordCount")

// `sc` is not defined in this snippet — presumably the SparkContext that
// spark-shell pre-creates (TODO confirm). It is stopped before a new
// StreamingContext is built from `conf`.
// stop() is side-effecting, so it is invoked with explicit parentheses.
sc.stop()

// Streaming context built from `conf` with a Seconds(1) duration.
val ssc = new StreamingContext(conf, Seconds(1))
import org.apache.spark.rdd.RDD
import scala.collection.mutable.Queue

// Queue of (Int, Int) RDDs that backs the input stream; nothing is enqueued here.
val rddQueue = new Queue[RDD[(Int, Int)]]()
val inputStream = ssc.queueStream(rddQueue)

// Join the stream with itself by key; the result is a DStream[(Int, (Int, Int))].
// NOTE(review): this snippet registers no output operation and never starts
// `ssc`, so it only constructs the joined DStream.
inputStream.join(inputStream)

/*
res4: org.apache.spark.streaming.dstream.DStream[(Int, (Int, Int))] = org.apache.spark.streaming.dstream.TransformedDStream@7296b9c6
*/

Previous: reduceByKey(func, [numTasks]) | Next: cogroup(otherStream, [numTasks])

Last updated 5 years ago

Was this helpful?