saveAsObjectFiles(prefix, [suffix])

saveAsObjectFiles(prefix, [suffix])

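Save this DStream's contents as SequenceFiles of serialized Java objects. The file name at each batch interval is generated based on prefix and suffix:
"prefix-TIME_IN_MS[.suffix]".
Here TIME_IN_MS stands for the batch time in milliseconds, which Spark fills in automatically; pass only the prefix (and, optionally, the suffix as a separate argument) rather than writing TIME_IN_MS into the prefix.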
Python API: This operation is not available in the Python API.
Example:
import org.apache.spark._
import org.apache.spark.SparkContext._
import org.apache.spark.streaming._
import org.apache.spark.streaming.StreamingContext._
import org.apache.log4j.{Level, Logger}
import org.apache.spark.sql.{Row, SaveMode, SparkSession}
import org.apache.spark.sql.SQLContext
​
// Example: join two file-based text streams and persist every batch of the
// joined stream with saveAsObjectFiles(prefix, suffix).

// Raise the log level of everything under "org" to ERROR so the batch output
// printed below is not buried in log lines.
Logger.getLogger("org").setLevel(Level.ERROR)

val spark = SparkSession
  .builder()
  .config("spark.master", "local[2]")
  .appName("streaming for book")
  .getOrCreate()

spark.sparkContext.setCheckpointDir("/tmp/")

import spark.implicits._
val sc = spark.sparkContext
// Batch interval of one second: one output directory is written per second.
val ssc = new StreamingContext(sc, Seconds(1))

// Two input streams, each reading text lines from its own directory.
val messages1 = ssc.textFileStream("/tmp/filestream1/")
val messages2 = ssc.textFileStream("/tmp/filestream2/")

// Drop empty lines and key every line by itself so the two streams can be joined.
val messages11 = messages1.filter(_.nonEmpty).map(x => (x, x))
val messages22 = messages2.filter(_.nonEmpty).map(x => (x, x))

// Per-batch inner join: keeps the lines that occur in both streams in the same batch.
val messages4 = messages11.join(messages22)

messages4.print()

// Pass prefix and suffix as separate arguments. Spark builds each batch's
// output name as "prefix-TIME_IN_MS.suffix", so neither the TIME_IN_MS
// placeholder nor the extension belongs inside the prefix string.
messages4.saveAsObjectFiles("/tmp/stream", "obj")

ssc.start()
ssc.awaitTermination()

/*

Expected layout in /tmp/ : one directory per batch interval, named
"stream-<batch time in ms>.obj" (the timestamps below are illustrative):

stream-1583651108000.obj/
stream-1583651109000.obj/
stream-1583651110000.obj/
stream-1583651111000.obj/
stream-1583651112000.obj/
stream-1583651113000.obj/
...

*/
​