List all files in an HDFS directory from Spark
import org.apache.hadoop.fs.{FileSystem, Path}
FileSystem.get(sc.hadoopConfiguration).listStatus(new Path("/shravan/json")).foreach(x => println(x.getPath))
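A variant that also recurses into subdirectories and prints file sizes; a sketch using the same Hadoop FileSystem API (listFiles is a standard call, the path is the one from above):

import org.apache.hadoop.fs.{FileSystem, Path}

val fs = FileSystem.get(sc.hadoopConfiguration)
// listFiles(path, true) recurses and returns only files, not directories
val it = fs.listFiles(new Path("/shravan/json"), true)
while (it.hasNext) {
  val f = it.next()
  println(s"${f.getPath} (${f.getLen} bytes)")
}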
#######
Read a CSV from HDFS and find duplicate rows
val df = spark.read.option("header", "true").option("delimiter", "|").option("inferSchema", "true").csv("a.csv")
# find duplicate groups (count > 1)
df.groupBy("id", "name", "age").count().filter("count > 1").coalesce(1).write.format("csv").mode("overwrite").save("/tmp/spark_output/datacsv1")
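To drop the duplicate rows instead of just counting them, dropDuplicates does the job; a minimal sketch, assuming the same df and key columns (the output path is made up):

// keep one row per (id, name, age) combination, then write the result out
val deduped = df.dropDuplicates("id", "name", "age")
deduped.coalesce(1).write.format("csv").option("header", "true").mode("overwrite").save("/tmp/spark_output/dedup_csv")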
#########################
Write to HDFS / Hive
****** use ******
df2.coalesce(1).write.mode("overwrite").insertInto("db.tb")
df1.coalesce(1).write.format("orc").mode("overwrite").saveAsTable("db.tb")
df.coalesce(1).write.mode("overwrite").csv("/shravan/")
df.write.format("csv").save("/tmp/spark_output/datacsv")
df.coalesce(1).write.mode("overwrite").option("header", "true").csv("/shravan/")
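Note: coalesce(1) funnels all output through a single task, so it only makes sense for small results. For bigger data a partitioned write scales better; a sketch, where load_date is a hypothetical column name, substitute a real one:

// partitionBy writes one subdirectory per distinct load_date value,
// keeping the write parallel instead of forcing a single output file
df.write.mode("overwrite").partitionBy("load_date").option("header", "true").csv("/shravan/partitioned/")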
####################################################