//prepare csv
year,make,model,comment,blank"2012","Tesla","S","No comment","1997","Ford,E350","Go get one now they are going fast","2015","Chevy","Volt"
//Processing and inserting data in hive without schema
import org.apache.spark.sql.hive.HiveContextimport org.apache.spark.sql.hive.orc._val hiveContext = new org.apache.spark.sql.hive.HiveContext(sc)val df = hiveContext.read.format("com.databricks.spark.csv").option("header", "true").option("inferSchema", "true").load("/tmp/cars.csv")val selectedData = df.select("year", "model")selectedData.write.format("orc").option("header", "true").save("/tmp/newcars")
//permission issues as user hive
// org.apache.hadoop.hive.ql.metadata.HiveException: MetaException(message:java.security.AccessControlException: Permission denied: user=hive, access=WRITE, inode="/tmp/newcars":hdfs:hdfs:drwxr-xr-x//Updated /tmp/newcars_orc_cust17 directory permissions
hiveContext.sql("create external table newcars_orc_ext_cust17(year string,model string) stored as orc location '/tmp/newcars'")hiveContext.sql("show tables").collect().foreach(println)
hiveContext.sql("select * from newcars").collect().foreach(println)