You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@spark.apache.org by Soheil Pourbafrani <so...@gmail.com> on 2019/02/12 10:45:38 UTC

Create Hive table from CSV file

Hi, using the following code I start a Thrift Server that exposes a Hive
table created from a CSV file. I expect it to treat the first line as a header,
but when I select data from the table, the CSV header is returned as a data
row! It seems the line "TBLPROPERTIES(skip.header.line.count
= 1)" has no effect. Is there any way to do this using Spark SQL?

/**
 * Entry point: configures a local Spark application with Hive support,
 * declares a text-backed table over an HDFS location, and then starts a
 * Hive Thrift server on top of the session's SQLContext.
 *
 * @param args command-line arguments; not referenced in the visible part of the body
 */
def main(args: Array[String]): Unit = {
    // Collect all settings on one SparkConf; it is passed to both the
    // SparkContext and the SparkSession builder below.
    val conf = new SparkConf
    conf
      .set("hive.server2.thrift.port", "10000")
      .set("spark.sql.hive.thriftServer.singleSession", "true")
      .set("spark.sql.warehouse.dir", "/metadata/hive")
      .set("spark.sql.catalogImplementation","hive")
      // NOTE(review): this sets the key on the Spark configuration, not on the
      // table; presumably it has no effect on how the table is read — confirm.
      .set("skip.header.line.count","1")
      .setMaster("local[*]")
      .setAppName("ThriftServer")
    // The context is created explicitly first; the builder's getOrCreate()
    // below is then called with the same conf.
    val sc = new SparkContext(conf)
    val spark = SparkSession.builder()
      .config(conf)
      .enableHiveSupport()
      .getOrCreate()

// Declare the table (only if absent) as comma-delimited text files at the
// given HDFS location, with three columns: id, time_stamp, time_quality.
spark.sql(
      "CREATE TABLE IF NOT EXISTS freq_back (" +
        "id int," +
        "time_stamp bigint," +
        "time_quality string )" +
        "ROW FORMAT DELIMITED " +
        "FIELDS TERMINATED BY ',' " +
        "STORED AS TEXTFILE " +
        "LOCATION 'hdfs://DB_BackUp/freq' " +
        // NOTE(review): the poster reports that the header line is still
        // returned as a data row, i.e. this property is not honoured on read.
        // Hive's documented form quotes both key and value
        // ('skip.header.line.count'='1'); Spark may also simply ignore this
        // property for Hive text tables. A table declared with the CSV data
        // source and its `header` option is a likely alternative — TODO confirm.
        "TBLPROPERTIES(skip.header.line.count = 1)"
    )

// Expose the session's SQLContext through the Hive Thrift server.
HiveThriftServer2.startWithContext(spark.sqlContext)