You are viewing a plain text version of this content. The canonical link for it is here.

Posted to user@hive.apache.org by Mich Talebzadeh <mi...@gmail.com> on 2016/03/15 00:31:25 UTC

Inserting into a Hive transactional table using Java code

Hive 2:00
Spark engine 1.3.1
Eclipse Scala IDE build of Eclipse SDK

A simple routine that reads CSV files from a staging directory by creating
an external table and insert into an ORC transactional table.

Using beeline from another server the task finishes pretty quickly. The
code is shown in attachment 1 (import_csv.hql).

However, when I use Eclipse to run the code as a Java app, The insert into
table hangs trying to insert 65 rows. Interestingly it only holds shared
read lock on the target table

Lock ID Database        Table   Partition       State   Type    Transaction
ID  Last Hearbeat   Acquired At     User    Hostname
2000    test    t2      NULL    ACQUIRED        *SHARED_READ*     NULL
1457998918685   1457998618675   hduser  rhes564

This is the progress of the code

14/03/2016 23:36:19.19
 Running: CREATE EXTERNAL TABLE stg_t2 ( INVOICENUMBER string ,PAYMENTDATE
string ,NET string ,VAT string ,TOTAL string )COMMENT 'from csv file from
excel sheet PayInsPeridaleTechnology' ROW FORMAT serde
'org.apache.hadoop.hive.serde2.OpenCSVSerde' STORED AS TEXTFILE LOCATION
'/data/stg/table2' TBLPROPERTIES ('skip.header.line.count'='1')

 Running: CREATE TABLE t2 ( INVOICENUMBER          INT
,PAYMENTDATE            timestamp,NET
DECIMAL(20,2),VAT                    DECIMAL(20,2),TOTAL
DECIMAL(20,2))COMMENT 'from csv file from excel sheet
PayInsPeridaleTechnology' CLUSTERED BY (INVOICENUMBER) INTO 256 BUCKETS
STORED AS ORC TBLPROPERTIES ( 'orc.compress'='ZLIB', 'transactional'='true')

Hanging here

 Running: INSERT INTO TABLE t2 SELECT INVOICENUMBER ,
CAST(UNIX_TIMESTAMP(paymentdate,'DD/MM/YYYY')*1000 as timestamp) ,
CAST(REGEXP_REPLACE(net,'[^\d\.]','') AS DECIMAL(20,2)) ,
CAST(REGEXP_REPLACE(vat,'[^\d\.]','') AS DECIMAL(20,2)) ,
CAST(REGEXP_REPLACE(total,'[^\d\.]','') AS DECIMAL(20,2)) FROM stg_t2 WHERE
CAST(REGEXP_REPLACE(total,'[^\d\.]','') AS DECIMAL(20,2)) > 0.0



Dr Mich Talebzadeh



LinkedIn * https://www.linkedin.com/profile/view?id=AAEAAAAWh2gBxianrbJd6zP6AcPCCdOABUrV8Pw
<https://www.linkedin.com/profile/view?id=AAEAAAAWh2gBxianrbJd6zP6AcPCCdOABUrV8Pw>*



http://talebzadehmich.wordpress.com

Fwd: Inserting into a Hive transactional table using Java code

Posted by Mich Talebzadeh <mi...@gmail.com>.

Hive 2:00
Spark engine 1.3.1
Eclipse Scala IDE build of Eclipse SDK

A simple routine that reads CSV files from a staging directory by creating
an external table and insert into an ORC transactional table.

Using beeline from another server the task finishes pretty quickly. The
code is shown below

[image: Inline images 1]

However, when I use Eclipse to run the code as a Java app


import java.sql.SQLException;
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.Statement;
import java.sql.DriverManager;
public class ImportCSV
{
  private static String driverName = "org.apache.hive.jdbc.HiveDriver";
  private static String _username;
  private static String _password;
  private static String _hiveserver;
  private static String _database;
  public static void main(String[] args) throws SQLException {
    boolean confirm = false;
    _hiveserver = "jdbc:hive2://rhes564:10010";
    _database = "/default";
    _username = "hduser";
    _password = "xxxxxx";
    int status = 0;
    String query = null;
    String TableName = null;
    Connection _con = null;
    try {
      Class.forName(driverName);
    } catch (ClassNotFoundException e) {
      // TODO Auto-generated catch block
      e.printStackTrace();
      System.exit(1);
    }
    ResultSet rs = null;
    _con = DriverManager.getConnection(_hiveserver+_database, _username,
_password);
    Statement statement = _con.createStatement();
    // let us do some work
//
   try
   {
    query = "set hive.execution.engine=spark";
    statement.execute(query);
    query = "set spark.home=/usr/lib/spark-1.3.1-bin-hadoop2.6";
    statement.execute(query);
    query = "set spark.master=yarn-client";
    statement.execute(query);
    query = "set
hive.input.format=org.apache.hadoop.hive.ql.io.BucketizedHiveInputFormat";
    statement.execute(query);
    query = "set hive.optimize.bucketmapjoin=true";
    statement.execute(query);
    query = "set hive.optimize.bucketmapjoin.sortedmerge=true";
    statement.execute(query);
    query = "select from_unixtime(unix_timestamp(), 'dd/MM/yyyy
HH:mm:ss.ss') AS StartTime";
    rs = statement.executeQuery(query);
    rs = statement.getResultSet();
    while (rs.next())
    {
      System.out.println(rs.getString(1));
    }
    query = "use test";
    statement.execute(query);
    query = "DROP TABLE IF EXISTS stg_t2";
    statement.execute(query);
    query = "CREATE EXTERNAL TABLE stg_t2 ( " +
            "INVOICENUMBER string " +
            ",PAYMENTDATE string " +
            ",NET string " +
            ",VAT string " +
            ",TOTAL string " +
            ")" +
            "COMMENT 'from csv file from excel sheet
PayInsPeridaleTechnology' " +
            "ROW FORMAT serde 'org.apache.hadoop.hive.serde2.OpenCSVSerde'
" +
            "STORED AS TEXTFILE " +
            "LOCATION '/data/stg/table2' " +
            "TBLPROPERTIES ('skip.header.line.count'='1') ";
    System.out.println("\n Running: " + query+ "\n");
    statement.execute(query);
    query = "DROP TABLE IF EXISTS t2";
    statement.execute(query);
    query = "CREATE TABLE t2 ( " +
            "INVOICENUMBER          INT " +
            ",PAYMENTDATE            timestamp" +
            ",NET                    DECIMAL(20,2)" +
            ",VAT                    DECIMAL(20,2)" +
            ",TOTAL                  DECIMAL(20,2)" +
            ")" +
            "COMMENT 'from csv file from excel sheet
PayInsPeridaleTechnology' " +
            "CLUSTERED BY (INVOICENUMBER) INTO 256 BUCKETS " +
            "STORED AS ORC " +
            "TBLPROPERTIES ( 'orc.compress'='ZLIB', " +
            "'transactional'='true')";
    System.out.println("\n Running: " + query+ "\n");
    statement.execute(query);
    query = "INSERT INTO TABLE t2 " +
            "SELECT " +
            "INVOICENUMBER " +
            ", CAST(UNIX_TIMESTAMP(paymentdate,'DD/MM/YYYY')*1000 as
timestamp) " +
            ", CAST(REGEXP_REPLACE(net,'[^\\d\\.]','') AS DECIMAL(20,2)) " +
            ", CAST(REGEXP_REPLACE(vat,'[^\\d\\.]','') AS DECIMAL(20,2)) " +
            ", CAST(REGEXP_REPLACE(total,'[^\\d\\.]','') AS DECIMAL(20,2))
" +
            "FROM " +
            "stg_t2 " +
            "WHERE " +
            "CAST(REGEXP_REPLACE(total,'[^\\d\\.]','') AS DECIMAL(20,2)) >
0.0 ";
    System.out.println("\n Running: " + query+ "\n");
    statement.execute(query);
    query = " SELECT * FROM t2 ORDER BY invoicenumber";
     rs = statement.executeQuery(query);
     rs = statement.getResultSet();
    System.out.println("\n Running: " + query+ "\n");
    System.out.println("\n Running: " + query+ "\n");
    int numCols = 5;
    int rowNum = 0;
    String row = "";
    for (rowNum = 1; rs.next(); rowNum++)
    {
      row = "";
      for (int i=1; i<=numCols; i++)
      try
      {
         if(i==1) row += rs.getString(1)+ ", ";
         if(i==2) row += rs.getString(2)+ ", ";
         if(i==3) row += rs.getString(3)+ ", ";
         if(i==4) row += rs.getString(4)+ ", ";
         if(i==5) row += rs.getString(3);
      }
      // Catch the exception
      catch (SQLException sqe)
      {
        numCols = --i;  // So the number of columns is i -1
        break;         // break and reset the number of columns to correct
value
      }
      System.out.println(row);
    }
    query = "select from_unixtime(unix_timestamp(), 'dd/MM/yyyy
HH:mm:ss.ss') AS EndTime";
    rs = statement.executeQuery(query);
    rs = statement.getResultSet();
    System.out.println("\n Running: " + query+ "\n");
    while (rs.next())
    {
      System.out.println(rs.getString(1));
    }
    //
   }catch(SQLException se){
      //Handle errors for JDBC
      se.printStackTrace();
   }catch(Exception e){
      //Handle errors for Class.forName
      e.printStackTrace();
   }
  }//end main
}//ImportCSV



, The insert into table hangs trying to insert 65 rows. Interestingly it
only holds shared read lock on the target table

Lock ID Database        Table   Partition       State   Type    Transaction
ID  Last Hearbeat   Acquired At     User    Hostname
2000    test    t2      NULL    ACQUIRED        *SHARED_READ*     NULL
1457998918685   1457998618675   hduser  rhes564

This is the progress of the code

14/03/2016 23:36:19.19
 Running: CREATE EXTERNAL TABLE stg_t2 ( INVOICENUMBER string ,PAYMENTDATE
string ,NET string ,VAT string ,TOTAL string )COMMENT 'from csv file from
excel sheet PayInsPeridaleTechnology' ROW FORMAT serde
'org.apache.hadoop.hive.serde2.OpenCSVSerde' STORED AS TEXTFILE LOCATION
'/data/stg/table2' TBLPROPERTIES ('skip.header.line.count'='1')

 Running: CREATE TABLE t2 ( INVOICENUMBER          INT
,PAYMENTDATE            timestamp,NET
DECIMAL(20,2),VAT                    DECIMAL(20,2),TOTAL
DECIMAL(20,2))COMMENT 'from csv file from excel sheet
PayInsPeridaleTechnology' CLUSTERED BY (INVOICENUMBER) INTO 256 BUCKETS
STORED AS ORC TBLPROPERTIES ( 'orc.compress'='ZLIB', 'transactional'='true')

Hanging here

 Running: INSERT INTO TABLE t2 SELECT INVOICENUMBER ,
CAST(UNIX_TIMESTAMP(paymentdate,'DD/MM/YYYY')*1000 as timestamp) ,
CAST(REGEXP_REPLACE(net,'[^\d\.]','') AS DECIMAL(20,2)) ,
CAST(REGEXP_REPLACE(vat,'[^\d\.]','') AS DECIMAL(20,2)) ,
CAST(REGEXP_REPLACE(total,'[^\d\.]','') AS DECIMAL(20,2)) FROM stg_t2 WHERE
CAST(REGEXP_REPLACE(total,'[^\d\.]','') AS DECIMAL(20,2)) > 0.0



Dr Mich Talebzadeh



LinkedIn * https://www.linkedin.com/profile/view?id=AAEAAAAWh2gBxianrbJd6zP6AcPCCdOABUrV8Pw
<https://www.linkedin.com/profile/view?id=AAEAAAAWh2gBxianrbJd6zP6AcPCCdOABUrV8Pw>*



http://talebzadehmich.wordpress.com

Fwd: Inserting into a Hive transactional table using Java code

Posted by Mich Talebzadeh <mi...@gmail.com>.

Hive 2:00
Spark engine 1.3.1
Eclipse Scala IDE build of Eclipse SDK

A simple routine that reads CSV files from a staging directory by creating
an external table and insert into an ORC transactional table.

Using beeline from another server the task finishes pretty quickly. The
code is shown in attachment 1 (import_csv.hql).

However, when I use Eclipse to run the code as a Java app, The insert into
table hangs trying to insert 65 rows. Interestingly it only holds shared
read lock on the target table

Lock ID Database        Table   Partition       State   Type    Transaction
ID  Last Hearbeat   Acquired At     User    Hostname
2000    test    t2      NULL    ACQUIRED        *SHARED_READ*     NULL
1457998918685   1457998618675   hduser  rhes564

This is the progress of the code

14/03/2016 23:36:19.19
 Running: CREATE EXTERNAL TABLE stg_t2 ( INVOICENUMBER string ,PAYMENTDATE
string ,NET string ,VAT string ,TOTAL string )COMMENT 'from csv file from
excel sheet PayInsPeridaleTechnology' ROW FORMAT serde
'org.apache.hadoop.hive.serde2.OpenCSVSerde' STORED AS TEXTFILE LOCATION
'/data/stg/table2' TBLPROPERTIES ('skip.header.line.count'='1')

 Running: CREATE TABLE t2 ( INVOICENUMBER          INT
,PAYMENTDATE            timestamp,NET
DECIMAL(20,2),VAT                    DECIMAL(20,2),TOTAL
DECIMAL(20,2))COMMENT 'from csv file from excel sheet
PayInsPeridaleTechnology' CLUSTERED BY (INVOICENUMBER) INTO 256 BUCKETS
STORED AS ORC TBLPROPERTIES ( 'orc.compress'='ZLIB', 'transactional'='true')

Hanging here

 Running: INSERT INTO TABLE t2 SELECT INVOICENUMBER ,
CAST(UNIX_TIMESTAMP(paymentdate,'DD/MM/YYYY')*1000 as timestamp) ,
CAST(REGEXP_REPLACE(net,'[^\d\.]','') AS DECIMAL(20,2)) ,
CAST(REGEXP_REPLACE(vat,'[^\d\.]','') AS DECIMAL(20,2)) ,
CAST(REGEXP_REPLACE(total,'[^\d\.]','') AS DECIMAL(20,2)) FROM stg_t2 WHERE
CAST(REGEXP_REPLACE(total,'[^\d\.]','') AS DECIMAL(20,2)) > 0.0



Dr Mich Talebzadeh



LinkedIn * https://www.linkedin.com/profile/view?id=AAEAAAAWh2gBxianrbJd6zP6AcPCCdOABUrV8Pw
<https://www.linkedin.com/profile/view?id=AAEAAAAWh2gBxianrbJd6zP6AcPCCdOABUrV8Pw>*



http://talebzadehmich.wordpress.com