Posted to issues@spark.apache.org by "Hyukjin Kwon (JIRA)" <ji...@apache.org> on 2019/03/08 07:34:00 UTC

[jira] [Resolved] (SPARK-27091) Spark 2.4 having issue with JDBC Hive connection

     [ https://issues.apache.org/jira/browse/SPARK-27091?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Hyukjin Kwon resolved SPARK-27091.
----------------------------------
    Resolution: Invalid

There is no evidence that this is an issue in Spark.

> Spark 2.4 having issue with JDBC Hive connection
> --------------------------------------------------
>
>                 Key: SPARK-27091
>                 URL: https://issues.apache.org/jira/browse/SPARK-27091
>             Project: Spark
>          Issue Type: Bug
>          Components: Java API
>    Affects Versions: 2.4.0
>            Reporter: vaquar khan
>            Priority: Minor
>
> I am trying to connect Apache Spark 2.4 to Hive over Java JDBC and am getting the error below.
>
> *Logs:*
> org.apache.spark.sql.AnalysisException: cannot resolve '`XXX.yyy`' given input columns: error
>     at org.apache.spark.sql.catalyst.analysis.package$AnalysisErrorAt.failAnalysis(package.scala:42)
>     at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$2.applyOrElse(CheckAnalysis.scala:88)
>     at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1$$anonfun$apply$2.applyOrElse(CheckAnalysis.scala:85)
>     at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:289)
>     at org.apache.spark.sql.catalyst.trees.TreeNode$$anonfun$transformUp$1.apply(TreeNode.scala:289)
>     at org.apache.spark.sql.catalyst.trees.CurrentOrigin$.withOrigin(TreeNode.scala:70)
>     at org.apache.spark.sql.catalyst.trees.TreeNode.transformUp(TreeNode.scala:288)
>     at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$transformExpressionsUp$1.apply(QueryPlan.scala:268)
>     at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$transformExpressionsUp$1.apply(QueryPlan.scala:268)
>     at org.apache.spark.sql.catalyst.plans.QueryPlan.transformExpression$1(QueryPlan.scala:279)
>     at org.apache.spark.sql.catalyst.plans.QueryPlan.org$apache$spark$sql$catalyst$plans$QueryPlan$$recursiveTransform$1(QueryPlan.scala:289)
>     at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$org$apache$spark$sql$catalyst$plans$QueryPlan$$recursiveTransform$1$1.apply(QueryPlan.scala:293)
>     at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
>     at scala.collection.TraversableLike$$anonfun$map$1.apply(TraversableLike.scala:244)
>     at scala.collection.mutable.ResizableArray$class.foreach(ResizableArray.scala:59)
>     at scala.collection.mutable.ArrayBuffer.foreach(ArrayBuffer.scala:47)
>     at scala.collection.TraversableLike$class.map(TraversableLike.scala:244)
>     at scala.collection.AbstractTraversable.map(Traversable.scala:105)
>     at org.apache.spark.sql.catalyst.plans.QueryPlan.org$apache$spark$sql$catalyst$plans$QueryPlan$$recursiveTransform$1(QueryPlan.scala:293)
>     at org.apache.spark.sql.catalyst.plans.QueryPlan$$anonfun$6.apply(QueryPlan.scala:298)
>     at org.apache.spark.sql.catalyst.trees.TreeNode.mapProductIterator(TreeNode.scala:187)
>     at org.apache.spark.sql.catalyst.plans.QueryPlan.mapExpressions(QueryPlan.scala:298)
>     at org.apache.spark.sql.catalyst.plans.QueryPlan.transformExpressionsUp(QueryPlan.scala:268)
>     at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:85)
>     at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$$anonfun$checkAnalysis$1.apply(CheckAnalysis.scala:78)
>     at org.apache.spark.sql.catalyst.trees.TreeNode.foreachUp(TreeNode.scala:127)
>     at org.apache.spark.sql.catalyst.analysis.CheckAnalysis$class.checkAnalysis(CheckAnalysis.scala:78)
>     at org.apache.spark.sql.catalyst.analysis.Analyzer.checkAnalysis(Analyzer.scala:91)
>     at org.apache.spark.sql.execution.QueryExecution.assertAnalyzed(QueryExecution.scala:52)
>     at org.apache.spark.sql.Dataset$.ofRows(Dataset.scala:67)
>     at org.apache.spark.sql.Dataset.org$apache$spark$sql$Dataset$$withPlan(Dataset.scala:2884)
>     at org.apache.spark.sql.Dataset.select(Dataset.scala:1150)
>     at org.apache.spark.sql.Dataset.select(Dataset.scala:1168)
>     at org.apache.spark.sql.Dataset.select(Dataset.scala:1168)
>     at com.khan.vaquar.SparkHiveConnection.start_1(SparkHiveConnection.java:158)
>     at com.khan.vaquar.SparkHiveConnection.main(SparkHiveConnection.java:26)
> 19/03/07 14:19:35 INFO SparkContext: Invoking stop() from shutdown hook
> 19/03/07 14:19:35 INFO SparkUI: Stopped Spark web UI at http://x.x.x.x:4040
> 19/03/07 14:19:35 INFO MapOutputTrackerMasterEndpoint: MapOutputTrackerMasterEndpoint stopped!
> 19/03/07 14:19:35 INFO MemoryStore: MemoryStore cleared
> 19/03/07 14:19:35 INFO BlockManager: BlockManager stopped
> 19/03/07 14:19:35 INFO BlockManagerMaster: BlockManagerMaster stopped
> 19/03/07 14:19:35 INFO OutputCommitCoordinator$OutputCommitCoordinatorEndpoint: OutputCommitCoordinator stopped!
> 19/03/07 14:19:35 INFO SparkContext: Successfully stopped SparkContext
> 19/03/07 14:19:35 INFO ShutdownHookManager: Shutdown hook called
>  
> *With the same connection I can print the schema, and it shows the proper values:*
>  
>  |-- Schema.AAAA: string (nullable = true)
>  |-- Schema.BBBB: date (nullable = true)
>  |-- Schema.CCCCC: date (nullable = true)
>  |-- Schema.DDDDD: integer (nullable = true)
>  |-- Schema.EEEEEEEE: integer (nullable = true)
>  |-- Schema.FFFFFFFFFFFF: integer (nullable = true)
>  |-- Schema.GGGGGGGGGGGG: integer (nullable = true)
>  |-- Schema.HHHHHHHHH: string (nullable = true)
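>  
> Note that the printed schema shows every column qualified as Schema.<name>, i.e. the column names themselves contain a dot, and Spark's analyzer treats an unquoted dot in select() as a qualifier separator. That would explain the "cannot resolve" error above. A minimal sketch of selecting such a column by backtick-quoting the full name (the column name here is illustrative):
>  
> import static org.apache.spark.sql.functions.col;
>  
> // Fails: Spark looks for a column "AAAA" inside a relation named "Schema".
> // jdbcDF.select("Schema.AAAA");
>  
> // Works: backticks make Spark treat the whole string as one column name.
> jdbcDF.select(col("`Schema.AAAA`")).show();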
>  
>  
>  
> Please find the code attached below:
>  
> package com.khan.vaquar;
>  
> import org.apache.spark.SparkConf;
> import org.apache.spark.SparkContext;
> import org.apache.spark.sql.Dataset;
> import org.apache.spark.sql.Row;
> import org.apache.spark.sql.SparkSession;
> import org.apache.spark.sql.types.DataTypes;
> import org.apache.spark.sql.types.StructField;
> import org.apache.spark.sql.types.StructType;
>  
> public class SparkHiveConnection {
>  
>     public static void main(String[] args) {
>         SparkHiveConnection app = new SparkHiveConnection();
>         try {
>             app.start_1();
>         } catch (Exception e) {
>             e.printStackTrace();
>         }
>     }
>  
>     private void start_1() {
>         SparkConf conf = new SparkConf().setAppName("Checkpoint").setMaster("local[*]");
>         SparkContext sparkContext = new SparkContext(conf);
>  
>         // We need to specify where Spark will save the checkpoint file. It can
>         // be an HDFS location.
>         sparkContext.setCheckpointDir("/tmp");
>  
>         String hiveLocation = "jdbc:hive2://1XXXXXXXXXXXXX:2181,1XXXXXXXXXY:2181,1XXXXXXXXXXXZ:2181/;serviceDiscoveryMode=zookeeper;zookeeperNameSpace=hiveserver2";
>  
>         // Reuses the SparkContext created above; the commented-out options
>         // were tried without success.
>         SparkSession spark = SparkSession.builder()
>                 .appName("SparkHiveExample")
>                 // .master("local[*]")
>                 // .config("spark.sql.warehouse.dir", hiveLocation)
>                 // .config("hive.metastore.uris", hiveLocation) // "thrift://localhost:9083"
>                 // .config("hive.mapred.supports.subdirectories", "true")
>                 // .config("spark.driver.allowMultipleContexts", "true")
>                 // .config("mapreduce.input.fileinputformat.input.dir.recursive", "true")
>                 // .config("checkpointLocation", "/tmp/hive") // <-- checkpoint directory
>                 // .config("spark.sql.warehouse.dir", "//1x.x.x.x/apps/hive/warehouse")
>                 // .enableHiveSupport()
>                 .getOrCreate();
>  
>         // Explicit schema for the table (currently unused; see .schema(schema) below).
>         StructType schema = DataTypes.createStructType(new StructField[] {
>                 DataTypes.createStructField("AAAA", DataTypes.StringType, true),
>                 DataTypes.createStructField("BBBB", DataTypes.TimestampType, true),
>                 DataTypes.createStructField("CCCCC", DataTypes.TimestampType, true),
>                 DataTypes.createStructField("DDDDD", DataTypes.StringType, true),
>                 DataTypes.createStructField("EEEEEEEE", DataTypes.StringType, true),
>                 DataTypes.createStructField("FFFFFFFFFFFF", DataTypes.StringType, true),
>                 DataTypes.createStructField("GGGGGGGGGGGG", DataTypes.StringType, true),
>                 DataTypes.createStructField("HHHHHHHHH", DataTypes.StringType, true)
>         });
>  
>         System.out.print("SQL Session--------------------");
>  
>         Dataset<Row> jdbcDF = spark.read()
>                 .format("jdbc")
>                 .option("url", hiveLocation)
>                 .option("dbtable", "schema.TableName")
>                 .option("user", "username")
>                 .option("password", "password")
>                 .option("fetchsize", "20")
>                 .option("inferSchema", false)
>                 // .schema(schema)
>                 // .option("driver", "org.apache.hadoop.hive.jdbc.HiveDriver")
>                 .load();
>  
>         System.out.print("able to connect------------------ ");
>  
>         jdbcDF.printSchema();
>  
>         System.out.print("Results ------------------");
>  
>         // This is the call that fails with the AnalysisException above
>         // (SparkHiveConnection.java:158 in the stack trace).
>         jdbcDF.select("columnName").alias("alias").limit(2).show();
>     }
> }
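>  
> For what it's worth, a possible workaround (untested here; the host, port, and database below are placeholders) is to ask HiveServer2 not to prefix result-set column names with the table name, by passing the Hive session configuration hive.resultset.use.unique.column.names=false in the JDBC URL:
>  
> // Hypothetical connection string; only the trailing parameter matters.
> String hiveLocation = "jdbc:hive2://host:10000/default"
>                 + "?hive.resultset.use.unique.column.names=false";
>  
> With that setting the columns should come back as plain AAAA, BBBB, ..., and jdbcDF.select("columnName") should resolve without backticks.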
>  



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)

---------------------------------------------------------------------
To unsubscribe, e-mail: issues-unsubscribe@spark.apache.org
For additional commands, e-mail: issues-help@spark.apache.org