You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@spark.apache.org by Alchemist <al...@gmail.com> on 2018/05/24 03:47:59 UTC
Writing data to HBase using Spark fails with NPE
I am using Spark to write data to HBase. I can read data just fine, but the write is failing with the following exception. I found a similar issue that was resolved by adding *site.xml and the HBase JARs, but that is not working for me.
JavaPairRDD<ImmutableBytesWritable, Put> tablePuts = hBaseRDD.mapToPair(new PairFunction<Tuple2<ImmutableBytesWritable, Result>, ImmutableBytesWritable, Put>() { @Override public Tuple2<ImmutableBytesWritable, Put> call(Tuple2<ImmutableBytesWritable, Result> results) throws Exception { byte[] accountId = results._2().getValue(Bytes.toBytes(COLFAMILY), Bytes.toBytes("accountId")); String rowKey = new String(results._2().getRow());
String accountId2 = (Bytes.toString(accountId)); String vbMedia2 = Bytes.toString(vbmedia); System.out.println("************ accountId " + accountId2);
//int prefix = getHash(rowKey); String prefix = getMd5Hash(rowKey); String newrowKey = prefix + rowKey; System.out.println("************ newrowKey &&&&&&&&&&&&&&&" + newrowKey); LOG.info("************ newrowKey &&&&&&&&&&&&&&&" + newrowKey); // Add a single cell def:vbmedia Put put = new Put( Bytes.toBytes(newrowKey) ); put.addColumn(Bytes.toBytes("def"), Bytes.toBytes("accountId"), accountId); } }); Job newAPIJobConfiguration = Job.getInstance(conf); newAPIJobConfiguration.getConfiguration().set(TableOutputFormat.OUTPUT_TABLE, OUT_TABLE_NAME); newAPIJobConfiguration.setOutputFormatClass(org.apache.hadoop.hbase.mapreduce.TableOutputFormat.class); newAPIJobConfiguration.setOutputKeyClass(org.apache.hadoop.hbase.io.ImmutableBytesWritable.class); newAPIJobConfiguration.setOutputValueClass(org.apache.hadoop.io.Writable.class); tablePuts.saveAsNewAPIHadoopDataset(newAPIJobConfiguration.getConfiguration());
Exception in thread "main" java.lang.NullPointerException at org.apache.hadoop.hbase.security.UserProvider.instantiate(UserProvider.java:123) at org.apache.hadoop.hbase.client.ConnectionFactory.createConnection(ConnectionFactory.java:214) at org.apache.hadoop.hbase.client.ConnectionFactory.createConnection(ConnectionFactory.java:119) at org.apache.hadoop.hbase.mapreduce.TableOutputFormat.checkOutputSpecs(TableOutputFormat.java:177) at org.apache.spark.internal.io.HadoopMapReduceWriteConfigUtil.assertConf(SparkHadoopWriter.scala:387) at org.apache.spark.internal.io.SparkHadoopWriter$.write(SparkHadoopWriter.scala:71) at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1.apply$mcV$sp(PairRDDFunctions.scala:1083) at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1.apply(PairRDDFunctions.scala:1081) at org.apache.spark.rdd.PairRDDFunctions$$anonfun$saveAsNewAPIHadoopDataset$1.apply(PairRDDFunctions.scala:1081) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:151) at org.apache.spark.rdd.RDDOperationScope$.withScope(RDDOperationScope.scala:112) at org.apache.spark.rdd.RDD.withScope(RDD.scala:363) at org.apache.spark.rdd.PairRDDFunctions.saveAsNewAPIHadoopDataset(PairRDDFunctions.scala:1081) at org.apache.spark.api.java.JavaPairRDD.saveAsNewAPIHadoopDataset(JavaPairRDD.scala:831) at com.voicebase.etl.s3tohbase.HbaseScan2.main(HbaseScan2.java:148) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:498) at org.apache.spark.deploy.JavaMainApplication.start(SparkApplication.scala:52) at org.apache.spark.deploy.SparkSubmit$.org$apache$spark$deploy$SparkSubmit$$runMain(SparkSubmit.scala:879) at org.apache.spark.deploy.SparkSubmit$.doRunMain$1(SparkSubmit.scala:197) at 
org.apache.spark.deploy.SparkSubmit$.submit(SparkSubmit.scala:227) at org.apache.spark.deploy.SparkSubmit$.main(SparkSubmit.scala:136) at org.apache.spark.deploy.SparkSubmit.main(SparkSubmit.scala)