You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@spark.apache.org by "N.Venkata Naga Ravi" <nv...@hotmail.com> on 2014/06/29 18:46:32 UTC
Spark Streaming with HBase
Hi,
Is there any example available of Spark Streaming with input provided from HBase table content?
Thanks,
Ravi
Re: Spark Streaming with HBase
Posted by Akhil Das <ak...@sigmoidanalytics.com>.
Something like this???
import java.util.Arrays;
import java.util.List;

import org.apache.commons.configuration.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
import org.apache.hadoop.hbase.mapreduce.TableInputFormat;
import org.apache.hadoop.hbase.util.Bytes;
import org.apache.spark.SparkConf;
import org.apache.spark.SparkContext;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.rdd.NewHadoopRDD;
import org.apache.spark.streaming.Duration;
import org.apache.spark.streaming.StreamingContext;
import org.apache.spark.streaming.api.java.JavaStreamingContext;

import com.google.common.collect.Lists;

import scala.Function1;
import scala.collection.JavaConverters.*;
import scala.reflect.ClassTag;
/**
 * Example: reading the full content of an HBase table ("blogposts") into Spark
 * as an RDD of (row key, Result) pairs via {@code TableInputFormat}, inside a
 * streaming context with a 10-second batch interval.
 *
 * <p>Paths to the Spark assembly, HBase client jars and {@code hbase-site.xml}
 * are hard-coded for the original author's local cluster — adjust before running.
 */
public class SparkHBaseMain {

    public static void main(String[] arg) {
        // Declared outside the try so the finally block can stop it.
        JavaStreamingContext jsc = null;
        try {
            // Jars shipped to the executors: Spark assembly + the HBase client stack.
            // (stdlib Arrays.asList replaces the Guava Lists.newArrayList call.)
            List<String> jars = Arrays.asList(
                "/home/akhld/mobi/localcluster/x/spark-0.9.1-bin-hadoop2/assembly/target/scala-2.10/spark-assembly-0.9.1-hadoop2.2.0.jar",
                "/home/akhld/Downloads/hbase-server-0.96.0-hadoop2.jar",
                "/home/akhld/Downloads/hbase-protocol-0.96.0-hadoop2.jar",
                "/home/akhld/Downloads/hbase-hadoop2-compat-0.96.0-hadoop2.jar",
                "/home/akhld/Downloads/hbase-common-0.96.0-hadoop2.jar",
                "/home/akhld/Downloads/hbase-client-0.96.0-hadoop2.jar",
                "/home/akhld/Downloads/htrace-core-2.02.jar");

            SparkConf spconf = new SparkConf();
            spconf.setMaster("local");
            spconf.setAppName("HBaser");
            spconf.setSparkHome("/home/akhld/mobi/localcluster/x/spark-0.9.1-bin-hadoop2");
            spconf.setJars(jars.toArray(new String[jars.size()]));
            spconf.set("spark.executor.memory", "1g");

            // 10,000 ms = 10-second streaming batch interval.
            jsc = new JavaStreamingContext(spconf, new Duration(10000));

            // HBase configuration: load cluster settings and select the input table.
            org.apache.hadoop.conf.Configuration conf = HBaseConfiguration.create();
            conf.addResource("/home/akhld/mobi/temp/sung/hbase-site.xml");
            conf.set(TableInputFormat.INPUT_TABLE, "blogposts");

            // Scan the whole table as an RDD of (ImmutableBytesWritable, Result).
            NewHadoopRDD<ImmutableBytesWritable, Result> rdd2 =
                new NewHadoopRDD<ImmutableBytesWritable, Result>(
                    jsc.ssc().sc(), TableInputFormat.class,
                    ImmutableBytesWritable.class, Result.class, conf);
            System.out.println(rdd2.count());

            jsc.start();
        } catch (Exception e) {
            // Boundary catch in an example main(): report and fall through to cleanup.
            e.printStackTrace();
            System.out.println("Craaaashed : " + e);
        } finally {
            // BUG FIX: the original never stopped the streaming context, leaking
            // its threads and the underlying SparkContext on exit or failure.
            if (jsc != null) {
                jsc.stop();
            }
        }
    }
}
Thanks
Best Regards
On Sun, Jun 29, 2014 at 10:16 PM, N.Venkata Naga Ravi <nv...@hotmail.com>
wrote:
> Hi,
>
> Is there any example provided for Spark Streaming with Input provided from
> HBase table content.
>
> Thanks,
> Ravi
>