Posted to user@hbase.apache.org by Margusja <ma...@roo.ee> on 2014/04/10 10:12:02 UTC

This server is in the failed servers list

Hi
I have the following Java code:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.HBaseAdmin;

public class Hbase_connect {

    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        // Point the client at the sandbox ZooKeeper quorum and at the
        // znode parent the HDP sandbox uses for its unsecured cluster.
        conf.set("hbase.zookeeper.quorum", "sandbox.hortonworks.com");
        conf.set("hbase.zookeeper.property.clientPort", "2181");
        conf.set("hbase.rootdir", "hdfs://sandbox.hortonworks.com:8020/apps/hbase/data");
        conf.set("zookeeper.znode.parent", "/hbase-unsecure");
        HBaseAdmin admin = new HBaseAdmin(conf);
        try {
            // List every table known to the master.
            for (HTableDescriptor desc : admin.listTables()) {
                System.out.println("Table = " + desc.getNameAsString());
            }
        } finally {
            admin.close();
        }
    }
}

[hbase@sandbox hbase_connect]$ ls -lah libs/
total 80M
drwxr-xr-x 3 hbase hadoop 4.0K Apr  5 10:42 .
drwxr-xr-x 3 hbase hadoop 4.0K Apr  5 11:02 ..
-rw-r--r-- 1 hbase hadoop 2.5K Oct  6 23:39 hadoop-client-2.2.0.jar
-rw-r--r-- 1 hbase hadoop 4.1M Jul 24  2013 hadoop-core-1.2.1.jar
drwxr-xr-x 4 hbase hadoop 4.0K Apr  5 09:40 hbase-0.96.2-hadoop2
-rw-r--r-- 1 hbase hadoop  76M Apr  3 16:18 hbase-0.96.2-hadoop2-bin.tar.gz

[hbase@sandbox hbase_connect]$ java -cp ./:./libs/*:./libs/hbase-0.96.2-hadoop2/lib/* Hbase_connect
14/04/05 11:03:03 INFO zookeeper.ZooKeeper: Client 
environment:zookeeper.version=3.4.5-1392090, built on 09/30/2012 17:52 GMT
14/04/05 11:03:03 INFO zookeeper.ZooKeeper: Client 
environment:host.name=sandbox.hortonworks.com
14/04/05 11:03:03 INFO zookeeper.ZooKeeper: Client 
environment:java.version=1.6.0_30
14/04/05 11:03:03 INFO zookeeper.ZooKeeper: Client 
environment:java.vendor=Sun Microsystems Inc.
14/04/05 11:03:03 INFO zookeeper.ZooKeeper: Client 
environment:java.home=/usr/lib/jvm/java-1.6.0-openjdk-1.6.0.0.x86_64/jre
14/04/05 11:03:03 INFO zookeeper.ZooKeeper: Client 
environment:java.class.path=./:./libs/hadoop-client-2.2.0.jar:./libs/hadoop-core-1.2.1.jar:./libs/hbase-0.96.2-hadoop2/lib/management-api-3.0.0-b012.jar:./libs/hbase-0.96.2-hadoop2/lib/jackson-core-asl-1.8.8.jar:./libs/hbase-0.96.2-hadoop2/lib/slf4j-log4j12-1.6.4.jar:./libs/hbase-0.96.2-hadoop2/lib/hbase-server-0.96.2-hadoop2.jar:./libs/hbase-0.96.2-hadoop2/lib/jsp-2.1-6.1.14.jar:./libs/hbase-0.96.2-hadoop2/lib/log4j-1.2.17.jar:./libs/hbase-0.96.2-hadoop2/lib/hadoop-mapreduce-client-core-2.2.0.jar:./libs/hbase-0.96.2-hadoop2/lib/commons-codec-1.7.jar:./libs/hbase-0.96.2-hadoop2/lib/hadoop-mapreduce-client-jobclient-2.2.0.jar:./libs/hbase-0.96.2-hadoop2/lib/hbase-common-0.96.2-hadoop2.jar:./libs/hbase-0.96.2-hadoop2/lib/jersey-server-1.8.jar:./libs/hbase-0.96.2-hadoop2/lib/hbase-it-0.96.2-hadoop2.jar:./libs/hbase-0.96.2-hadoop2/lib/commons-el-1.0.jar:./libs/hbase-0.96.2-hadoop2/lib/commons-collections-3.2.1.jar:./libs/hbase-0.96.2-hadoop2/lib/hadoop-mapreduce-client-common-2.2.0.jar:./libs/hbase-0.96.2-hadoop2/lib/jersey-grizzly2-1.9.jar:./libs/hbase-0.96.2-hadoop2/lib/protobuf-java-2.5.0.jar:./libs/hbase-0.96.2-hadoop2/lib/hadoop-client-2.2.0.jar:./libs/hbase-0.96.2-hadoop2/lib/hadoop-common-2.2.0.jar:./libs/hbase-0.96.2-hadoop2/lib/hadoop-yarn-api-2.2.0.jar:./libs/hbase-0.96.2-hadoop2/lib/hadoop-mapreduce-client-app-2.2.0.jar:./libs/hbase-0.96.2-hadoop2/lib/hamcrest-core-1.3.jar:./libs/hbase-0.96.2-hadoop2/lib/commons-beanutils-core-1.8.0.jar:./libs/hbase-0.96.2-hadoop2/lib/hbase-client-0.96.2-hadoop2.jar:./libs/hbase-0.96.2-hadoop2/lib/slf4j-api-1.6.4.jar:./libs/hbase-0.96.2-hadoop2/lib/commons-compress-1.4.1.jar:./libs/hbase-0.96.2-hadoop2/lib/xmlenc-0.52.jar:./libs/hbase-0.96.2-hadoop2/lib/javax.servlet-api-3.0.1.jar:./libs/hbase-0.96.2-hadoop2/lib/hbase-hadoop-compat-0.96.2-hadoop2.jar:./libs/hbase-0.96.2-hadoop2/lib/grizzly-http-servlet-2.1.2.jar:./libs/hbase-0.96.2-hadoop2/lib/commons-io-2.4.jar:./libs/hbase-0.96.2-hadoop2/lib/jersey-guice-1.9.jar:./libs/hbase-0.96.2-hadoop2/lib/findbugs-annotations-1.3.9-1.jar:./libs/hbase-0.96.2-hadoop2/lib/avro-1.7.4.jar:./libs/hbase-0.96.2-hadoop2/lib/hbase-testing-util-0.96.2-hadoop2.jar:./libs/hbase-0.96.2-hadoop2/lib/commons-configuration-1.6.jar:./libs/hbase-0.96.2-hadoop2/lib/zookeeper-3.4.5.jar:./libs/hbase-0.96.2-hadoop2/lib/hbase-server-0.96.2-hadoop2-tests.jar:./libs/hbase-0.96.2-hadoop2/lib/jersey-json-1.8.jar:./libs/hbase-0.96.2-hadoop2/lib/commons-logging-1.1.1.jar:./libs/hbase-0.96.2-hadoop2/lib/hadoop-yarn-server-common-2.2.0.jar:./libs/hbase-0.96.2-hadoop2/lib/gmbal-api-only-3.0.0-b023.jar:./libs/hbase-0.96.2-hadoop2/lib/jasper-compiler-5.5.23.jar:./libs/hbase-0.96.2-hadoop2/lib/jasper-runtime-5.5.23.jar:./libs/hbase-0.96.2-hadoop2/lib/guava-12.0.1.jar:./libs/hbase-0.96.2-hadoop2/lib/hbase-prefix-tree-0.96.2-hadoop2.jar:./libs/hbase-0.96.2-hadoop2/lib/jetty-6.1.26.jar:./libs/hbase-0.96.2-hadoop2/lib/commons-math-2.1.jar:./libs/hbase-0.96.2-hadoop2/lib/hadoop-hdfs-2.2.0-tests.jar:./libs/hbase-0.96.2-hadoop2/lib/jersey-test-framework-grizzly2-1.9.jar:./libs/hbase-0.96.2-hadoop2/lib/jersey-client-1.9.jar:./libs/hbase-0.96.2-hadoop2/lib/jaxb-api-2.2.2.jar:./libs/hbase-0.96.2-hadoop2/lib/jackson-jaxrs-1.8.8.jar:./libs/hbase-0.96.2-hadoop2/lib/grizzly-http-server-2.1.2.jar:./libs/hbase-0.96.2-hadoop2/lib/metrics-core-2.1.2.jar:./libs/hbase-0.96.2-hadoop2/lib/jersey-core-1.8.jar:./libs/hbase-0.96.2-hadoop2/lib/jruby-complete-1.6.8.jar:./libs/hbase-0.96.2-hadoop2/lib/javax.servlet-3.1.jar:./libs/hbase-0.96.2-hadoop2/lib/commons-httpclient-
3.1.jar:./libs/hbase-0.96.2-hadoop2/lib/hadoop-hdfs-2.2.0.jar:./libs/hbase-0.96.2-hadoop2/lib/hadoop-mapreduce-client-shuffle-2.2.0.jar:./libs/hbase-0.96.2-hadoop2/lib/jaxb-impl-2.2.3-1.jar:./libs/hbase-0.96.2-hadoop2/lib/paranamer-2.3.jar:./libs/hbase-0.96.2-hadoop2/lib/xz-1.0.jar:./libs/hbase-0.96.2-hadoop2/lib/hbase-shell-0.96.2-hadoop2.jar:./libs/hbase-0.96.2-hadoop2/lib/jsp-api-2.1-6.1.14.jar:./libs/hbase-0.96.2-hadoop2/lib/commons-cli-1.2.jar:./libs/hbase-0.96.2-hadoop2/lib/aopalliance-1.0.jar:./libs/hbase-0.96.2-hadoop2/lib/grizzly-rcm-2.1.2.jar:./libs/hbase-0.96.2-hadoop2/lib/hadoop-auth-2.2.0.jar:./libs/hbase-0.96.2-hadoop2/lib/snappy-java-1.0.4.1.jar:./libs/hbase-0.96.2-hadoop2/lib/commons-lang-2.6.jar:./libs/hbase-0.96.2-hadoop2/lib/commons-daemon-1.0.13.jar:./libs/hbase-0.96.2-hadoop2/lib/hadoop-mapreduce-client-jobclient-2.2.0-tests.jar:./libs/hbase-0.96.2-hadoop2/lib/jersey-test-framework-core-1.9.jar:./libs/hbase-0.96.2-hadoop2/lib/jettison-1.3.1.jar:./libs/hbase-0.96.2-hadoop2/lib/jackson-mapper-asl-1.8.8.jar:./libs/hbase-0.96.2-hadoop2/lib/commons-net-3.1.jar:./libs/hbase-0.96.2-hadoop2/lib/hadoop-yarn-common-2.2.0.jar:./libs/hbase-0.96.2-hadoop2/lib/hadoop-annotations-2.2.0.jar:./libs/hbase-0.96.2-hadoop2/lib/hbase-protocol-0.96.2-hadoop2.jar:./libs/hbase-0.96.2-hadoop2/lib/commons-beanutils-1.7.0.jar:./libs/hbase-0.96.2-hadoop2/lib/servlet-api-2.5-6.1.14.jar:./libs/hbase-0.96.2-hadoop2/lib/hbase-hadoop2-compat-0.96.2-hadoop2.jar:./libs/hbase-0.96.2-hadoop2/lib/hadoop-yarn-client-2.2.0.jar:./libs/hbase-0.96.2-hadoop2/lib/hadoop-yarn-server-nodemanager-2.2.0.jar:./libs/hbase-0.96.2-hadoop2/lib/jsr305-1.3.9.jar:./libs/hbase-0.96.2-hadoop2/lib/jetty-sslengine-6.1.26.jar:./libs/hbase-0.96.2-hadoop2/lib/libthrift-0.9.0.jar:./libs/hbase-0.96.2-hadoop2/lib/jsch-0.1.42.jar:./libs/hbase-0.96.2-hadoop2/lib/guice-servlet-3.0.jar:./libs/hbase-0.96.2-hadoop2/lib/jackson-xc-1.8.8.jar:./libs/hbase-0.96.2-hadoop2/lib/netty-3.6.6.Final.jar:./libs/hbase-0.96.2-hadoop2/lib/commons-digester-1.8.jar:./libs/hbase-0.96.2-hadoop2/lib/httpcore-4.1.3.jar:./libs/hbase-0.96.2-hadoop2/lib/javax.inject-1.jar:./libs/hbase-0.96.2-hadoop2/lib/activation-1.1.jar:./libs/hbase-0.96.2-hadoop2/lib/hbase-thrift-0.96.2-hadoop2.jar:./libs/hbase-0.96.2-hadoop2/lib/jamon-runtime-2.3.1.jar:./libs/hbase-0.96.2-hadoop2/lib/htrace-core-2.04.jar:./libs/hbase-0.96.2-hadoop2/lib/guice-3.0.jar:./libs/hbase-0.96.2-hadoop2/lib/junit-4.11.jar:./libs/hbase-0.96.2-hadoop2/lib/jets3t-0.6.1.jar:./libs/hbase-0.96.2-hadoop2/lib/asm-3.1.jar:./libs/hbase-0.96.2-hadoop2/lib/hbase-examples-0.96.2-hadoop2.jar:./libs/hbase-0.96.2-hadoop2/lib/grizzly-http-2.1.2.jar:./libs/hbase-0.96.2-hadoop2/lib/grizzly-framework-2.1.2.jar:./libs/hbase-0.96.2-hadoop2/lib/jetty-util-6.1.26.jar:./libs/hbase-0.96.2-hadoop2/lib/httpclient-4.1.3.jar
14/04/05 11:03:03 INFO zookeeper.ZooKeeper: Client 
environment:java.library.path=/usr/lib/jvm/java-1.6.0-openjdk-1.6.0.0.x86_64/jre/lib/amd64/server:/usr/lib/jvm/java-1.6.0-openjdk-1.6.0.0.x86_64/jre/lib/amd64:/usr/lib/jvm/java-1.6.0-openjdk-1.6.0.0.x86_64/jre/../lib/amd64:/usr/java/packages/lib/amd64:/usr/lib64:/lib64:/lib:/usr/lib
14/04/05 11:03:03 INFO zookeeper.ZooKeeper: Client 
environment:java.io.tmpdir=/tmp
14/04/05 11:03:03 INFO zookeeper.ZooKeeper: Client 
environment:java.compiler=<NA>
14/04/05 11:03:03 INFO zookeeper.ZooKeeper: Client environment:os.name=Linux
14/04/05 11:03:03 INFO zookeeper.ZooKeeper: Client environment:os.arch=amd64
14/04/05 11:03:03 INFO zookeeper.ZooKeeper: Client 
environment:os.version=2.6.32-431.11.2.el6.x86_64
14/04/05 11:03:03 INFO zookeeper.ZooKeeper: Client 
environment:user.name=hbase
14/04/05 11:03:03 INFO zookeeper.ZooKeeper: Client 
environment:user.home=/home/hbase
14/04/05 11:03:03 INFO zookeeper.ZooKeeper: Client 
environment:user.dir=/home/hbase/hbase_connect
14/04/05 11:03:03 INFO zookeeper.ZooKeeper: Initiating client 
connection, connectString=sandbox.hortonworks.com:2181 
sessionTimeout=90000 watcher=hconnection-0x35ec28b7, 
quorum=sandbox.hortonworks.com:2181, baseZNode=/hbase-unsecure
14/04/05 11:03:03 INFO zookeeper.RecoverableZooKeeper: Process 
identifier=hconnection-0x35ec28b7 connecting to ZooKeeper 
ensemble=sandbox.hortonworks.com:2181
14/04/05 11:03:03 INFO zookeeper.ClientCnxn: Opening socket connection 
to server sandbox.hortonworks.com/10.0.2.15:2181. Will not attempt to 
authenticate using SASL (unknown error)
14/04/05 11:03:03 INFO zookeeper.ClientCnxn: Socket connection 
established to sandbox.hortonworks.com/10.0.2.15:2181, initiating session
14/04/05 11:03:03 INFO zookeeper.ClientCnxn: Session establishment 
complete on server sandbox.hortonworks.com/10.0.2.15:2181, sessionid = 
0x1453145e9500056, negotiated timeout = 40000
14/04/05 11:03:04 INFO 
client.HConnectionManager$HConnectionImplementation: getMaster attempt 1 
of 35 failed; retrying after sleep of 100, 
exception=com.google.protobuf.ServiceException: 
org.apache.hadoop.hbase.DoNotRetryIOException: 
java.lang.NoSuchMethodError: 
org.apache.hadoop.net.NetUtils.getInputStream(Ljava/net/Socket;)Lorg/apache/hadoop/net/SocketInputWrapper;
14/04/05 11:03:04 INFO 
client.HConnectionManager$HConnectionImplementation: getMaster attempt 2 
of 35 failed; retrying after sleep of 201, 
exception=com.google.protobuf.ServiceException: 
org.apache.hadoop.hbase.ipc.RpcClient$FailedServerException: This server 
is in the failed servers list: sandbox.hortonworks.com/10.0.2.15:60000
14/04/05 11:03:04 INFO 
client.HConnectionManager$HConnectionImplementation: getMaster attempt 3 
of 35 failed; retrying after sleep of 300, 
exception=com.google.protobuf.ServiceException: 
org.apache.hadoop.hbase.ipc.RpcClient$FailedServerException: This server 
is in the failed servers list: sandbox.hortonworks.com/10.0.2.15:60000
14/04/05 11:03:05 INFO 
client.HConnectionManager$HConnectionImplementation: getMaster attempt 4 
of 35 failed; retrying after sleep of 500, 
exception=com.google.protobuf.ServiceException: 
org.apache.hadoop.hbase.ipc.RpcClient$FailedServerException: This server 
is in the failed servers list: sandbox.hortonworks.com/10.0.2.15:60000
14/04/05 11:03:05 INFO 
client.HConnectionManager$HConnectionImplementation: getMaster attempt 5 
of 35 failed; retrying after sleep of 1001, 
exception=com.google.protobuf.ServiceException: 
org.apache.hadoop.hbase.ipc.RpcClient$FailedServerException: This server 
is in the failed servers list: sandbox.hortonworks.com/10.0.2.15:60000
14/04/05 11:03:06 INFO 
client.HConnectionManager$HConnectionImplementation: getMaster attempt 6 
of 35 failed; retrying after sleep of 2014, 
exception=com.google.protobuf.ServiceException: 
org.apache.hadoop.hbase.DoNotRetryIOException: 
java.lang.NoSuchMethodError: 
org.apache.hadoop.net.NetUtils.getInputStream(Ljava/net/Socket;)Lorg/apache/hadoop/net/SocketInputWrapper;
14/04/05 11:03:08 INFO 
client.HConnectionManager$HConnectionImplementation: getMaster attempt 7 
of 35 failed; retrying after sleep of 4027, 
exception=com.google.protobuf.ServiceException: 
org.apache.hadoop.hbase.DoNotRetryIOException: 
java.lang.NoSuchMethodError: 
org.apache.hadoop.net.NetUtils.getInputStream(Ljava/net/Socket;)Lorg/apache/hadoop/net/SocketInputWrapper;


[hbase@sandbox hbase_connect]$ jps
4355 HMaster
5335 Jps
4711 HRegionServer
4715 ThriftServer
4717 RESTServer

tcp 0 0 0.0.0.0:2181 0.0.0.0:* LISTEN
tcp 0 0 10.0.2.15:60000 0.0.0.0:* LISTEN 4355/java

[root@sandbox ~]# cat /etc/hosts
127.0.0.1       localhost.localdomain localhost
10.0.2.15       sandbox.hortonworks.com sandbox

Any hints?

-- 
Best regards, Margus (Margusja) Roo
+372 51 48 780
http://margus.roo.ee
http://ee.linkedin.com/in/margusroo
skype: margusja
ldapsearch -x -h ldap.sk.ee -b c=EE "(serialNumber=37303140314)"



Re: Lease exception when I execute large scan with filters.

Posted by Guillermo Ortiz <ko...@gmail.com>.
Okay, thank you, I'll check it this Monday. I didn't know that a Scan reads
all the versions.
So I was checking each column and each version, although it only showed me
the newest version because I didn't specify anything about the VERSIONS
attribute. It makes sense that it takes so long.
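For illustration, here is a minimal sketch of how the version count surfaces
in the Java client (hedged: written against the 0.94-era API used in this
thread; the table, row, and column names are hypothetical):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.client.Get;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.util.Bytes;

public class VersionsDemo {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        HTable table = new HTable(conf, "table1");
        try {
            Get get = new Get(Bytes.toBytes("row1"));
            // The client defaults to returning only the newest version;
            // ask for all of them explicitly to see every stored version.
            get.setMaxVersions();
            Result result = table.get(get);
            for (KeyValue kv : result.raw()) {
                System.out.println(kv + " => " + Bytes.toString(kv.getValue()));
            }
        } finally {
            table.close();
        }
    }
}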


2014-04-11 16:57 GMT+02:00 Ted Yu <yu...@gmail.com>:

> In your previous example:
> scan 'table1', {FILTER => "ValueFilter(=, 'binary:5')"}
>
> there was no expression w.r.t. timestamp. See the following javadoc from
> Scan.java:
>
>  * To only retrieve columns within a specific range of version timestamps,
>
>  * execute {@link #setTimeRange(long, long) setTimeRange}.
>
>  * <p>
>
>  * To only retrieve columns with a specific timestamp, execute
>
>  * {@link #setTimeStamp(long) setTimestamp}.
>
> You can use one of the above methods to make your scan more selective.
>
>
> ValueFilter#filterKeyValue(Cell) doesn't utilize advanced feature of
> ReturnCode. You can refer to:
>
>
> https://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/Filter.ReturnCode.html
>
> You can take a look at SingleColumnValueFilter#filterKeyValue() for example
> of how various ReturnCode's are used to speed up scan.
>
> Cheers
>
>
> On Fri, Apr 11, 2014 at 8:40 AM, Guillermo Ortiz <konstt2000@gmail.com
> >wrote:
>
> > I read something interesting about it in HBase TDG.
> >
> > Page 344:
> > The StoreScanner class combines the store files and memstore that the
> > Store instance
> > contains. It is also where the exclusion happens, based on the Bloom
> > filter, or the timestamp. If you are asking for versions that are not
> more
> > than 30 minutes old, for example, you can skip all storage files that are
> > older than one hour: they will not contain anything of interest. See "Key
> > Design" on page 357 for details on the exclusion, and how to make use of
> > it.
> >
> > So, I guess that it doesn't have to read all the HFiles?? But, I don't
> know
> > if HBase really uses the timestamp of each row or the date of the file. I
> > guess when I execute the scan, it reads everything, but, I don't know
> why.
> > I think there's something else that I don't see so that everything works
> to
> > me.
> >
> >
> > 2014-04-11 13:05 GMT+02:00 gortiz <go...@pragsis.com>:
> >
> > > Sorry, I didn't get it why it should read all the timestamps and not
> just
> > > the newest it they're sorted and you didn't specific any timestamp in
> > your
> > > filter.
> > >
> > >
> > >
> > > On 11/04/14 12:13, Anoop John wrote:
> > >
> > >> In the storage layer (HFiles in HDFS) all versions of a particular
> cell
> > >> will be staying together.  (Yes it has to be lexicographically ordered
> > >> KVs). So during a scan we will have to read all the version data.  At
> > this
> > >> storage layer it doesn't know the versions stuff etc.
> > >>
> > >> -Anoop-
> > >>
> > >> On Fri, Apr 11, 2014 at 3:33 PM, gortiz <go...@pragsis.com> wrote:
> > >>
> > >>  Yes, I have tried with two different values for that value of
> versions,
> > >>> 1000 and maximum value for integers.
> > >>>
> > >>> But, I want to keep those versions. I don't want to keep just 3
> > versions.
> > >>> Imagine that I want to record a new version each minute and store a
> > day,
> > >>> those are 1440 versions.
> > >>>
> > >>> Why is HBase going to read all the versions?? , I thought, if you
> don't
> > >>> indicate any versions it's just read the newest and skip the rest. It
> > >>> doesn't make too much sense to read all of them if data is sorted,
> plus
> > >>> the
> > >>> newest version is stored in the top.
> > >>>
> > >>>
> > >>>
> > >>> On 11/04/14 11:54, Anoop John wrote:
> > >>>
> > >>>    What is the max version setting u have done for ur table cf?
>  When u
> > >>>> set
> > >>>> some a value, HBase has to keep all those versions.  During a scan
> it
> > >>>> will
> > >>>> read all those versions. In 94 version the default value for the max
> > >>>> versions is 3.  I guess you have set some bigger value.   If u have
> > not,
> > >>>> mind testing after a major compaction?
> > >>>>
> > >>>> -Anoop-
> > >>>>
> > >>>> On Fri, Apr 11, 2014 at 1:01 PM, gortiz <go...@pragsis.com> wrote:
> > >>>>
> > >>>>   Last test I have done it's to reduce the number of versions to
> 100.
> > >>>>
> > >>>>> So, right now, I have 100 rows with 100 versions each one.
> > >>>>> Times are: (I got the same times for blocksize of 64Ks and 1Mb)
> > >>>>> 100row-1000versions + blockcache-> 80s.
> > >>>>> 100row-1000versions + No blockcache-> 70s.
> > >>>>>
> > >>>>> 100row-*100*versions + blockcache-> 7.3s.
> > >>>>> 100row-*100*versions + No blockcache-> 6.1s.
> > >>>>>
> > >>>>> What's the reasons of this? I guess HBase is enough smart for not
> > >>>>> consider
> > >>>>> old versions, so, it just checks the newest. But, I reduce 10 times
> > the
> > >>>>> size (in versions) and I got a 10x of performance.
> > >>>>>
> > >>>>> The filter is scan 'filters', {FILTER => "ValueFilter(=,
> > >>>>> 'binary:5')",STARTROW =>
> '1010000000000000000000000000000000000101',
> > >>>>> STOPROW => '6010000000000000000000000000000000000201'}
> > >>>>>
> > >>>>>
> > >>>>>
> > >>>>> On 11/04/14 09:04, gortiz wrote:
> > >>>>>
> > >>>>>   Well, I guessed that, what it doesn't make too much sense because
> > >>>>> it's
> > >>>>>
> > >>>>>> so
> > >>>>>> slow. I only have right now 100 rows with 1000 versions each row.
> > >>>>>> I have checked the size of the dataset and each row is about
> > 700Kbytes
> > >>>>>> (around 7Gb, 100rowsx1000versions). So, it should only check 100
> > rows
> > >>>>>> x
> > >>>>>> 700Kbytes = 70Mb, since it just check the newest version. How can
> it
> > >>>>>> spend
> > >>>>>> too many time checking this quantity of data?
> > >>>>>>
> > >>>>>> I'm generating again the dataset with a bigger blocksize
> (previously
> > >>>>>> was
> > >>>>>> 64Kb, now, it's going to be 1Mb). I could try tunning the scanning
> > and
> > >>>>>> baching parameters, but I don't think they're going to affect too
> > >>>>>> much.
> > >>>>>>
> > >>>>>> Another test I want to do, it's generate the same dataset with
> just
> > >>>>>> 100versions, It should spend around the same time, right? Or am I
> > >>>>>> wrong?
> > >>>>>>
> > >>>>>> On 10/04/14 18:08, Ted Yu wrote:
> > >>>>>>
> > >>>>>>   It should be newest version of each value.
> > >>>>>>
> > >>>>>>> Cheers
> > >>>>>>>
> > >>>>>>>
> > >>>>>>> On Thu, Apr 10, 2014 at 9:55 AM, gortiz <go...@pragsis.com>
> > wrote:
> > >>>>>>>
> > >>>>>>> Another little question is, when the filter I'm using, Do I check
> > all
> > >>>>>>> the
> > >>>>>>>
> > >>>>>>>    versions? or just the newest? Because, I'm wondering if when I
> > do
> > >>>>>>>> a
> > >>>>>>>> scan
> > >>>>>>>> over all the table, I look for the value "5" in all the dataset
> or
> > >>>>>>>> I'm
> > >>>>>>>> just
> > >>>>>>>> looking for in one newest version of each value.
> > >>>>>>>>
> > >>>>>>>>
> > >>>>>>>> On 10/04/14 16:52, gortiz wrote:
> > >>>>>>>>
> > >>>>>>>> I was trying to check the behaviour of HBase. The cluster is a
> > group
> > >>>>>>>> of
> > >>>>>>>>
> > >>>>>>>>  old computers, one master, five slaves, each one with 2Gb, so,
> > 12gb
> > >>>>>>>>> in
> > >>>>>>>>> total.
> > >>>>>>>>> The table has a column family with 1000 columns and each column
> > >>>>>>>>> with
> > >>>>>>>>> 100
> > >>>>>>>>> versions.
> > >>>>>>>>> There's another column faimily with four columns an one image
> of
> > >>>>>>>>> 100kb.
> > >>>>>>>>>     (I've tried without this column family as well.)
> > >>>>>>>>> The table is partitioned manually in all the slaves, so data
> are
> > >>>>>>>>> balanced
> > >>>>>>>>> in the cluster.
> > >>>>>>>>>
> > >>>>>>>>> I'm executing this sentence *scan 'table1', {FILTER =>
> > >>>>>>>>> "ValueFilter(=,
> > >>>>>>>>> 'binary:5')"* in HBase 0.94.6
> > >>>>>>>>> My time for lease and rpc is three minutes.
> > >>>>>>>>> Since, it's a full scan of the table, I have been playing with
> > the
> > >>>>>>>>> BLOCKCACHE as well (just disable and enable, not about the size
> > of
> > >>>>>>>>> it). I
> > >>>>>>>>> thought that it was going to have too much calls to the GC. I'm
> > not
> > >>>>>>>>> sure
> > >>>>>>>>> about this point.
> > >>>>>>>>>
> > >>>>>>>>> I know that it's not the best way to use HBase, it's just a
> > test. I
> > >>>>>>>>> think
> > >>>>>>>>> that it's not working because the hardware isn't enough,
> > although,
> > >>>>>>>>> I
> > >>>>>>>>> would
> > >>>>>>>>> like to try some kind of tunning to improve it.
> > >>>>>>>>>
> > >>>>>>>>>
> > >>>>>>>>>
> > >>>>>>>>>
> > >>>>>>>>>
> > >>>>>>>>>
> > >>>>>>>>>
> > >>>>>>>>>
> > >>>>>>>>> On 10/04/14 14:21, Ted Yu wrote:
> > >>>>>>>>>
> > >>>>>>>>> Can you give us a bit more information:
> > >>>>>>>>>
> > >>>>>>>>>  HBase release you're running
> > >>>>>>>>>> What filters are used for the scan
> > >>>>>>>>>>
> > >>>>>>>>>> Thanks
> > >>>>>>>>>>
> > >>>>>>>>>> On Apr 10, 2014, at 2:36 AM, gortiz <go...@pragsis.com>
> wrote:
> > >>>>>>>>>>
> > >>>>>>>>>>     I got this error when I execute a full scan with filters
> > >>>>>>>>>> about a
> > >>>>>>>>>> table.
> > >>>>>>>>>>
> > >>>>>>>>>> Caused by: java.lang.RuntimeException:
> org.apache.hadoop.hbase.
> > >>>>>>>>>>
> > >>>>>>>>>>> regionserver.LeaseException:
> > >>>>>>>>>>> org.apache.hadoop.hbase.regionserver.LeaseException: lease
> > >>>>>>>>>>> '-4165751462641113359' does not exist
> > >>>>>>>>>>>        at org.apache.hadoop.hbase.regionserver.Leases.
> > >>>>>>>>>>> removeLease(Leases.java:231)
> > >>>>>>>>>>>
> > >>>>>>>>>>>
> > >>>>>>>>>>>        at org.apache.hadoop.hbase.regionserver.HRegionServer.
> > >>>>>>>>>>> next(HRegionServer.java:2482)
> > >>>>>>>>>>>        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native
> > >>>>>>>>>>> Method)
> > >>>>>>>>>>>        at sun.reflect.NativeMethodAccessorImpl.invoke(
> > >>>>>>>>>>> NativeMethodAccessorImpl.java:39)
> > >>>>>>>>>>>        at sun.reflect.DelegatingMethodAccessorImpl.invoke(
> > >>>>>>>>>>> DelegatingMethodAccessorImpl.java:25)
> > >>>>>>>>>>>        at java.lang.reflect.Method.invoke(Method.java:597)
> > >>>>>>>>>>>        at org.apache.hadoop.hbase.ipc.
> > >>>>>>>>>>> WritableRpcEngine$Server.call(
> > >>>>>>>>>>> WritableRpcEngine.java:320)
> > >>>>>>>>>>>        at
> org.apache.hadoop.hbase.ipc.HBaseServer$Handler.run(
> > >>>>>>>>>>> HBaseServer.java:1428)
> > >>>>>>>>>>>
> > >>>>>>>>>>> I have read about increase the lease time and rpc time, but
> > it's
> > >>>>>>>>>>> not
> > >>>>>>>>>>> working.. what else could I try?? The table isn't too big. I
> > have
> > >>>>>>>>>>> been
> > >>>>>>>>>>> checking the logs from GC, HMaster and some RegionServers
> and I
> > >>>>>>>>>>> didn't see
> > >>>>>>>>>>> anything weird. I tried as well to try with a couple of
> caching
> > >>>>>>>>>>> values.
> > >>>>>>>>>>>
> > >>>>>>>>>>>
> > >>>>>>>>>>> --
> > >>>>>>>>>>>
> > >>>>>>>>>> *Guillermo Ortiz*
> > >>>>>>>> /Big Data Developer/
> > >>>>>>>>
> > >>>>>>>> Telf.: +34 917 680 490
> > >>>>>>>> Fax: +34 913 833 301
> > >>>>>>>>
> > >>>>>>>> C/ Manuel Tovar, 49-53 - 28034 Madrid - Spain
> > >>>>>>>>
> > >>>>>>>> _http://www.bidoop.es_
> > >>>>>>>>
> > >>>>>>>>
> > >>>>>>>>
> > >>>>>>>> --
> > >>>>>>>>
> > >>>>>>> *Guillermo Ortiz*
> > >>>>> /Big Data Developer/
> > >>>>>
> > >>>>> Telf.: +34 917 680 490
> > >>>>> Fax: +34 913 833 301
> > >>>>>
> > >>>>> C/ Manuel Tovar, 49-53 - 28034 Madrid - Spain
> > >>>>>
> > >>>>> _http://www.bidoop.es_
> > >>>>>
> > >>>>>
> > >>>>>
> > >>>>>  --
> > >>> *Guillermo Ortiz*
> > >>> /Big Data Developer/
> > >>>
> > >>> Telf.: +34 917 680 490
> > >>> Fax: +34 913 833 301
> > >>>   C/ Manuel Tovar, 49-53 - 28034 Madrid - Spain
> > >>>
> > >>> _http://www.bidoop.es_
> > >>>
> > >>>
> > >>>
> > >
> > > --
> > > *Guillermo Ortiz*
> > > /Big Data Developer/
> > >
> > > Telf.: +34 917 680 490
> > > Fax: +34 913 833 301
> > > C/ Manuel Tovar, 49-53 - 28034 Madrid - Spain
> > >
> > > _http://www.bidoop.es_
> > >
> > >
> >
>

Re: Lease exception when I execute large scan with filters.

Posted by Ted Yu <yu...@gmail.com>.
In your previous example:
scan 'table1', {FILTER => "ValueFilter(=, 'binary:5')"}

there was no expression w.r.t. timestamp. See the following javadoc from
Scan.java:

 * To only retrieve columns within a specific range of version timestamps,
 * execute {@link #setTimeRange(long, long) setTimeRange}.
 * <p>
 * To only retrieve columns with a specific timestamp, execute
 * {@link #setTimeStamp(long) setTimestamp}.

You can use one of the above methods to make your scan more selective.
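For illustration, a minimal sketch of such a time-bounded scan (hedged:
written against the 0.94-era client API this thread uses; the table name and
value come from the examples above, and the 30-minute window echoes the HBase
TDG passage quoted elsewhere in this thread):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.ValueFilter;
import org.apache.hadoop.hbase.util.Bytes;

public class TimeBoundedScan {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        HTable table = new HTable(conf, "table1");
        try {
            Scan scan = new Scan();
            long now = System.currentTimeMillis();
            // Restrict the scan to cells written in the last 30 minutes;
            // store files whose recorded time range lies entirely outside
            // this window can then be excluded up front.
            scan.setTimeRange(now - 30L * 60 * 1000, now);
            scan.setFilter(new ValueFilter(CompareFilter.CompareOp.EQUAL,
                    new BinaryComparator(Bytes.toBytes("5"))));
            ResultScanner scanner = table.getScanner(scan);
            try {
                for (Result result : scanner) {
                    System.out.println(result);
                }
            } finally {
                scanner.close();
            }
        } finally {
            table.close();
        }
    }
}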


ValueFilter#filterKeyValue(Cell) doesn't utilize the advanced features of
ReturnCode. You can refer to:

https://hbase.apache.org/apidocs/org/apache/hadoop/hbase/filter/Filter.ReturnCode.html

You can take a look at SingleColumnValueFilter#filterKeyValue() for an
example of how the various ReturnCodes are used to speed up a scan.
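As a hedged sketch of the ReturnCode idea (this is a hypothetical custom
filter, not an existing HBase class; it targets the 0.94-era Writable filter
API, whereas 0.96 takes a Cell and uses protobuf serialization, and a real
filter would also have to be deployed to the region servers):

import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
import org.apache.hadoop.hbase.KeyValue;
import org.apache.hadoop.hbase.filter.FilterBase;
import org.apache.hadoop.hbase.util.Bytes;

public class NewestMatchFilter extends FilterBase {
    private byte[] expected = Bytes.toBytes("5");

    @Override
    public ReturnCode filterKeyValue(KeyValue kv) {
        if (Bytes.equals(kv.getValue(), expected)) {
            // Include matching cells.
            return ReturnCode.INCLUDE;
        }
        // For a non-matching cell, seek straight to the next column
        // instead of returning SKIP for each remaining older version.
        return ReturnCode.NEXT_COL;
    }

    public void write(DataOutput out) throws IOException {
        Bytes.writeByteArray(out, expected);
    }

    public void readFields(DataInput in) throws IOException {
        expected = Bytes.readByteArray(in);
    }
}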

Cheers


On Fri, Apr 11, 2014 at 8:40 AM, Guillermo Ortiz <ko...@gmail.com>wrote:

> I read something interesting about it in HBase TDG.
>
> Page 344:
> The StoreScanner class combines the store files and memstore that the
> Store instance
> contains. It is also where the exclusion happens, based on the Bloom
> filter, or the timestamp. If you are asking for versions that are not more
> than 30 minutes old, for example, you can skip all storage files that are
> older than one hour: they will not contain anything of interest. See "Key
> Design" on page 357 for details on the exclusion, and how to make use of
> it.
>
> So, I guess that it doesn't have to read all the HFiles?? But, I don't know
> if HBase really uses the timestamp of each row or the date of the file. I
> guess when I execute the scan, it reads everything, but, I don't know why.
> I think there's something else that I don't see so that everything works to
> me.
>
>
> 2014-04-11 13:05 GMT+02:00 gortiz <go...@pragsis.com>:
>
> > Sorry, I didn't get it why it should read all the timestamps and not just
> > the newest it they're sorted and you didn't specific any timestamp in
> your
> > filter.
> >
> >
> >
> > On 11/04/14 12:13, Anoop John wrote:
> >
> >> In the storage layer (HFiles in HDFS) all versions of a particular cell
> >> will be staying together.  (Yes it has to be lexicographically ordered
> >> KVs). So during a scan we will have to read all the version data.  At
> this
> >> storage layer it doesn't know the versions stuff etc.
> >>
> >> -Anoop-
> >>
> >> On Fri, Apr 11, 2014 at 3:33 PM, gortiz <go...@pragsis.com> wrote:
> >>
> >>  Yes, I have tried with two different values for that value of versions,
> >>> 1000 and maximum value for integers.
> >>>
> >>> But, I want to keep those versions. I don't want to keep just 3
> versions.
> >>> Imagine that I want to record a new version each minute and store a
> day,
> >>> those are 1440 versions.
> >>>
> >>> Why is HBase going to read all the versions?? , I thought, if you don't
> >>> indicate any versions it's just read the newest and skip the rest. It
> >>> doesn't make too much sense to read all of them if data is sorted, plus
> >>> the
> >>> newest version is stored in the top.
> >>>
> >>>
> >>>
> >>> On 11/04/14 11:54, Anoop John wrote:
> >>>
> >>>    What is the max version setting u have done for ur table cf?  When u
> >>>> set
> >>>> some a value, HBase has to keep all those versions.  During a scan it
> >>>> will
> >>>> read all those versions. In 94 version the default value for the max
> >>>> versions is 3.  I guess you have set some bigger value.   If u have
> not,
> >>>> mind testing after a major compaction?
> >>>>
> >>>> -Anoop-
> >>>>
> >>>> On Fri, Apr 11, 2014 at 1:01 PM, gortiz <go...@pragsis.com> wrote:
> >>>>
> >>>>   Last test I have done it's to reduce the number of versions to 100.
> >>>>
> >>>>> So, right now, I have 100 rows with 100 versions each one.
> >>>>> Times are: (I got the same times for blocksize of 64Ks and 1Mb)
> >>>>> 100row-1000versions + blockcache-> 80s.
> >>>>> 100row-1000versions + No blockcache-> 70s.
> >>>>>
> >>>>> 100row-*100*versions + blockcache-> 7.3s.
> >>>>> 100row-*100*versions + No blockcache-> 6.1s.
> >>>>>
> >>>>> What's the reasons of this? I guess HBase is enough smart for not
> >>>>> consider
> >>>>> old versions, so, it just checks the newest. But, I reduce 10 times
> the
> >>>>> size (in versions) and I got a 10x of performance.
> >>>>>
> >>>>> The filter is scan 'filters', {FILTER => "ValueFilter(=,
> >>>>> 'binary:5')",STARTROW => '1010000000000000000000000000000000000101',
> >>>>> STOPROW => '6010000000000000000000000000000000000201'}
> >>>>>
> >>>>>
> >>>>>
> >>>>> On 11/04/14 09:04, gortiz wrote:
> >>>>>
> >>>>>   Well, I guessed that, what it doesn't make too much sense because
> >>>>> it's
> >>>>>
> >>>>>> so
> >>>>>> slow. I only have right now 100 rows with 1000 versions each row.
> >>>>>> I have checked the size of the dataset and each row is about
> 700Kbytes
> >>>>>> (around 7Gb, 100rowsx1000versions). So, it should only check 100
> rows
> >>>>>> x
> >>>>>> 700Kbytes = 70Mb, since it just check the newest version. How can it
> >>>>>> spend
> >>>>>> too many time checking this quantity of data?
> >>>>>>
> >>>>>> I'm generating again the dataset with a bigger blocksize (previously
> >>>>>> was
> >>>>>> 64Kb, now, it's going to be 1Mb). I could try tunning the scanning
> and
> >>>>>> baching parameters, but I don't think they're going to affect too
> >>>>>> much.
> >>>>>>
> >>>>>> Another test I want to do, it's generate the same dataset with just
> >>>>>> 100versions, It should spend around the same time, right? Or am I
> >>>>>> wrong?
> >>>>>>
> >>>>>> On 10/04/14 18:08, Ted Yu wrote:
> >>>>>>
> >>>>>>   It should be newest version of each value.
> >>>>>>
> >>>>>>> Cheers
> >>>>>>>
> >>>>>>>
> >>>>>>> On Thu, Apr 10, 2014 at 9:55 AM, gortiz <go...@pragsis.com>
> wrote:
> >>>>>>>
> >>>>>>> Another little question is, when the filter I'm using, Do I check
> all
> >>>>>>> the
> >>>>>>>
> >>>>>>>    versions? or just the newest? Because, I'm wondering if when I
> do
> >>>>>>>> a
> >>>>>>>> scan
> >>>>>>>> over all the table, I look for the value "5" in all the dataset or
> >>>>>>>> I'm
> >>>>>>>> just
> >>>>>>>> looking for in one newest version of each value.
> >>>>>>>>
> >>>>>>>>
> >>>>>>>> On 10/04/14 16:52, gortiz wrote:
> >>>>>>>>
> >>>>>>>> I was trying to check the behaviour of HBase. The cluster is a
> group
> >>>>>>>> of
> >>>>>>>>
> >>>>>>>>  old computers, one master, five slaves, each one with 2Gb, so,
> 12gb
> >>>>>>>>> in
> >>>>>>>>> total.
> >>>>>>>>> The table has a column family with 1000 columns and each column
> >>>>>>>>> with
> >>>>>>>>> 100
> >>>>>>>>> versions.
> >>>>>>>>> There's another column faimily with four columns an one image of
> >>>>>>>>> 100kb.
> >>>>>>>>>     (I've tried without this column family as well.)
> >>>>>>>>> The table is partitioned manually in all the slaves, so data are
> >>>>>>>>> balanced
> >>>>>>>>> in the cluster.
> >>>>>>>>>
> >>>>>>>>> I'm executing this sentence *scan 'table1', {FILTER =>
> >>>>>>>>> "ValueFilter(=,
> >>>>>>>>> 'binary:5')"* in HBase 0.94.6
> >>>>>>>>> My time for lease and rpc is three minutes.
> >>>>>>>>> Since, it's a full scan of the table, I have been playing with
> the
> >>>>>>>>> BLOCKCACHE as well (just disable and enable, not about the size
> of
> >>>>>>>>> it). I
> >>>>>>>>> thought that it was going to have too much calls to the GC. I'm
> not
> >>>>>>>>> sure
> >>>>>>>>> about this point.
> >>>>>>>>>
> >>>>>>>>> I know that it's not the best way to use HBase, it's just a
> test. I
> >>>>>>>>> think
> >>>>>>>>> that it's not working because the hardware isn't enough,
> although,
> >>>>>>>>> I
> >>>>>>>>> would
> >>>>>>>>> like to try some kind of tunning to improve it.
> >>>>>>>>>
> >>>>>>>>>
> >>>>>>>>>
> >>>>>>>>>
> >>>>>>>>>
> >>>>>>>>>
> >>>>>>>>>
> >>>>>>>>>
> >>>>>>>>> On 10/04/14 14:21, Ted Yu wrote:
> >>>>>>>>>
> >>>>>>>>> Can you give us a bit more information:
> >>>>>>>>>
> >>>>>>>>>  HBase release you're running
> >>>>>>>>>> What filters are used for the scan
> >>>>>>>>>>
> >>>>>>>>>> Thanks
> >>>>>>>>>>
> >>>>>>>>>> On Apr 10, 2014, at 2:36 AM, gortiz <go...@pragsis.com> wrote:
> >>>>>>>>>>
> >>>>>>>>>>     I got this error when I execute a full scan with filters
> >>>>>>>>>> about a
> >>>>>>>>>> table.
> >>>>>>>>>>
> >>>>>>>>>> Caused by: java.lang.RuntimeException: org.apache.hadoop.hbase.
> >>>>>>>>>>
> >>>>>>>>>>> regionserver.LeaseException:
> >>>>>>>>>>> org.apache.hadoop.hbase.regionserver.LeaseException: lease
> >>>>>>>>>>> '-4165751462641113359' does not exist
> >>>>>>>>>>>        at org.apache.hadoop.hbase.regionserver.Leases.
> >>>>>>>>>>> removeLease(Leases.java:231)
> >>>>>>>>>>>
> >>>>>>>>>>>
> >>>>>>>>>>>        at org.apache.hadoop.hbase.regionserver.HRegionServer.
> >>>>>>>>>>> next(HRegionServer.java:2482)
> >>>>>>>>>>>        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native
> >>>>>>>>>>> Method)
> >>>>>>>>>>>        at sun.reflect.NativeMethodAccessorImpl.invoke(
> >>>>>>>>>>> NativeMethodAccessorImpl.java:39)
> >>>>>>>>>>>        at sun.reflect.DelegatingMethodAccessorImpl.invoke(
> >>>>>>>>>>> DelegatingMethodAccessorImpl.java:25)
> >>>>>>>>>>>        at java.lang.reflect.Method.invoke(Method.java:597)
> >>>>>>>>>>>        at org.apache.hadoop.hbase.ipc.
> >>>>>>>>>>> WritableRpcEngine$Server.call(
> >>>>>>>>>>> WritableRpcEngine.java:320)
> >>>>>>>>>>>        at org.apache.hadoop.hbase.ipc.HBaseServer$Handler.run(
> >>>>>>>>>>> HBaseServer.java:1428)
> >>>>>>>>>>>
> >>>>>>>>>>> I have read about increase the lease time and rpc time, but
> it's
> >>>>>>>>>>> not
> >>>>>>>>>>> working.. what else could I try?? The table isn't too big. I
> have
> >>>>>>>>>>> been
> >>>>>>>>>>> checking the logs from GC, HMaster and some RegionServers and I
> >>>>>>>>>>> didn't see
> >>>>>>>>>>> anything weird. I tried as well to try with a couple of caching
> >>>>>>>>>>> values.
> >>>>>>>>>>>
> >>>>>>>>>>>
> >>>>>>>>>>> --
> >>>>>>>>>>>
> >>>>>>>>>> *Guillermo Ortiz*
> >>>>>>>> /Big Data Developer/
> >>>>>>>>
> >>>>>>>> Telf.: +34 917 680 490
> >>>>>>>> Fax: +34 913 833 301
> >>>>>>>>
> >>>>>>>> C/ Manuel Tovar, 49-53 - 28034 Madrid - Spain
> >>>>>>>>
> >>>>>>>> _http://www.bidoop.es_
> >>>>>>>>
> >>>>>>>>
> >>>>>>>>
> >>>>>>>> --
> >>>>>>>>
> >>>>>>> *Guillermo Ortiz*
> >>>>> /Big Data Developer/
> >>>>>
> >>>>> Telf.: +34 917 680 490
> >>>>> Fax: +34 913 833 301
> >>>>>
> >>>>> C/ Manuel Tovar, 49-53 - 28034 Madrid - Spain
> >>>>>
> >>>>> _http://www.bidoop.es_
> >>>>>
> >>>>>
> >>>>>
> >>>>>  --
> >>> *Guillermo Ortiz*
> >>> /Big Data Developer/
> >>>
> >>> Telf.: +34 917 680 490
> >>> Fax: +34 913 833 301
> >>>   C/ Manuel Tovar, 49-53 - 28034 Madrid - Spain
> >>>
> >>> _http://www.bidoop.es_
> >>>
> >>>
> >>>
> >
> > --
> > *Guillermo Ortiz*
> > /Big Data Developer/
> >
> > Telf.: +34 917 680 490
> > Fax: +34 913 833 301
> > C/ Manuel Tovar, 49-53 - 28034 Madrid - Spain
> >
> > _http://www.bidoop.es_
> >
> >
>

Re: Lease exception when I execute large scan with filters.

Posted by Guillermo Ortiz <ko...@gmail.com>.
I read something interesting about it in HBase TDG.

Page 344:
The StoreScanner class combines the store files and memstore that the
Store instance
contains. It is also where the exclusion happens, based on the Bloom
filter, or the timestamp. If you are asking for versions that are not more
than 30 minutes old, for example, you can skip all storage files that are
older than one hour: they will not contain anything of interest. See "Key
Design" on page 357 for details on the exclusion, and how to make use of
it.

So I guess that it shouldn't have to read all the HFiles? But I don't know
whether HBase really uses the timestamp of each cell or the date of the file.
I suspect that when I execute the scan it reads everything, but I don't know
why. I think there's something else going on here that I don't see.


2014-04-11 13:05 GMT+02:00 gortiz <go...@pragsis.com>:

> Sorry, I didn't get it why it should read all the timestamps and not just
> the newest it they're sorted and you didn't specific any timestamp in your
> filter.
>
>
>
> On 11/04/14 12:13, Anoop John wrote:
>
>> In the storage layer (HFiles in HDFS) all versions of a particular cell
>> will be staying together.  (Yes it has to be lexicographically ordered
>> KVs). So during a scan we will have to read all the version data.  At this
>> storage layer it doesn't know the versions stuff etc.
>>
>> -Anoop-
>>
>> On Fri, Apr 11, 2014 at 3:33 PM, gortiz <go...@pragsis.com> wrote:
>>
>>  Yes, I have tried with two different values for that value of versions,
>>> 1000 and maximum value for integers.
>>>
>>> But, I want to keep those versions. I don't want to keep just 3 versions.
>>> Imagine that I want to record a new version each minute and store a day,
>>> those are 1440 versions.
>>>
>>> Why is HBase going to read all the versions?? , I thought, if you don't
>>> indicate any versions it's just read the newest and skip the rest. It
>>> doesn't make too much sense to read all of them if data is sorted, plus
>>> the
>>> newest version is stored in the top.
>>>
>>>
>>>
>>> On 11/04/14 11:54, Anoop John wrote:
>>>
>>>    What is the max version setting u have done for ur table cf?  When u
>>>> set
>>>> some a value, HBase has to keep all those versions.  During a scan it
>>>> will
>>>> read all those versions. In 94 version the default value for the max
>>>> versions is 3.  I guess you have set some bigger value.   If u have not,
>>>> mind testing after a major compaction?
>>>>
>>>> -Anoop-
>>>>
>>>> On Fri, Apr 11, 2014 at 1:01 PM, gortiz <go...@pragsis.com> wrote:
>>>>
>>>>   Last test I have done it's to reduce the number of versions to 100.
>>>>
>>>>> So, right now, I have 100 rows with 100 versions each one.
>>>>> Times are: (I got the same times for blocksize of 64Ks and 1Mb)
>>>>> 100row-1000versions + blockcache-> 80s.
>>>>> 100row-1000versions + No blockcache-> 70s.
>>>>>
>>>>> 100row-*100*versions + blockcache-> 7.3s.
>>>>> 100row-*100*versions + No blockcache-> 6.1s.
>>>>>
>>>>> What's the reasons of this? I guess HBase is enough smart for not
>>>>> consider
>>>>> old versions, so, it just checks the newest. But, I reduce 10 times the
>>>>> size (in versions) and I got a 10x of performance.
>>>>>
>>>>> The filter is scan 'filters', {FILTER => "ValueFilter(=,
>>>>> 'binary:5')",STARTROW => '1010000000000000000000000000000000000101',
>>>>> STOPROW => '6010000000000000000000000000000000000201'}
>>>>>
>>>>>
>>>>>
>>>>> On 11/04/14 09:04, gortiz wrote:
>>>>>
>>>>>   Well, I guessed that, what it doesn't make too much sense because
>>>>> it's
>>>>>
>>>>>> so
>>>>>> slow. I only have right now 100 rows with 1000 versions each row.
>>>>>> I have checked the size of the dataset and each row is about 700Kbytes
>>>>>> (around 7Gb, 100rowsx1000versions). So, it should only check 100 rows
>>>>>> x
>>>>>> 700Kbytes = 70Mb, since it just check the newest version. How can it
>>>>>> spend
>>>>>> too many time checking this quantity of data?
>>>>>>
>>>>>> I'm generating again the dataset with a bigger blocksize (previously
>>>>>> was
>>>>>> 64Kb, now, it's going to be 1Mb). I could try tunning the scanning and
>>>>>> baching parameters, but I don't think they're going to affect too
>>>>>> much.
>>>>>>
>>>>>> Another test I want to do, it's generate the same dataset with just
>>>>>> 100versions, It should spend around the same time, right? Or am I
>>>>>> wrong?
>>>>>>
>>>>>> On 10/04/14 18:08, Ted Yu wrote:
>>>>>>
>>>>>>   It should be newest version of each value.
>>>>>>
>>>>>>> Cheers
>>>>>>>
>>>>>>>
>>>>>>> On Thu, Apr 10, 2014 at 9:55 AM, gortiz <go...@pragsis.com> wrote:
>>>>>>>
>>>>>>> Another little question is, when the filter I'm using, Do I check all
>>>>>>> the
>>>>>>>
>>>>>>>    versions? or just the newest? Because, I'm wondering if when I do
>>>>>>>> a
>>>>>>>> scan
>>>>>>>> over all the table, I look for the value "5" in all the dataset or
>>>>>>>> I'm
>>>>>>>> just
>>>>>>>> looking for in one newest version of each value.
>>>>>>>>
>>>>>>>>
>>>>>>>> On 10/04/14 16:52, gortiz wrote:
>>>>>>>>
>>>>>>>> I was trying to check the behaviour of HBase. The cluster is a group
>>>>>>>> of
>>>>>>>>
>>>>>>>>  old computers, one master, five slaves, each one with 2Gb, so, 12gb
>>>>>>>>> in
>>>>>>>>> total.
>>>>>>>>> The table has a column family with 1000 columns and each column
>>>>>>>>> with
>>>>>>>>> 100
>>>>>>>>> versions.
>>>>>>>>> There's another column faimily with four columns an one image of
>>>>>>>>> 100kb.
>>>>>>>>>     (I've tried without this column family as well.)
>>>>>>>>> The table is partitioned manually in all the slaves, so data are
>>>>>>>>> balanced
>>>>>>>>> in the cluster.
>>>>>>>>>
>>>>>>>>> I'm executing this sentence *scan 'table1', {FILTER =>
>>>>>>>>> "ValueFilter(=,
>>>>>>>>> 'binary:5')"* in HBase 0.94.6
>>>>>>>>> My time for lease and rpc is three minutes.
>>>>>>>>> Since, it's a full scan of the table, I have been playing with the
>>>>>>>>> BLOCKCACHE as well (just disable and enable, not about the size of
>>>>>>>>> it). I
>>>>>>>>> thought that it was going to have too much calls to the GC. I'm not
>>>>>>>>> sure
>>>>>>>>> about this point.
>>>>>>>>>
>>>>>>>>> I know that it's not the best way to use HBase, it's just a test. I
>>>>>>>>> think
>>>>>>>>> that it's not working because the hardware isn't enough, although,
>>>>>>>>> I
>>>>>>>>> would
>>>>>>>>> like to try some kind of tunning to improve it.
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> On 10/04/14 14:21, Ted Yu wrote:
>>>>>>>>>
>>>>>>>>> Can you give us a bit more information:
>>>>>>>>>
>>>>>>>>>  HBase release you're running
>>>>>>>>>> What filters are used for the scan
>>>>>>>>>>
>>>>>>>>>> Thanks
>>>>>>>>>>
>>>>>>>>>> On Apr 10, 2014, at 2:36 AM, gortiz <go...@pragsis.com> wrote:
>>>>>>>>>>
>>>>>>>>>>     I got this error when I execute a full scan with filters
>>>>>>>>>> about a
>>>>>>>>>> table.
>>>>>>>>>>
>>>>>>>>>> Caused by: java.lang.RuntimeException: org.apache.hadoop.hbase.
>>>>>>>>>>
>>>>>>>>>>> regionserver.LeaseException:
>>>>>>>>>>> org.apache.hadoop.hbase.regionserver.LeaseException: lease
>>>>>>>>>>> '-4165751462641113359' does not exist
>>>>>>>>>>>        at org.apache.hadoop.hbase.regionserver.Leases.
>>>>>>>>>>> removeLease(Leases.java:231)
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>>        at org.apache.hadoop.hbase.regionserver.HRegionServer.
>>>>>>>>>>> next(HRegionServer.java:2482)
>>>>>>>>>>>        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native
>>>>>>>>>>> Method)
>>>>>>>>>>>        at sun.reflect.NativeMethodAccessorImpl.invoke(
>>>>>>>>>>> NativeMethodAccessorImpl.java:39)
>>>>>>>>>>>        at sun.reflect.DelegatingMethodAccessorImpl.invoke(
>>>>>>>>>>> DelegatingMethodAccessorImpl.java:25)
>>>>>>>>>>>        at java.lang.reflect.Method.invoke(Method.java:597)
>>>>>>>>>>>        at org.apache.hadoop.hbase.ipc.
>>>>>>>>>>> WritableRpcEngine$Server.call(
>>>>>>>>>>> WritableRpcEngine.java:320)
>>>>>>>>>>>        at org.apache.hadoop.hbase.ipc.HBaseServer$Handler.run(
>>>>>>>>>>> HBaseServer.java:1428)
>>>>>>>>>>>
>>>>>>>>>>> I have read about increase the lease time and rpc time, but it's
>>>>>>>>>>> not
>>>>>>>>>>> working.. what else could I try?? The table isn't too big. I have
>>>>>>>>>>> been
>>>>>>>>>>> checking the logs from GC, HMaster and some RegionServers and I
>>>>>>>>>>> didn't see
>>>>>>>>>>> anything weird. I tried as well to try with a couple of caching
>>>>>>>>>>> values.
>>>>>>>>>>>
>>>>>>>>>>>
>>>>>>>>>>> --
>>>>>>>>>>>
>>>>>>>>>> *Guillermo Ortiz*
>>>>>>>> /Big Data Developer/
>>>>>>>>
>>>>>>>> Telf.: +34 917 680 490
>>>>>>>> Fax: +34 913 833 301
>>>>>>>>
>>>>>>>> C/ Manuel Tovar, 49-53 - 28034 Madrid - Spain
>>>>>>>>
>>>>>>>> _http://www.bidoop.es_
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>> --
>>>>>>>>
>>>>>>> *Guillermo Ortiz*
>>>>> /Big Data Developer/
>>>>>
>>>>> Telf.: +34 917 680 490
>>>>> Fax: +34 913 833 301
>>>>>
>>>>> C/ Manuel Tovar, 49-53 - 28034 Madrid - Spain
>>>>>
>>>>> _http://www.bidoop.es_
>>>>>
>>>>>
>>>>>
>>>>>  --
>>> *Guillermo Ortiz*
>>> /Big Data Developer/
>>>
>>> Telf.: +34 917 680 490
>>> Fax: +34 913 833 301
>>>   C/ Manuel Tovar, 49-53 - 28034 Madrid - Spain
>>>
>>> _http://www.bidoop.es_
>>>
>>>
>>>
>
> --
> *Guillermo Ortiz*
> /Big Data Developer/
>
> Telf.: +34 917 680 490
> Fax: +34 913 833 301
> C/ Manuel Tovar, 49-53 - 28034 Madrid - Spain
>
> _http://www.bidoop.es_
>
>

Re: Lease exception when I execute large scan with filters.

Posted by gortiz <go...@pragsis.com>.
Sorry, I didn't get why it should read all the timestamps and not just
the newest if they're sorted and you didn't specify any timestamp in your
filter.


On 11/04/14 12:13, Anoop John wrote:
> In the storage layer (HFiles in HDFS) all versions of a particular cell
> will be staying together.  (Yes it has to be lexicographically ordered
> KVs). So during a scan we will have to read all the version data.  At this
> storage layer it doesn't know the versions stuff etc.
>
> -Anoop-
>
> On Fri, Apr 11, 2014 at 3:33 PM, gortiz <go...@pragsis.com> wrote:
>
>> Yes, I have tried with two different values for that value of versions,
>> 1000 and maximum value for integers.
>>
>> But, I want to keep those versions. I don't want to keep just 3 versions.
>> Imagine that I want to record a new version each minute and store a day,
>> those are 1440 versions.
>>
>> Why is HBase going to read all the versions?? , I thought, if you don't
>> indicate any versions it's just read the newest and skip the rest. It
>> doesn't make too much sense to read all of them if data is sorted, plus the
>> newest version is stored in the top.
>>
>>
>>
>> On 11/04/14 11:54, Anoop John wrote:
>>
>>>   What is the max version setting u have done for ur table cf?  When u set
>>> some a value, HBase has to keep all those versions.  During a scan it will
>>> read all those versions. In 94 version the default value for the max
>>> versions is 3.  I guess you have set some bigger value.   If u have not,
>>> mind testing after a major compaction?
>>>
>>> -Anoop-
>>>
>>> On Fri, Apr 11, 2014 at 1:01 PM, gortiz <go...@pragsis.com> wrote:
>>>
>>>   Last test I have done it's to reduce the number of versions to 100.
>>>> So, right now, I have 100 rows with 100 versions each one.
>>>> Times are: (I got the same times for blocksize of 64Ks and 1Mb)
>>>> 100row-1000versions + blockcache-> 80s.
>>>> 100row-1000versions + No blockcache-> 70s.
>>>>
>>>> 100row-*100*versions + blockcache-> 7.3s.
>>>> 100row-*100*versions + No blockcache-> 6.1s.
>>>>
>>>> What's the reasons of this? I guess HBase is enough smart for not
>>>> consider
>>>> old versions, so, it just checks the newest. But, I reduce 10 times the
>>>> size (in versions) and I got a 10x of performance.
>>>>
>>>> The filter is scan 'filters', {FILTER => "ValueFilter(=,
>>>> 'binary:5')",STARTROW => '1010000000000000000000000000000000000101',
>>>> STOPROW => '6010000000000000000000000000000000000201'}
>>>>
>>>>
>>>>
>>>> On 11/04/14 09:04, gortiz wrote:
>>>>
>>>>   Well, I guessed that, what it doesn't make too much sense because it's
>>>>> so
>>>>> slow. I only have right now 100 rows with 1000 versions each row.
>>>>> I have checked the size of the dataset and each row is about 700Kbytes
>>>>> (around 7Gb, 100rowsx1000versions). So, it should only check 100 rows x
>>>>> 700Kbytes = 70Mb, since it just check the newest version. How can it
>>>>> spend
>>>>> too many time checking this quantity of data?
>>>>>
>>>>> I'm generating again the dataset with a bigger blocksize (previously was
>>>>> 64Kb, now, it's going to be 1Mb). I could try tunning the scanning and
>>>>> baching parameters, but I don't think they're going to affect too much.
>>>>>
>>>>> Another test I want to do, it's generate the same dataset with just
>>>>> 100versions, It should spend around the same time, right? Or am I wrong?
>>>>>
>>>>> On 10/04/14 18:08, Ted Yu wrote:
>>>>>
>>>>>   It should be newest version of each value.
>>>>>> Cheers
>>>>>>
>>>>>>
>>>>>> On Thu, Apr 10, 2014 at 9:55 AM, gortiz <go...@pragsis.com> wrote:
>>>>>>
>>>>>> Another little question is, when the filter I'm using, Do I check all
>>>>>> the
>>>>>>
>>>>>>>   versions? or just the newest? Because, I'm wondering if when I do a
>>>>>>> scan
>>>>>>> over all the table, I look for the value "5" in all the dataset or I'm
>>>>>>> just
>>>>>>> looking for in one newest version of each value.
>>>>>>>
>>>>>>>
>>>>>>> On 10/04/14 16:52, gortiz wrote:
>>>>>>>
>>>>>>> I was trying to check the behaviour of HBase. The cluster is a group
>>>>>>> of
>>>>>>>
>>>>>>>> old computers, one master, five slaves, each one with 2Gb, so, 12gb
>>>>>>>> in
>>>>>>>> total.
>>>>>>>> The table has a column family with 1000 columns and each column with
>>>>>>>> 100
>>>>>>>> versions.
>>>>>>>> There's another column faimily with four columns an one image of
>>>>>>>> 100kb.
>>>>>>>>     (I've tried without this column family as well.)
>>>>>>>> The table is partitioned manually in all the slaves, so data are
>>>>>>>> balanced
>>>>>>>> in the cluster.
>>>>>>>>
>>>>>>>> I'm executing this sentence *scan 'table1', {FILTER =>
>>>>>>>> "ValueFilter(=,
>>>>>>>> 'binary:5')"* in HBase 0.94.6
>>>>>>>> My time for lease and rpc is three minutes.
>>>>>>>> Since, it's a full scan of the table, I have been playing with the
>>>>>>>> BLOCKCACHE as well (just disable and enable, not about the size of
>>>>>>>> it). I
>>>>>>>> thought that it was going to have too much calls to the GC. I'm not
>>>>>>>> sure
>>>>>>>> about this point.
>>>>>>>>
>>>>>>>> I know that it's not the best way to use HBase, it's just a test. I
>>>>>>>> think
>>>>>>>> that it's not working because the hardware isn't enough, although, I
>>>>>>>> would
>>>>>>>> like to try some kind of tunning to improve it.
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>>
>>>>>>>> On 10/04/14 14:21, Ted Yu wrote:
>>>>>>>>
>>>>>>>> Can you give us a bit more information:
>>>>>>>>
>>>>>>>>> HBase release you're running
>>>>>>>>> What filters are used for the scan
>>>>>>>>>
>>>>>>>>> Thanks
>>>>>>>>>
>>>>>>>>> On Apr 10, 2014, at 2:36 AM, gortiz <go...@pragsis.com> wrote:
>>>>>>>>>
>>>>>>>>>     I got this error when I execute a full scan with filters about a
>>>>>>>>> table.
>>>>>>>>>
>>>>>>>>> Caused by: java.lang.RuntimeException: org.apache.hadoop.hbase.
>>>>>>>>>> regionserver.LeaseException:
>>>>>>>>>> org.apache.hadoop.hbase.regionserver.LeaseException: lease
>>>>>>>>>> '-4165751462641113359' does not exist
>>>>>>>>>>        at org.apache.hadoop.hbase.regionserver.Leases.
>>>>>>>>>> removeLease(Leases.java:231)
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>>        at org.apache.hadoop.hbase.regionserver.HRegionServer.
>>>>>>>>>> next(HRegionServer.java:2482)
>>>>>>>>>>        at sun.reflect.NativeMethodAccessorImpl.invoke0(Native
>>>>>>>>>> Method)
>>>>>>>>>>        at sun.reflect.NativeMethodAccessorImpl.invoke(
>>>>>>>>>> NativeMethodAccessorImpl.java:39)
>>>>>>>>>>        at sun.reflect.DelegatingMethodAccessorImpl.invoke(
>>>>>>>>>> DelegatingMethodAccessorImpl.java:25)
>>>>>>>>>>        at java.lang.reflect.Method.invoke(Method.java:597)
>>>>>>>>>>        at org.apache.hadoop.hbase.ipc.
>>>>>>>>>> WritableRpcEngine$Server.call(
>>>>>>>>>> WritableRpcEngine.java:320)
>>>>>>>>>>        at org.apache.hadoop.hbase.ipc.HBaseServer$Handler.run(
>>>>>>>>>> HBaseServer.java:1428)
>>>>>>>>>>
>>>>>>>>>> I have read about increase the lease time and rpc time, but it's
>>>>>>>>>> not
>>>>>>>>>> working.. what else could I try?? The table isn't too big. I have
>>>>>>>>>> been
>>>>>>>>>> checking the logs from GC, HMaster and some RegionServers and I
>>>>>>>>>> didn't see
>>>>>>>>>> anything weird. I tried as well to try with a couple of caching
>>>>>>>>>> values.
>>>>>>>>>>
>>>>>>>>>>
>>>>>>>>>> --
>>>>>>> *Guillermo Ortiz*
>>>>>>> /Big Data Developer/
>>>>>>>
>>>>>>> Telf.: +34 917 680 490
>>>>>>> Fax: +34 913 833 301
>>>>>>>
>>>>>>> C/ Manuel Tovar, 49-53 - 28034 Madrid - Spain
>>>>>>>
>>>>>>> _http://www.bidoop.es_
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>> --
>>>> *Guillermo Ortiz*
>>>> /Big Data Developer/
>>>>
>>>> Telf.: +34 917 680 490
>>>> Fax: +34 913 833 301
>>>>
>>>> C/ Manuel Tovar, 49-53 - 28034 Madrid - Spain
>>>>
>>>> _http://www.bidoop.es_
>>>>
>>>>
>>>>
>> --
>> *Guillermo Ortiz*
>> /Big Data Developer/
>>
>> Telf.: +34 917 680 490
>> Fax: +34 913 833 301
>>   C/ Manuel Tovar, 49-53 - 28034 Madrid - Spain
>>
>> _http://www.bidoop.es_
>>
>>


-- 
*Guillermo Ortiz*
/Big Data Developer/

Telf.: +34 917 680 490
Fax: +34 913 833 301
C/ Manuel Tovar, 49-53 - 28034 Madrid - Spain

_http://www.bidoop.es_


Re: Lease exception when I execute large scan with filters.

Posted by Anoop John <an...@gmail.com>.
In the storage layer (HFiles in HDFS) all versions of a particular cell
are stored together (yes, the KVs have to be lexicographically ordered).
So during a scan we have to read all the version data; the storage layer
knows nothing about the VERSIONS setting.

-Anoop-
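
To make the consequence concrete: since every stored version is read at the
storage layer, the way to cut the scan cost is to cap what is stored, not
just what is returned. A minimal sketch against the 0.94-era client API
(the table name 'table1' matches the thread; the family name 'cf' is an
assumption):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.util.Bytes;

public class CapVersions {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);

        // Lower the number of versions kept on disk. The cap is enforced
        // as memstore flushes and compactions rewrite HFiles, so run a
        // major compaction afterwards to see the full effect.
        HTableDescriptor desc = admin.getTableDescriptor(Bytes.toBytes("table1"));
        HColumnDescriptor cf = desc.getFamily(Bytes.toBytes("cf"));
        cf.setMaxVersions(3);
        admin.disableTable("table1");
        admin.modifyColumn("table1", cf);
        admin.enableTable("table1");

        // By contrast, this only limits what the scan returns; the region
        // server still iterates older cells physically present in HFiles.
        Scan scan = new Scan();
        scan.setMaxVersions(1);
    }
}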

On Fri, Apr 11, 2014 at 3:33 PM, gortiz <go...@pragsis.com> wrote:

> Yes, I have tried two different values for the max versions setting: 1000
> and the maximum integer value.
>
> But I want to keep those versions; I don't want to keep just 3 versions.
> Imagine that I want to record a new version each minute and store a day's
> worth: those are 1440 versions.
>
> Why is HBase going to read all the versions? I thought that if you don't
> request any versions, it just reads the newest and skips the rest. It
> doesn't make much sense to read all of them if the data is sorted, given
> that the newest version is stored at the top.
>
>
>
> On 11/04/14 11:54, Anoop John wrote:
>
>> What is the max versions setting you have applied to your table's column
>> family? When you set such a value, HBase has to keep all those versions,
>> and during a scan it will read all of them. In the 0.94 release the
>> default value for max versions is 3. I guess you have set some bigger
>> value. If you have not, would you mind testing after a major compaction?
>>
>> -Anoop-
>>
>> On Fri, Apr 11, 2014 at 1:01 PM, gortiz <go...@pragsis.com> wrote:
>>
>>> The last test I have done is to reduce the number of versions to 100.
>>> So, right now, I have 100 rows with 100 versions each.
>>> The times are (I got the same times for block sizes of 64KB and 1MB):
>>> 100 rows, 1000 versions + block cache -> 80s.
>>> 100 rows, 1000 versions + no block cache -> 70s.
>>>
>>> 100 rows, *100* versions + block cache -> 7.3s.
>>> 100 rows, *100* versions + no block cache -> 6.1s.
>>>
>>> What's the reason for this? I guessed HBase was smart enough not to
>>> consider old versions and to just check the newest. But I reduced the
>>> size (in versions) by 10x and got a 10x gain in performance.
>>>
>>> The filter is: scan 'filters', {FILTER => "ValueFilter(=,
>>> 'binary:5')", STARTROW => '1010000000000000000000000000000000000101',
>>> STOPROW => '6010000000000000000000000000000000000201'}
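
For reference, an equivalent of that shell scan in the Java client (0.94-era
API), should anyone want to reproduce the test programmatically:

import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.CompareFilter.CompareOp;
import org.apache.hadoop.hbase.filter.ValueFilter;
import org.apache.hadoop.hbase.util.Bytes;

public class FilterScan {
    public static Scan build() {
        Scan scan = new Scan(
                Bytes.toBytes("1010000000000000000000000000000000000101"),
                Bytes.toBytes("6010000000000000000000000000000000000201"));
        // Matches cells whose value equals the bytes of "5". Note that
        // ValueFilter is evaluated per cell, i.e. per version examined,
        // which is why the version count shows up in the scan time.
        scan.setFilter(new ValueFilter(CompareOp.EQUAL,
                new BinaryComparator(Bytes.toBytes("5"))));
        return scan;
    }
}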
>>>
>>>
>>>
>>> On 11/04/14 09:04, gortiz wrote:
>>>
>>>> Well, I guessed that, but it doesn't make much sense because it's so
>>>> slow. Right now I only have 100 rows with 1000 versions each.
>>>> I have checked the size of the dataset and each row is about 700KB
>>>> (around 7GB, 100 rows x 1000 versions). So, it should only check 100
>>>> rows x 700KB = 70MB, since it just checks the newest version. How can
>>>> it spend so much time checking this quantity of data?
>>>>
>>>> I'm generating the dataset again with a bigger block size (previously it
>>>> was 64KB; now it's going to be 1MB). I could try tuning the scanning and
>>>> batching parameters, but I don't think they're going to affect it much.
>>>>
>>>> Another test I want to do is to generate the same dataset with just
>>>> 100 versions. It should take around the same time, right? Or am I wrong?
>>>>
>>>> On 10/04/14 18:08, Ted Yu wrote:
>>>>
>>>>  It should be newest version of each value.
>>>>>
>>>>> Cheers
>>>>>
>>>>>
>>>>> On Thu, Apr 10, 2014 at 9:55 AM, gortiz <go...@pragsis.com> wrote:
>>>>>
>>>>>> Another little question: with the filter I'm using, do I check all the
>>>>>> versions or just the newest? Because I'm wondering whether, when I do a
>>>>>> scan over the whole table, I look for the value "5" in the whole
>>>>>> dataset or just in the newest version of each value.
>>>>>>
>>>>>>
>>>>>> On 10/04/14 16:52, gortiz wrote:
>>>>>>
>>>>>>> I was trying to check the behaviour of HBase. The cluster is a group
>>>>>>> of old computers: one master and five slaves, each with 2GB, so 12GB
>>>>>>> in total.
>>>>>>> The table has a column family with 1000 columns, and each column has
>>>>>>> 100 versions.
>>>>>>> There's another column family with four columns and one image of
>>>>>>> 100KB. (I've tried without this column family as well.)
>>>>>>> The table is partitioned manually across all the slaves, so data is
>>>>>>> balanced in the cluster.
>>>>>>>
>>>>>>> I'm executing this sentence in HBase 0.94.6: *scan 'table1', {FILTER
>>>>>>> => "ValueFilter(=, 'binary:5')"}*
>>>>>>> My lease and RPC timeouts are three minutes.
>>>>>>> Since it's a full scan of the table, I have been playing with the
>>>>>>> BLOCKCACHE as well (just disabling and enabling it, not changing its
>>>>>>> size). I thought that it was going to cause too many calls to the GC;
>>>>>>> I'm not sure about this point.
>>>>>>>
>>>>>>> I know that this is not the best way to use HBase; it's just a test.
>>>>>>> I think that it's not working because the hardware isn't enough,
>>>>>>> although I would like to try some kind of tuning to improve it.
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>>
>>>>>>> On 10/04/14 14:21, Ted Yu wrote:
>>>>>>>
>>>>>>> Can you give us a bit more information:
>>>>>>>
>>>>>>>> HBase release you're running
>>>>>>>> What filters are used for the scan
>>>>>>>>
>>>>>>>> Thanks
>>>>>>>>
>>>>>>>> On Apr 10, 2014, at 2:36 AM, gortiz <go...@pragsis.com> wrote:
>>>>>>>>
>>>>>>>>    I got this error when I execute a full scan with filters about a
>>>>>>>> table.
>>>>>>>>
>>>>>>>> Caused by: java.lang.RuntimeException: org.apache.hadoop.hbase.
>>>>>>>>> regionserver.LeaseException:
>>>>>>>>> org.apache.hadoop.hbase.regionserver.LeaseException: lease
>>>>>>>>> '-4165751462641113359' does not exist
>>>>>>>>>       at org.apache.hadoop.hbase.regionserver.Leases.
>>>>>>>>> removeLease(Leases.java:231)
>>>>>>>>>
>>>>>>>>>
>>>>>>>>>       at org.apache.hadoop.hbase.regionserver.HRegionServer.
>>>>>>>>> next(HRegionServer.java:2482)
>>>>>>>>>       at sun.reflect.NativeMethodAccessorImpl.invoke0(Native
>>>>>>>>> Method)
>>>>>>>>>       at sun.reflect.NativeMethodAccessorImpl.invoke(
>>>>>>>>> NativeMethodAccessorImpl.java:39)
>>>>>>>>>       at sun.reflect.DelegatingMethodAccessorImpl.invoke(
>>>>>>>>> DelegatingMethodAccessorImpl.java:25)
>>>>>>>>>       at java.lang.reflect.Method.invoke(Method.java:597)
>>>>>>>>>       at org.apache.hadoop.hbase.ipc.
>>>>>>>>> WritableRpcEngine$Server.call(
>>>>>>>>> WritableRpcEngine.java:320)
>>>>>>>>>       at org.apache.hadoop.hbase.ipc.HBaseServer$Handler.run(
>>>>>>>>> HBaseServer.java:1428)
>>>>>>>>>
>>>>>>>>> I have read about increasing the lease time and RPC time, but it's
>>>>>>>>> not working... what else could I try? The table isn't too big. I
>>>>>>>>> have been checking the logs from the GC, HMaster and some
>>>>>>>>> RegionServers and I didn't see anything weird. I also tried a
>>>>>>>>> couple of caching values.
>>>>>>>>>
>>>>>>>>>
>>>>>>>>> --
>>>>>>>
>>>>>> *Guillermo Ortiz*
>>>>>> /Big Data Developer/
>>>>>>
>>>>>> Telf.: +34 917 680 490
>>>>>> Fax: +34 913 833 301
>>>>>>
>>>>>> C/ Manuel Tovar, 49-53 - 28034 Madrid - Spain
>>>>>>
>>>>>> _http://www.bidoop.es_
>>>>>>
>>>>>>
>>>>>>
>>>>>> --
>>> *Guillermo Ortiz*
>>> /Big Data Developer/
>>>
>>> Telf.: +34 917 680 490
>>> Fax: +34 913 833 301
>>>
>>> C/ Manuel Tovar, 49-53 - 28034 Madrid - Spain
>>>
>>> _http://www.bidoop.es_
>>>
>>>
>>>
>
> --
> *Guillermo Ortiz*
> /Big Data Developer/
>
> Telf.: +34 917 680 490
> Fax: +34 913 833 301
>  C/ Manuel Tovar, 49-53 - 28034 Madrid - Spain
>
> _http://www.bidoop.es_
>
>

Re: Lease exception when I execute large scan with filters.

Posted by Ted Yu <yu...@gmail.com>.
The HBase refguide has some explanation of the internals w.r.t. versions:
http://hbase.apache.org/book.html#versions

bq. why HBase has versioning

This came from Bigtable. See the paragraph on page 3 of the OSDI paper:
http://static.googleusercontent.com/media/research.google.com/en/us/archive/bigtable-osdi06.pdf

The example use case from the above paper was to store 3 versions (i.e.
timestamps) of the contents column. The timestamps are

bq. the times at which these page versions were actually crawled.

Cheers
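
A minimal sketch of that Bigtable-style use case with the HBase client; the
table, family, qualifier and row names are illustrative, echoing the paper:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;

public class CrawlVersions {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        HTable table = new HTable(conf, "webtable");
        // Each crawl writes the same cell, using the crawl time as the
        // cell timestamp; with VERSIONS => 3 on the family, HBase keeps
        // the three most recent crawls.
        long crawlTime = System.currentTimeMillis();
        Put put = new Put(Bytes.toBytes("com.cnn.www"));
        put.add(Bytes.toBytes("contents"), Bytes.toBytes("html"),
                crawlTime, Bytes.toBytes("<html>...</html>"));
        table.put(put);
        table.close();
    }
}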


On Sat, Apr 12, 2014 at 2:14 PM, Michael Segel <mi...@hotmail.com> wrote:

> You do realize that it is an internal feature and that the public API can
> change to not present access to it.
> However, that wouldn't be a good idea because you would want to be able to
> change it and in some cases review the versions of a cell.  How else do you
> describe versioning which is unique to HBase and/or other specific
> databases, yet temporal modeling is not?
>
> In fact, if memory serves... going back to 2009-10 IIRC the 'old API' vs the
> 'new API' for Hadoop where the 'new API' had a subset of the exposed
> classes / methods than the old API? (It was an attempt to simplify the API...
> ) So again, APIs can change.
>
> The point is that you should be modeling your data on time if it is time
> sensitive data. Using versioning bypasses this with bad consequences.
>
> By all means keep abusing the cell's versioning.
> Just don't complain about poor performance and your HBase tossing
> exceptions left and right. I mean I can't stop you from mixing booze, coke
> and meth. All I can do is tell you that its not a good idea and not
> recommended.
>
> If you want a good definition of why HBase has versioning... go ask StAck,
> Ted, Nick or one of the committers since they are more familiar with the
> internal workings of HBase than I. When you get a good answer, then have
> the online HBase book updated.
>
> -Mike
>
> PS... if you want a really good example of why not to use versioning to
> store temporal data...
> What happens if you're storing 100 versions of a cell and you find out
> that you have a duplicate entry with the wrong timestamp and you want to
> delete that one version.
> How do you do that? Going from memory, and I could very well be wrong, but
> the tombstone marker is on the cell, not the version, right?
>
> If it is on the version, what happens to the versions of the cell that are
> older than the tombstone marker?
> Sorry, its been a while since I've been intimate with HBase. Doing a bit
> of other things at the moment, and I'm already overtaxing my last remaining
> living brain cell.  ;-)
>
>
> On Apr 12, 2014, at 9:14 PM, Brian Jeltema <bd...@gmail.com> wrote:
>
> > I don't want to be argumentative here, but by definition it's not an
> internal feature because it's part of the
> > public API. We use versioning in a way that makes me somewhat
> uncomfortable, but it's been quite
> > useful. I'd like to see a clear explanation of why it exists and what
> use cases it was intended to support.
> >
> > Brian
> >
> >> Since you asked...
> >>
> >> Simplest answer... your schema should not rely upon internal features of
> the system.  Since you are tracking your data along the lines of a temporal
> attribute it should be part of the schema. In terms of a good design, by
> making it a part of the schema, you're defining that the data has a
> temporal property/attribute.
> >>
> >> Cell versioning is an internal feature of HBase. Its there for a reason.
> >> Perhaps one of the committers should expand on why its there.  (When I
> asked this earlier, never got an answer. )
> >>
> >>
> >> Longer answer... review how HBase stores the rows, including the versions
> of the cell.
> >> You're putting an unnecessary stress on the system.
> >>
> >> Its just not Zen... ;-)
> >>
> >> The reason I'm a bit short on this topic is that its an issue that
> keeps coming up, over and over again because some idiot keeps looking to
> take a shortcut without understanding the implications of their decision.
> Just like salting the key. (Note:  prepending a truncated hash isn't the
> same as using a salt.  Salting has a specific meaning and the salt is
> orthogonal to the underlying key. Any relationship between the salt and the
> key is purely random luck.)
> >>
> >> Does that help?
> >> (BTW, this should be part of any schema design talk... yet somehow I
> think its not covered... )
> >>
> >> -Mike
> >>
> >> PS. Its not weird that the cell versions are checked. It makes perfect
> sense.
> >>
> >> On Apr 12, 2014, at 2:55 PM, Guillermo Ortiz <ko...@gmail.com>
> wrote:
> >>
> >>> Well, It was just a example why I could keep a thousand versions or a
> cell.
> >>> I didn't know that HBase was checking each version when I do a scan,
> it's a
> >>> little weird when data is sorted.
> >>>
> >>> You get my attention with your comment, that it's better to store data
> over
> >>> time with new columns that with versions. Why is it better?
> >>> Versions looks that there're very convenient for that use case. So,
> does it
> >>> work better a rowkey with 3600 columns, that a rowkey with a column
> with
> >>> 3600 versions? What's the reason for avoiding a massive use of
> versions?
> >>>
> >>>
> >>> 2014-04-12 15:07 GMT+02:00 Michael Segel <mi...@hotmail.com>:
> >>>
> >>>> Silly question...
> >>>>
> >>>> Why does the idea of using versioning to capture temporal changes to
> data
> >>>> keep being propagated?
> >>>>
> >>>> Seriously this issue keeps popping up...
> >>>>
> >>>> If you want to capture data over time... use a timestamp as part of
> the
> >>>> column name.  Don't abuse the cell's version.
> >>>>
> >>>>
> >>>>
> >>>> On Apr 11, 2014, at 11:03 AM, gortiz <go...@pragsis.com> wrote:
> >>>>
> >>>>> Yes, I have tried with two different values for that value of
> versions,
> >>>> 1000 and maximum value for integers.
> >>>>>
> >>>>> But, I want to keep those versions. I don't want to keep just 3
> >>>> versions. Imagine that I want to record a new version each minute and
> store
> >>>> a day, those are 1440 versions.
> >>>>>
> >>>>> Why is HBase going to read all the versions?? , I thought, if you
> don't
> >>>> indicate any versions it's just read the newest and skip the rest. It
> >>>> doesn't make too much sense to read all of them if data is sorted,
> plus the
> >>>> newest version is stored in the top.
> >>>>>
> >>>>>
> >>>>> On 11/04/14 11:54, Anoop John wrote:
> >>>>>> What is the max version setting u have done for ur table cf?  When
> u set
> >>>>>> some a value, HBase has to keep all those versions.  During a scan
> it
> >>>> will
> >>>>>> read all those versions. In 94 version the default value for the max
> >>>>>> versions is 3.  I guess you have set some bigger value.   If u have
> not,
> >>>>>> mind testing after a major compaction?
> >>>>>>
> >>>>>> -Anoop-
> >>>>>>
> >>>>>> On Fri, Apr 11, 2014 at 1:01 PM, gortiz <go...@pragsis.com> wrote:
> >>>>>>
> >>>>>>> Last test I have done it's to reduce the number of versions to 100.
> >>>>>>> So, right now, I have 100 rows with 100 versions each one.
> >>>>>>> Times are: (I got the same times for blocksize of 64Ks and 1Mb)
> >>>>>>> 100row-1000versions + blockcache-> 80s.
> >>>>>>> 100row-1000versions + No blockcache-> 70s.
> >>>>>>>
> >>>>>>> 100row-*100*versions + blockcache-> 7.3s.
> >>>>>>> 100row-*100*versions + No blockcache-> 6.1s.
> >>>>>>>
> >>>>>>> What's the reasons of this? I guess HBase is enough smart for not
> >>>> consider
> >>>>>>> old versions, so, it just checks the newest. But, I reduce 10
> times the
> >>>>>>> size (in versions) and I got a 10x of performance.
> >>>>>>>
> >>>>>>> The filter is scan 'filters', {FILTER => "ValueFilter(=,
> >>>>>>> 'binary:5')",STARTROW =>
> '1010000000000000000000000000000000000101',
> >>>>>>> STOPROW => '6010000000000000000000000000000000000201'}
> >>>>>>>
> >>>>>>>
> >>>>>>>
> >>>>>>> On 11/04/14 09:04, gortiz wrote:
> >>>>>>>
> >>>>>>>> Well, I guessed that, what it doesn't make too much sense because
> >>>> it's so
> >>>>>>>> slow. I only have right now 100 rows with 1000 versions each row.
> >>>>>>>> I have checked the size of the dataset and each row is about
> 700Kbytes
> >>>>>>>> (around 7Gb, 100rowsx1000versions). So, it should only check 100
> rows
> >>>> x
> >>>>>>>> 700Kbytes = 70Mb, since it just check the newest version. How can
> it
> >>>> spend
> >>>>>>>> too many time checking this quantity of data?
> >>>>>>>>
> >>>>>>>> I'm generating again the dataset with a bigger blocksize
> (previously
> >>>> was
> >>>>>>>> 64Kb, now, it's going to be 1Mb). I could try tunning the
> scanning and
> >>>>>>>> baching parameters, but I don't think they're going to affect too
> >>>> much.
> >>>>>>>>
> >>>>>>>> Another test I want to do, it's generate the same dataset with
> just
> >>>>>>>> 100versions, It should spend around the same time, right? Or am I
> >>>> wrong?
> >>>>>>>>
> >>>>>>>> On 10/04/14 18:08, Ted Yu wrote:
> >>>>>>>>
> >>>>>>>>> It should be newest version of each value.
> >>>>>>>>>
> >>>>>>>>> Cheers
> >>>>>>>>>
> >>>>>>>>>
> >>>>>>>>> On Thu, Apr 10, 2014 at 9:55 AM, gortiz <go...@pragsis.com>
> wrote:
> >>>>>>>>>
> >>>>>>>>> Another little question is, when the filter I'm using, Do I check
> >>>> all the
> >>>>>>>>>> versions? or just the newest? Because, I'm wondering if when I
> do a
> >>>> scan
> >>>>>>>>>> over all the table, I look for the value "5" in all the dataset
> or
> >>>> I'm
> >>>>>>>>>> just
> >>>>>>>>>> looking for in one newest version of each value.
> >>>>>>>>>>
> >>>>>>>>>>
> >>>>>>>>>> On 10/04/14 16:52, gortiz wrote:
> >>>>>>>>>>
> >>>>>>>>>> I was trying to check the behaviour of HBase. The cluster is a
> >>>> group of
> >>>>>>>>>>> old computers, one master, five slaves, each one with 2Gb, so,
> >>>> 12gb in
> >>>>>>>>>>> total.
> >>>>>>>>>>> The table has a column family with 1000 columns and each column
> >>>> with
> >>>>>>>>>>> 100
> >>>>>>>>>>> versions.
> >>>>>>>>>>> There's another column faimily with four columns an one image
> of
> >>>> 100kb.
> >>>>>>>>>>> (I've tried without this column family as well.)
> >>>>>>>>>>> The table is partitioned manually in all the slaves, so data
> are
> >>>>>>>>>>> balanced
> >>>>>>>>>>> in the cluster.
> >>>>>>>>>>>
> >>>>>>>>>>> I'm executing this sentence *scan 'table1', {FILTER =>
> >>>> "ValueFilter(=,
> >>>>>>>>>>> 'binary:5')"* in HBase 0.94.6
> >>>>>>>>>>> My time for lease and rpc is three minutes.
> >>>>>>>>>>> Since, it's a full scan of the table, I have been playing with
> the
> >>>>>>>>>>> BLOCKCACHE as well (just disable and enable, not about the
> size of
> >>>>>>>>>>> it). I
> >>>>>>>>>>> thought that it was going to have too much calls to the GC.
> I'm not
> >>>>>>>>>>> sure
> >>>>>>>>>>> about this point.
> >>>>>>>>>>>
> >>>>>>>>>>> I know that it's not the best way to use HBase, it's just a
> test. I
> >>>>>>>>>>> think
> >>>>>>>>>>> that it's not working because the hardware isn't enough,
> although,
> >>>> I
> >>>>>>>>>>> would
> >>>>>>>>>>> like to try some kind of tunning to improve it.
> >>>>>>>>>>>
> >>>>>>>>>>>
> >>>>>>>>>>>
> >>>>>>>>>>>
> >>>>>>>>>>>
> >>>>>>>>>>>
> >>>>>>>>>>>
> >>>>>>>>>>>
> >>>>>>>>>>> On 10/04/14 14:21, Ted Yu wrote:
> >>>>>>>>>>>
> >>>>>>>>>>> Can you give us a bit more information:
> >>>>>>>>>>>> HBase release you're running
> >>>>>>>>>>>> What filters are used for the scan
> >>>>>>>>>>>>
> >>>>>>>>>>>> Thanks
> >>>>>>>>>>>>
> >>>>>>>>>>>> On Apr 10, 2014, at 2:36 AM, gortiz <go...@pragsis.com>
> wrote:
> >>>>>>>>>>>>
> >>>>>>>>>>>> I got this error when I execute a full scan with filters
> about a
> >>>>>>>>>>>> table.
> >>>>>>>>>>>>
> >>>>>>>>>>>>> Caused by: java.lang.RuntimeException:
> org.apache.hadoop.hbase.
> >>>>>>>>>>>>> regionserver.LeaseException:
> >>>>>>>>>>>>> org.apache.hadoop.hbase.regionserver.LeaseException: lease
> >>>>>>>>>>>>> '-4165751462641113359' does not exist
> >>>>>>>>>>>>>  at
> >>>>
> org.apache.hadoop.hbase.regionserver.Leases.removeLease(Leases.java:231)
> >>>>>>>>>>>>>
> >>>>>>>>>>>>>
> >>>>>>>>>>>>>  at org.apache.hadoop.hbase.regionserver.HRegionServer.
> >>>>>>>>>>>>> next(HRegionServer.java:2482)
> >>>>>>>>>>>>>  at sun.reflect.NativeMethodAccessorImpl.invoke0(Native
> >>>> Method)
> >>>>>>>>>>>>>  at sun.reflect.NativeMethodAccessorImpl.invoke(
> >>>>>>>>>>>>> NativeMethodAccessorImpl.java:39)
> >>>>>>>>>>>>>  at sun.reflect.DelegatingMethodAccessorImpl.invoke(
> >>>>>>>>>>>>> DelegatingMethodAccessorImpl.java:25)
> >>>>>>>>>>>>>  at java.lang.reflect.Method.invoke(Method.java:597)
> >>>>>>>>>>>>>  at
> >>>> org.apache.hadoop.hbase.ipc.WritableRpcEngine$Server.call(
> >>>>>>>>>>>>> WritableRpcEngine.java:320)
> >>>>>>>>>>>>>  at org.apache.hadoop.hbase.ipc.HBaseServer$Handler.run(
> >>>>>>>>>>>>> HBaseServer.java:1428)
> >>>>>>>>>>>>>
> >>>>>>>>>>>>> I have read about increase the lease time and rpc time, but
> it's
> >>>> not
> >>>>>>>>>>>>> working.. what else could I try?? The table isn't too big. I
> have
> >>>>>>>>>>>>> been
> >>>>>>>>>>>>> checking the logs from GC, HMaster and some RegionServers
> and I
> >>>>>>>>>>>>> didn't see
> >>>>>>>>>>>>> anything weird. I tried as well to try with a couple of
> caching
> >>>>>>>>>>>>> values.
> >>>>>>>>>>>>>
> >>>>>>>>>>>>>
> >>>>>>>>>>> --
> >>>>>>>>>> *Guillermo Ortiz*
> >>>>>>>>>> /Big Data Developer/
> >>>>>>>>>>
> >>>>>>>>>> Telf.: +34 917 680 490
> >>>>>>>>>> Fax: +34 913 833 301
> >>>>>>>>>> C/ Manuel Tovar, 49-53 - 28034 Madrid - Spain
> >>>>>>>>>>
> >>>>>>>>>> _http://www.bidoop.es_
> >>>>>>>>>>
> >>>>>>>>>>
> >>>>>>>>>>
> >>>>>>> --
> >>>>>>> *Guillermo Ortiz*
> >>>>>>> /Big Data Developer/
> >>>>>>>
> >>>>>>> Telf.: +34 917 680 490
> >>>>>>> Fax: +34 913 833 301
> >>>>>>> C/ Manuel Tovar, 49-53 - 28034 Madrid - Spain
> >>>>>>>
> >>>>>>> _http://www.bidoop.es_
> >>>>>>>
> >>>>>>>
> >>>>>
> >>>>>
> >>>>> --
> >>>>> *Guillermo Ortiz*
> >>>>> /Big Data Developer/
> >>>>>
> >>>>> Telf.: +34 917 680 490
> >>>>> Fax: +34 913 833 301
> >>>>> C/ Manuel Tovar, 49-53 - 28034 Madrid - Spain
> >>>>>
> >>>>> _http://www.bidoop.es_
> >>>>>
> >>>>
> >>>>
> >>
> >>
> >
> >
>
>

Re: Lease exception when I execute large scan with filters.

Posted by Michael Segel <mi...@hotmail.com>.
You do realize that it is an internal feature and that the public API can change to not present access to it.
However, that wouldn’t be a good idea, because you would want to be able to change it and in some cases review the versions of a cell. How else do you describe versioning, which is unique to HBase and/or other specific databases, when temporal modeling is not?

In fact, if memory serves… going back to 2009-10, IIRC, there was the ‘old API’ vs the ‘new API’ for Hadoop, where the ‘new API’ exposed only a subset of the classes/methods of the old API. (It was an attempt to simplify the API…) So again, APIs can change.

The point is that you should be modeling your data on time if it is time-sensitive data. Using versioning bypasses this, with bad consequences.

By all means keep abusing the cell’s versioning. 
Just don’t complain about poor performance and your HBase tossing exceptions left and right. I mean, I can’t stop you from mixing booze, coke and meth. All I can do is tell you that it’s not a good idea and not recommended.

If you want a good definition of why HBase has versioning… go ask StAck, Ted, Nick or one of the committers since they are more familiar with the internal workings of HBase than I. When you get a good answer, then have the online HBase book updated.

-Mike

PS… if you want a really good example of why not to use versioning to store temporal data… 
What happens if you’re storing 100 versions of a cell and you find out that you have a duplicate entry with the wrong timestamp and you want to delete that one version?
How do you do that? Going from memory, and I could very well be wrong, but the tombstone marker is on the cell, not the version, right? 

If it is on the version, what happens to the versions of the cell that are older than the tombstone marker?
Sorry, it’s been a while since I’ve been intimate with HBase. I’m doing a bit of other things at the moment, and I’m already overtaxing my last remaining living brain cell.  ;-)
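
On the PS: the 0.94-era client API does distinguish the two cases, so the
answer depends on which delete marker is written. A sketch (row, family and
qualifier names are illustrative):

import org.apache.hadoop.hbase.client.Delete;
import org.apache.hadoop.hbase.util.Bytes;

public class VersionDeletes {
    public static void main(String[] args) {
        long ts = 1397300400000L;  // illustrative timestamp

        // deleteColumn (singular) marks only the version whose timestamp
        // matches exactly; other versions of the cell survive.
        Delete one = new Delete(Bytes.toBytes("row1"));
        one.deleteColumn(Bytes.toBytes("cf"), Bytes.toBytes("q"), ts);

        // deleteColumns (plural) masks this version and every older one;
        // the masked versions are physically removed at the next major
        // compaction.
        Delete all = new Delete(Bytes.toBytes("row1"));
        all.deleteColumns(Bytes.toBytes("cf"), Bytes.toBytes("q"), ts);
    }
}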


On Apr 12, 2014, at 9:14 PM, Brian Jeltema <bd...@gmail.com> wrote:

> I don't want to be argumentative here, but by definition it's not an internal feature because it's part of the
> public API. We use versioning in a way that makes me somewhat uncomfortable, but it's been quite
> useful. I'd like to see a clear explanation of why it exists and what use cases it was intended to support.
> 
> Brian
> 
>> Since you asked… 
>> 
>> Simplest answer… your schema should not rely upon internal features of the system.  Since you are tracking your data along the lines of a temporal attribute it should be part of the schema. In terms of a good design, by making it a part of the schema, you’re defining that the data has a temporal property/attribute. 
>> 
>> Cell versioning is an internal feature of HBase. Its there for a reason. 
>> Perhaps one of the committers should expand on why its there.  (When I asked this earlier, never got an answer. ) 
>> 
>> 
>> Longer answer… review how HBase stores the rows, including the versions of the cell. 
>> You’re putting an unnecessary stress on the system. 
>> 
>> Its just not Zen… ;-) 
>> 
>> The reason I’m a bit short on this topic is that its an issue that keeps coming up, over and over again because some idiot keeps looking to take a shortcut without understanding the implications of their decision. Just like salting the key. (Note:  prepending a truncated hash isn’t the same as using a salt.  Salting has a specific meaning and the salt is orthogonal to the underlying key. Any relationship between the salt and the key is purely random luck.) 
>> 
>> Does that help? 
>> (BTW, this should be part of any schema design talk… yet somehow I think its not covered… ) 
>> 
>> -Mike
>> 
>> PS. Its not weird that the cell versions are checked. It makes perfect sense. 
>> 
>> On Apr 12, 2014, at 2:55 PM, Guillermo Ortiz <ko...@gmail.com> wrote:
>> 
>>> Well, It was just a example why I could keep a thousand versions or a cell.
>>> I didn't know that HBase was checking each version when I do a scan, it's a
>>> little weird when data is sorted.
>>> 
>>> You get my attention with your comment, that it's better to store data over
>>> time with new columns that with versions. Why is it better?
>>> Versions looks that there're very convenient for that use case. So, does it
>>> work better a rowkey with 3600 columns, that a rowkey with a column with
>>> 3600 versions? What's the reason for avoiding a massive use of versions?
>>> 
>>> 
>>> 2014-04-12 15:07 GMT+02:00 Michael Segel <mi...@hotmail.com>:
>>> 
>>>> Silly question...
>>>> 
>>>> Why does the idea of using versioning to capture temporal changes to data
>>>> keep being propagated?
>>>> 
>>>> Seriously this issue keeps popping up...
>>>> 
>>>> If you want to capture data over time... use a timestamp as part of the
>>>> column name.  Don't abuse the cell's version.
>>>> 
>>>> 
>>>> 
>>>> On Apr 11, 2014, at 11:03 AM, gortiz <go...@pragsis.com> wrote:
>>>> 
>>>>> Yes, I have tried with two different values for that value of versions,
>>>> 1000 and maximum value for integers.
>>>>> 
>>>>> But, I want to keep those versions. I don't want to keep just 3
>>>> versions. Imagine that I want to record a new version each minute and store
>>>> a day, those are 1440 versions.
>>>>> 
>>>>> Why is HBase going to read all the versions?? , I thought, if you don't
>>>> indicate any versions it's just read the newest and skip the rest. It
>>>> doesn't make too much sense to read all of them if data is sorted, plus the
>>>> newest version is stored in the top.
>>>>> 
>>>>> 
>>>>> On 11/04/14 11:54, Anoop John wrote:
>>>>>> What is the max version setting u have done for ur table cf?  When u set
>>>>>> some a value, HBase has to keep all those versions.  During a scan it
>>>> will
>>>>>> read all those versions. In 94 version the default value for the max
>>>>>> versions is 3.  I guess you have set some bigger value.   If u have not,
>>>>>> mind testing after a major compaction?
>>>>>> 
>>>>>> -Anoop-
>>>>>> 
>>>>>> On Fri, Apr 11, 2014 at 1:01 PM, gortiz <go...@pragsis.com> wrote:
>>>>>> 
>>>>>>> Last test I have done it's to reduce the number of versions to 100.
>>>>>>> So, right now, I have 100 rows with 100 versions each one.
>>>>>>> Times are: (I got the same times for blocksize of 64Ks and 1Mb)
>>>>>>> 100row-1000versions + blockcache-> 80s.
>>>>>>> 100row-1000versions + No blockcache-> 70s.
>>>>>>> 
>>>>>>> 100row-*100*versions + blockcache-> 7.3s.
>>>>>>> 100row-*100*versions + No blockcache-> 6.1s.
>>>>>>> 
>>>>>>> What's the reasons of this? I guess HBase is enough smart for not
>>>> consider
>>>>>>> old versions, so, it just checks the newest. But, I reduce 10 times the
>>>>>>> size (in versions) and I got a 10x of performance.
>>>>>>> 
>>>>>>> The filter is scan 'filters', {FILTER => "ValueFilter(=,
>>>>>>> 'binary:5')",STARTROW => '1010000000000000000000000000000000000101',
>>>>>>> STOPROW => '6010000000000000000000000000000000000201'}
>>>>>>> 
>>>>>>> 
>>>>>>> 
>>>>>>> On 11/04/14 09:04, gortiz wrote:
>>>>>>> 
>>>>>>>> Well, I guessed that, what it doesn't make too much sense because
>>>> it's so
>>>>>>>> slow. I only have right now 100 rows with 1000 versions each row.
>>>>>>>> I have checked the size of the dataset and each row is about 700Kbytes
>>>>>>>> (around 7Gb, 100rowsx1000versions). So, it should only check 100 rows
>>>> x
>>>>>>>> 700Kbytes = 70Mb, since it just check the newest version. How can it
>>>> spend
>>>>>>>> too many time checking this quantity of data?
>>>>>>>> 
>>>>>>>> I'm generating again the dataset with a bigger blocksize (previously
>>>> was
>>>>>>>> 64Kb, now, it's going to be 1Mb). I could try tunning the scanning and
>>>>>>>> baching parameters, but I don't think they're going to affect too
>>>> much.
>>>>>>>> 
>>>>>>>> Another test I want to do, it's generate the same dataset with just
>>>>>>>> 100versions, It should spend around the same time, right? Or am I
>>>> wrong?
>>>>>>>> 
>>>>>>>> On 10/04/14 18:08, Ted Yu wrote:
>>>>>>>> 
>>>>>>>>> It should be newest version of each value.
>>>>>>>>> 
>>>>>>>>> Cheers
>>>>>>>>> 
>>>>>>>>> 
>>>>>>>>> On Thu, Apr 10, 2014 at 9:55 AM, gortiz <go...@pragsis.com> wrote:
>>>>>>>>> 
>>>>>>>>> Another little question is, when the filter I'm using, Do I check
>>>> all the
>>>>>>>>>> versions? or just the newest? Because, I'm wondering if when I do a
>>>> scan
>>>>>>>>>> over all the table, I look for the value "5" in all the dataset or
>>>> I'm
>>>>>>>>>> just
>>>>>>>>>> looking for in one newest version of each value.
>>>>>>>>>> 
>>>>>>>>>> 
>>>>>>>>>> On 10/04/14 16:52, gortiz wrote:
>>>>>>>>>> 
>>>>>>>>>> I was trying to check the behaviour of HBase. The cluster is a
>>>> group of
>>>>>>>>>>> old computers, one master, five slaves, each one with 2Gb, so,
>>>> 12gb in
>>>>>>>>>>> total.
>>>>>>>>>>> The table has a column family with 1000 columns and each column
>>>> with
>>>>>>>>>>> 100
>>>>>>>>>>> versions.
>>>>>>>>>>> There's another column faimily with four columns an one image of
>>>> 100kb.
>>>>>>>>>>> (I've tried without this column family as well.)
>>>>>>>>>>> The table is partitioned manually in all the slaves, so data are
>>>>>>>>>>> balanced
>>>>>>>>>>> in the cluster.
>>>>>>>>>>> 
>>>>>>>>>>> I'm executing this sentence *scan 'table1', {FILTER =>
>>>> "ValueFilter(=,
>>>>>>>>>>> 'binary:5')"* in HBase 0.94.6
>>>>>>>>>>> My time for lease and rpc is three minutes.
>>>>>>>>>>> Since, it's a full scan of the table, I have been playing with the
>>>>>>>>>>> BLOCKCACHE as well (just disable and enable, not about the size of
>>>>>>>>>>> it). I
>>>>>>>>>>> thought that it was going to have too much calls to the GC. I'm not
>>>>>>>>>>> sure
>>>>>>>>>>> about this point.
>>>>>>>>>>> 
>>>>>>>>>>> I know that it's not the best way to use HBase, it's just a test. I
>>>>>>>>>>> think
>>>>>>>>>>> that it's not working because the hardware isn't enough, although,
>>>> I
>>>>>>>>>>> would
>>>>>>>>>>> like to try some kind of tunning to improve it.
>>>>>>>>>>> 
>>>>>>>>>>> 
>>>>>>>>>>> 
>>>>>>>>>>> 
>>>>>>>>>>> 
>>>>>>>>>>> 
>>>>>>>>>>> 
>>>>>>>>>>> 
>>>>>>>>>>> On 10/04/14 14:21, Ted Yu wrote:
>>>>>>>>>>> 
>>>>>>>>>>> Can you give us a bit more information:
>>>>>>>>>>>> HBase release you're running
>>>>>>>>>>>> What filters are used for the scan
>>>>>>>>>>>> 
>>>>>>>>>>>> Thanks
>>>>>>>>>>>> 
>>>>>>>>>>>> On Apr 10, 2014, at 2:36 AM, gortiz <go...@pragsis.com> wrote:
>>>>>>>>>>>> 
>>>>>>>>>>>> I got this error when I execute a full scan with filters about a
>>>>>>>>>>>> table.
>>>>>>>>>>>> 
>>>>>>>>>>>>> Caused by: java.lang.RuntimeException: org.apache.hadoop.hbase.
>>>>>>>>>>>>> regionserver.LeaseException:
>>>>>>>>>>>>> org.apache.hadoop.hbase.regionserver.LeaseException: lease
>>>>>>>>>>>>> '-4165751462641113359' does not exist
>>>>>>>>>>>>>  at
>>>> org.apache.hadoop.hbase.regionserver.Leases.removeLease(Leases.java:231)
>>>>>>>>>>>>> 
>>>>>>>>>>>>> 
>>>>>>>>>>>>>  at org.apache.hadoop.hbase.regionserver.HRegionServer.
>>>>>>>>>>>>> next(HRegionServer.java:2482)
>>>>>>>>>>>>>  at sun.reflect.NativeMethodAccessorImpl.invoke0(Native
>>>> Method)
>>>>>>>>>>>>>  at sun.reflect.NativeMethodAccessorImpl.invoke(
>>>>>>>>>>>>> NativeMethodAccessorImpl.java:39)
>>>>>>>>>>>>>  at sun.reflect.DelegatingMethodAccessorImpl.invoke(
>>>>>>>>>>>>> DelegatingMethodAccessorImpl.java:25)
>>>>>>>>>>>>>  at java.lang.reflect.Method.invoke(Method.java:597)
>>>>>>>>>>>>>  at
>>>> org.apache.hadoop.hbase.ipc.WritableRpcEngine$Server.call(
>>>>>>>>>>>>> WritableRpcEngine.java:320)
>>>>>>>>>>>>>  at org.apache.hadoop.hbase.ipc.HBaseServer$Handler.run(
>>>>>>>>>>>>> HBaseServer.java:1428)
>>>>>>>>>>>>> 
>>>>>>>>>>>>> I have read about increase the lease time and rpc time, but it's
>>>> not
>>>>>>>>>>>>> working.. what else could I try?? The table isn't too big. I have
>>>>>>>>>>>>> been
>>>>>>>>>>>>> checking the logs from GC, HMaster and some RegionServers and I
>>>>>>>>>>>>> didn't see
>>>>>>>>>>>>> anything weird. I tried as well to try with a couple of caching
>>>>>>>>>>>>> values.
>>>>>>>>>>>>> 
>>>>>>>>>>>>> 
>>>>>>>>>>> --
>>>>>>>>>> *Guillermo Ortiz*
>>>>>>>>>> /Big Data Developer/
>>>>>>>>>> 
>>>>>>>>>> Telf.: +34 917 680 490
>>>>>>>>>> Fax: +34 913 833 301
>>>>>>>>>> C/ Manuel Tovar, 49-53 - 28034 Madrid - Spain
>>>>>>>>>> 
>>>>>>>>>> _http://www.bidoop.es_
>>>>>>>>>> 
>>>>>>>>>> 
>>>>>>>>>> 
>>>>>>> --
>>>>>>> *Guillermo Ortiz*
>>>>>>> /Big Data Developer/
>>>>>>> 
>>>>>>> Telf.: +34 917 680 490
>>>>>>> Fax: +34 913 833 301
>>>>>>> C/ Manuel Tovar, 49-53 - 28034 Madrid - Spain
>>>>>>> 
>>>>>>> _http://www.bidoop.es_
>>>>>>> 
>>>>>>> 
>>>>> 
>>>>> 
>>>>> --
>>>>> *Guillermo Ortiz*
>>>>> /Big Data Developer/
>>>>> 
>>>>> Telf.: +34 917 680 490
>>>>> Fax: +34 913 833 301
>>>>> C/ Manuel Tovar, 49-53 - 28034 Madrid - Spain
>>>>> 
>>>>> _http://www.bidoop.es_
>>>>> 
>>>> 
>>>> 
>> 
>> 
> 
> 


Re: Lease exception when I execute large scan with filters.

Posted by Brian Jeltema <bd...@gmail.com>.
I don't want to be argumentative here, but by definition it's not an internal feature, because it's part of the
public API. We use versioning in a way that makes me somewhat uncomfortable, but it's been quite
useful. I'd like to see a clear explanation of why it exists and what use cases it was intended to support.

Brian

> Since you asked… 
> 
> Simplest answer… your schema should not rely upon internal features of the system.  Since you are tracking your data along the lines of a temporal attribute it should be part of the schema. In terms of a good design, by making it a part of the schema, you’re defining that the data has a temporal property/attribute. 
> 
> Cell versioning is an internal feature of HBase. Its there for a reason. 
> Perhaps one of the committers should expand on why its there.  (When I asked this earlier, never got an answer. ) 
> 
> 
> Longer answer… review how HBase stores the rows, including the versions of the cell. 
> You’re putting an unnecessary stress on the system. 
> 
> Its just not Zen… ;-) 
> 
> The reason I’m a bit short on this topic is that its an issue that keeps coming up, over and over again because some idiot keeps looking to take a shortcut without understanding the implications of their decision. Just like salting the key. (Note:  prepending a truncated hash isn’t the same as using a salt.  Salting has a specific meaning and the salt is orthogonal to the underlying key. Any relationship between the salt and the key is purely random luck.) 
> 
> Does that help? 
> (BTW, this should be part of any schema design talk… yet somehow I think its not covered… ) 
> 
> -Mike
> 
> PS. Its not weird that the cell versions are checked. It makes perfect sense. 
> 
> On Apr 12, 2014, at 2:55 PM, Guillermo Ortiz <ko...@gmail.com> wrote:
> 
>> Well, It was just a example why I could keep a thousand versions or a cell.
>> I didn't know that HBase was checking each version when I do a scan, it's a
>> little weird when data is sorted.
>> 
>> You get my attention with your comment, that it's better to store data over
>> time with new columns that with versions. Why is it better?
>> Versions looks that there're very convenient for that use case. So, does it
>> work better a rowkey with 3600 columns, that a rowkey with a column with
>> 3600 versions? What's the reason for avoiding a massive use of versions?
>> 
>> 
>> 2014-04-12 15:07 GMT+02:00 Michael Segel <mi...@hotmail.com>:
>> 
>>> Silly question...
>>> 
>>> Why does the idea of using versioning to capture temporal changes to data
>>> keep being propagated?
>>> 
>>> Seriously this issue keeps popping up...
>>> 
>>> If you want to capture data over time... use a timestamp as part of the
>>> column name.  Don't abuse the cell's version.
>>> 
>>> 
>>> 
>>> On Apr 11, 2014, at 11:03 AM, gortiz <go...@pragsis.com> wrote:
>>> 
>>>> Yes, I have tried with two different values for that value of versions,
>>> 1000 and maximum value for integers.
>>>> 
>>>> But, I want to keep those versions. I don't want to keep just 3
>>> versions. Imagine that I want to record a new version each minute and store
>>> a day, those are 1440 versions.
>>>> 
>>>> Why is HBase going to read all the versions?? , I thought, if you don't
>>> indicate any versions it's just read the newest and skip the rest. It
>>> doesn't make too much sense to read all of them if data is sorted, plus the
>>> newest version is stored in the top.
>>>> 
>>>> 
>>>> On 11/04/14 11:54, Anoop John wrote:
>>>>> What is the max version setting u have done for ur table cf?  When u set
>>>>> some a value, HBase has to keep all those versions.  During a scan it
>>> will
>>>>> read all those versions. In 94 version the default value for the max
>>>>> versions is 3.  I guess you have set some bigger value.   If u have not,
>>>>> mind testing after a major compaction?
>>>>> 
>>>>> -Anoop-
>>>>> 
>>>>> On Fri, Apr 11, 2014 at 1:01 PM, gortiz <go...@pragsis.com> wrote:
>>>>> 
>>>>>> Last test I have done it's to reduce the number of versions to 100.
>>>>>> So, right now, I have 100 rows with 100 versions each one.
>>>>>> Times are: (I got the same times for blocksize of 64Ks and 1Mb)
>>>>>> 100row-1000versions + blockcache-> 80s.
>>>>>> 100row-1000versions + No blockcache-> 70s.
>>>>>> 
>>>>>> 100row-*100*versions + blockcache-> 7.3s.
>>>>>> 100row-*100*versions + No blockcache-> 6.1s.
>>>>>> 
>>>>>> What's the reasons of this? I guess HBase is enough smart for not
>>> consider
>>>>>> old versions, so, it just checks the newest. But, I reduce 10 times the
>>>>>> size (in versions) and I got a 10x of performance.
>>>>>> 
>>>>>> The filter is scan 'filters', {FILTER => "ValueFilter(=,
>>>>>> 'binary:5')",STARTROW => '1010000000000000000000000000000000000101',
>>>>>> STOPROW => '6010000000000000000000000000000000000201'}
>>>>>> 
>>>>>> 
>>>>>> 
>>>>>> On 11/04/14 09:04, gortiz wrote:
>>>>>> 
>>>>>>> Well, I guessed that, what it doesn't make too much sense because
>>> it's so
>>>>>>> slow. I only have right now 100 rows with 1000 versions each row.
>>>>>>> I have checked the size of the dataset and each row is about 700Kbytes
>>>>>>> (around 7Gb, 100rowsx1000versions). So, it should only check 100 rows
>>> x
>>>>>>> 700Kbytes = 70Mb, since it just check the newest version. How can it
>>> spend
>>>>>>> too many time checking this quantity of data?
>>>>>>> 
>>>>>>> I'm generating again the dataset with a bigger blocksize (previously
>>> was
>>>>>>> 64Kb, now, it's going to be 1Mb). I could try tunning the scanning and
>>>>>>> baching parameters, but I don't think they're going to affect too
>>> much.
>>>>>>> 
>>>>>>> Another test I want to do, it's generate the same dataset with just
>>>>>>> 100versions, It should spend around the same time, right? Or am I
>>> wrong?
>>>>>>> 
>>>>>>> On 10/04/14 18:08, Ted Yu wrote:
>>>>>>> 
>>>>>>>> It should be newest version of each value.
>>>>>>>> 
>>>>>>>> Cheers
>>>>>>>> 
>>>>>>>> 
>>>>>>>> On Thu, Apr 10, 2014 at 9:55 AM, gortiz <go...@pragsis.com> wrote:
>>>>>>>> 
>>>>>>>> Another little question is, when the filter I'm using, Do I check
>>> all the
>>>>>>>>> versions? or just the newest? Because, I'm wondering if when I do a
>>> scan
>>>>>>>>> over all the table, I look for the value "5" in all the dataset or
>>> I'm
>>>>>>>>> just
>>>>>>>>> looking for in one newest version of each value.
>>>>>>>>> 
>>>>>>>>> 
>>>>>>>>> On 10/04/14 16:52, gortiz wrote:
>>>>>>>>> 
>>>>>>>>> I was trying to check the behaviour of HBase. The cluster is a
>>> group of
>>>>>>>>>> old computers, one master, five slaves, each one with 2Gb, so,
>>> 12gb in
>>>>>>>>>> total.
>>>>>>>>>> The table has a column family with 1000 columns and each column
>>> with
>>>>>>>>>> 100
>>>>>>>>>> versions.
>>>>>>>>>> There's another column faimily with four columns an one image of
>>> 100kb.
>>>>>>>>>> (I've tried without this column family as well.)
>>>>>>>>>> The table is partitioned manually in all the slaves, so data are
>>>>>>>>>> balanced
>>>>>>>>>> in the cluster.
>>>>>>>>>> 
>>>>>>>>>> I'm executing this sentence *scan 'table1', {FILTER =>
>>> "ValueFilter(=,
>>>>>>>>>> 'binary:5')"* in HBase 0.94.6
>>>>>>>>>> My time for lease and rpc is three minutes.
>>>>>>>>>> Since, it's a full scan of the table, I have been playing with the
>>>>>>>>>> BLOCKCACHE as well (just disable and enable, not about the size of
>>>>>>>>>> it). I
>>>>>>>>>> thought that it was going to have too much calls to the GC. I'm not
>>>>>>>>>> sure
>>>>>>>>>> about this point.
>>>>>>>>>> 
>>>>>>>>>> I know that it's not the best way to use HBase, it's just a test. I
>>>>>>>>>> think
>>>>>>>>>> that it's not working because the hardware isn't enough, although,
>>> I
>>>>>>>>>> would
>>>>>>>>>> like to try some kind of tunning to improve it.
>>>>>>>>>> 
>>>>>>>>>> 
>>>>>>>>>> 
>>>>>>>>>> 
>>>>>>>>>> 
>>>>>>>>>> 
>>>>>>>>>> 
>>>>>>>>>> 
>>>>>>>>>> On 10/04/14 14:21, Ted Yu wrote:
>>>>>>>>>> 
>>>>>>>>>> Can you give us a bit more information:
>>>>>>>>>>> HBase release you're running
>>>>>>>>>>> What filters are used for the scan
>>>>>>>>>>> 
>>>>>>>>>>> Thanks
>>>>>>>>>>> 
>>>>>>>>>>> On Apr 10, 2014, at 2:36 AM, gortiz <go...@pragsis.com> wrote:
>>>>>>>>>>> 
>>>>>>>>>>> I got this error when I execute a full scan with filters about a
>>>>>>>>>>> table.
>>>>>>>>>>> 
>>>>>>>>>>>> Caused by: java.lang.RuntimeException: org.apache.hadoop.hbase.
>>>>>>>>>>>> regionserver.LeaseException:
>>>>>>>>>>>> org.apache.hadoop.hbase.regionserver.LeaseException: lease
>>>>>>>>>>>> '-4165751462641113359' does not exist
>>>>>>>>>>>>   at
>>> org.apache.hadoop.hbase.regionserver.Leases.removeLease(Leases.java:231)
>>>>>>>>>>>> 
>>>>>>>>>>>> 
>>>>>>>>>>>>   at org.apache.hadoop.hbase.regionserver.HRegionServer.
>>>>>>>>>>>> next(HRegionServer.java:2482)
>>>>>>>>>>>>   at sun.reflect.NativeMethodAccessorImpl.invoke0(Native
>>> Method)
>>>>>>>>>>>>   at sun.reflect.NativeMethodAccessorImpl.invoke(
>>>>>>>>>>>> NativeMethodAccessorImpl.java:39)
>>>>>>>>>>>>   at sun.reflect.DelegatingMethodAccessorImpl.invoke(
>>>>>>>>>>>> DelegatingMethodAccessorImpl.java:25)
>>>>>>>>>>>>   at java.lang.reflect.Method.invoke(Method.java:597)
>>>>>>>>>>>>   at
>>> org.apache.hadoop.hbase.ipc.WritableRpcEngine$Server.call(
>>>>>>>>>>>> WritableRpcEngine.java:320)
>>>>>>>>>>>>   at org.apache.hadoop.hbase.ipc.HBaseServer$Handler.run(
>>>>>>>>>>>> HBaseServer.java:1428)
>>>>>>>>>>>> 
>>>>>>>>>>>> I have read about increase the lease time and rpc time, but it's
>>> not
>>>>>>>>>>>> working.. what else could I try?? The table isn't too big. I have
>>>>>>>>>>>> been
>>>>>>>>>>>> checking the logs from GC, HMaster and some RegionServers and I
>>>>>>>>>>>> didn't see
>>>>>>>>>>>> anything weird. I tried as well to try with a couple of caching
>>>>>>>>>>>> values.
>>>>>>>>>>>> 
>>>>>>>>>>>> 
>>>>>>>>>> --
>>>>>>>>> *Guillermo Ortiz*
>>>>>>>>> /Big Data Developer/
>>>>>>>>> 
>>>>>>>>> Telf.: +34 917 680 490
>>>>>>>>> Fax: +34 913 833 301
>>>>>>>>> C/ Manuel Tovar, 49-53 - 28034 Madrid - Spain
>>>>>>>>> 
>>>>>>>>> _http://www.bidoop.es_
>>>>>>>>> 
>>>>>>>>> 
>>>>>>>>> 
>>>>>> --
>>>>>> *Guillermo Ortiz*
>>>>>> /Big Data Developer/
>>>>>> 
>>>>>> Telf.: +34 917 680 490
>>>>>> Fax: +34 913 833 301
>>>>>> C/ Manuel Tovar, 49-53 - 28034 Madrid - Spain
>>>>>> 
>>>>>> _http://www.bidoop.es_
>>>>>> 
>>>>>> 
>>>> 
>>>> 
>>>> --
>>>> *Guillermo Ortiz*
>>>> /Big Data Developer/
>>>> 
>>>> Telf.: +34 917 680 490
>>>> Fax: +34 913 833 301
>>>> C/ Manuel Tovar, 49-53 - 28034 Madrid - Spain
>>>> 
>>>> _http://www.bidoop.es_
>>>> 
>>> 
>>> 
> 
> 


Re: Lease exception when I execute large scan with filters.

Posted by Michael Segel <mi...@hotmail.com>.
Since you asked… 

Simplest answer… your schema should not rely upon internal features of the system.  Since you are tracking your data along the lines of a temporal attribute it should be part of the schema. In terms of a good design, by making it a part of the schema, you’re defining that the data has a temporal property/attribute. 

Cell versioning is an internal feature of HBase. It’s there for a reason.
Perhaps one of the committers should expand on why it’s there. (When I asked this earlier, I never got an answer.)


Longer answer… review how HBase stores the rows, including the versions of the cell. 
You’re putting an unnecessary stress on the system. 

It’s just not Zen… ;-)

The reason I’m a bit short on this topic is that it’s an issue that keeps coming up, over and over again, because some idiot keeps looking to take a shortcut without understanding the implications of their decision. Just like salting the key. (Note: prepending a truncated hash isn’t the same as using a salt. Salting has a specific meaning and the salt is orthogonal to the underlying key. Any relationship between the salt and the key is purely random luck.)

Does that help? 
(BTW, this should be part of any schema design talk… yet somehow I think it’s not covered…)

-Mike

PS. It’s not weird that the cell versions are checked. It makes perfect sense.
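
A minimal sketch of the suggested modeling, with the time bucket in the
column qualifier instead of in cell versions (table, family and row names
are assumptions, using the one-reading-per-minute example from upthread):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;

public class TemporalColumns {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        HTable table = new HTable(conf, "metrics");
        // One row per sensor per day; one column per minute turns the
        // 1440 versions from the example into 1440 columns.
        long minuteOfDay = (System.currentTimeMillis() / 60000L) % 1440L;
        Put put = new Put(Bytes.toBytes("sensor42_2014-04-12"));
        // Zero-padding keeps the qualifiers sorted in time order, so a
        // time slice is a contiguous column range.
        put.add(Bytes.toBytes("d"),
                Bytes.toBytes(String.format("m%04d", minuteOfDay)),
                Bytes.toBytes("5"));
        table.put(put);
        table.close();
    }
}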

On Apr 12, 2014, at 2:55 PM, Guillermo Ortiz <ko...@gmail.com> wrote:

> Well, It was just a example why I could keep a thousand versions or a cell.
> I didn't know that HBase was checking each version when I do a scan, it's a
> little weird when data is sorted.
> 
> You get my attention with your comment, that it's better to store data over
> time with new columns that with versions. Why is it better?
> Versions looks that there're very convenient for that use case. So, does it
> work better a rowkey with 3600 columns, that a rowkey with a column with
> 3600 versions? What's the reason for avoiding a massive use of versions?
> 
> 
> 2014-04-12 15:07 GMT+02:00 Michael Segel <mi...@hotmail.com>:
> 
>> Silly question...
>> 
>> Why does the idea of using versioning to capture temporal changes to data
>> keep being propagated?
>> 
>> Seriously this issue keeps popping up...
>> 
>> If you want to capture data over time... use a timestamp as part of the
>> column name.  Don't abuse the cell's version.
>> 
>> 
>> 
>> On Apr 11, 2014, at 11:03 AM, gortiz <go...@pragsis.com> wrote:
>> 
>>> Yes, I have tried with two different values for that value of versions,
>> 1000 and maximum value for integers.
>>> 
>>> But, I want to keep those versions. I don't want to keep just 3
>> versions. Imagine that I want to record a new version each minute and store
>> a day, those are 1440 versions.
>>> 
>>> Why is HBase going to read all the versions?? , I thought, if you don't
>> indicate any versions it's just read the newest and skip the rest. It
>> doesn't make too much sense to read all of them if data is sorted, plus the
>> newest version is stored in the top.
>>> 
>>> 
>>> On 11/04/14 11:54, Anoop John wrote:
>>>> What is the max version setting u have done for ur table cf?  When u set
>>>> some a value, HBase has to keep all those versions.  During a scan it
>> will
>>>> read all those versions. In 94 version the default value for the max
>>>> versions is 3.  I guess you have set some bigger value.   If u have not,
>>>> mind testing after a major compaction?
>>>> 
>>>> -Anoop-
>>>> 
>>>> On Fri, Apr 11, 2014 at 1:01 PM, gortiz <go...@pragsis.com> wrote:
>>>> 
>>>>> Last test I have done it's to reduce the number of versions to 100.
>>>>> So, right now, I have 100 rows with 100 versions each one.
>>>>> Times are: (I got the same times for blocksize of 64Ks and 1Mb)
>>>>> 100row-1000versions + blockcache-> 80s.
>>>>> 100row-1000versions + No blockcache-> 70s.
>>>>> 
>>>>> 100row-*100*versions + blockcache-> 7.3s.
>>>>> 100row-*100*versions + No blockcache-> 6.1s.
>>>>> 
>>>>> What's the reasons of this? I guess HBase is enough smart for not
>> consider
>>>>> old versions, so, it just checks the newest. But, I reduce 10 times the
>>>>> size (in versions) and I got a 10x of performance.
>>>>> 
>>>>> The filter is scan 'filters', {FILTER => "ValueFilter(=,
>>>>> 'binary:5')",STARTROW => '1010000000000000000000000000000000000101',
>>>>> STOPROW => '6010000000000000000000000000000000000201'}
>>>>> 
>>>>> 
>>>>> 
>>>>> On 11/04/14 09:04, gortiz wrote:
>>>>> 
>>>>>> Well, I guessed that, what it doesn't make too much sense because
>> it's so
>>>>>> slow. I only have right now 100 rows with 1000 versions each row.
>>>>>> I have checked the size of the dataset and each row is about 700Kbytes
>>>>>> (around 7Gb, 100rowsx1000versions). So, it should only check 100 rows
>> x
>>>>>> 700Kbytes = 70Mb, since it just check the newest version. How can it
>> spend
>>>>>> too many time checking this quantity of data?
>>>>>> 
>>>>>> I'm generating again the dataset with a bigger blocksize (previously
>> was
>>>>>> 64Kb, now, it's going to be 1Mb). I could try tunning the scanning and
>>>>>> baching parameters, but I don't think they're going to affect too
>> much.
>>>>>> 
>>>>>> Another test I want to do, it's generate the same dataset with just
>>>>>> 100versions, It should spend around the same time, right? Or am I
>> wrong?
>>>>>> 
>>>>>> On 10/04/14 18:08, Ted Yu wrote:
>>>>>> 
>>>>>>> It should be newest version of each value.
>>>>>>> 
>>>>>>> Cheers
>>>>>>> 
>>>>>>> 
>>>>>>> On Thu, Apr 10, 2014 at 9:55 AM, gortiz <go...@pragsis.com> wrote:
>>>>>>> 
>>>>>>> Another little question is, when the filter I'm using, Do I check
>> all the
>>>>>>>> versions? or just the newest? Because, I'm wondering if when I do a
>> scan
>>>>>>>> over all the table, I look for the value "5" in all the dataset or
>> I'm
>>>>>>>> just
>>>>>>>> looking for in one newest version of each value.
>>>>>>>> 
>>>>>>>> 
>>>>>>>> On 10/04/14 16:52, gortiz wrote:
>>>>>>>> 
>>>>>>>> I was trying to check the behaviour of HBase. The cluster is a
>> group of
>>>>>>>>> old computers, one master, five slaves, each one with 2Gb, so,
>> 12gb in
>>>>>>>>> total.
>>>>>>>>> The table has a column family with 1000 columns and each column
>> with
>>>>>>>>> 100
>>>>>>>>> versions.
>>>>>>>>> There's another column faimily with four columns an one image of
>> 100kb.
>>>>>>>>>  (I've tried without this column family as well.)
>>>>>>>>> The table is partitioned manually in all the slaves, so data are
>>>>>>>>> balanced
>>>>>>>>> in the cluster.
>>>>>>>>> 
>>>>>>>>> I'm executing this sentence *scan 'table1', {FILTER =>
>> "ValueFilter(=,
>>>>>>>>> 'binary:5')"* in HBase 0.94.6
>>>>>>>>> My time for lease and rpc is three minutes.
>>>>>>>>> Since, it's a full scan of the table, I have been playing with the
>>>>>>>>> BLOCKCACHE as well (just disable and enable, not about the size of
>>>>>>>>> it). I
>>>>>>>>> thought that it was going to have too much calls to the GC. I'm not
>>>>>>>>> sure
>>>>>>>>> about this point.
>>>>>>>>> 
>>>>>>>>> I know that it's not the best way to use HBase, it's just a test. I
>>>>>>>>> think
>>>>>>>>> that it's not working because the hardware isn't enough, although,
>> I
>>>>>>>>> would
>>>>>>>>> like to try some kind of tunning to improve it.
>>>>>>>>> 
>>>>>>>>> 
>>>>>>>>> 
>>>>>>>>> 
>>>>>>>>> 
>>>>>>>>> 
>>>>>>>>> 
>>>>>>>>> 
>>>>>>>>> On 10/04/14 14:21, Ted Yu wrote:
>>>>>>>>> 
>>>>>>>>> Can you give us a bit more information:
>>>>>>>>>> HBase release you're running
>>>>>>>>>> What filters are used for the scan
>>>>>>>>>> 
>>>>>>>>>> Thanks
>>>>>>>>>> 
>>>>>>>>>> On Apr 10, 2014, at 2:36 AM, gortiz <go...@pragsis.com> wrote:
>>>>>>>>>> 
>>>>>>>>>>  I got this error when I execute a full scan with filters about a
>>>>>>>>>> table.
>>>>>>>>>> 
>>>>>>>>>>> Caused by: java.lang.RuntimeException: org.apache.hadoop.hbase.
>>>>>>>>>>> regionserver.LeaseException:
>>>>>>>>>>> org.apache.hadoop.hbase.regionserver.LeaseException: lease
>>>>>>>>>>> '-4165751462641113359' does not exist
>>>>>>>>>>>     at
>> org.apache.hadoop.hbase.regionserver.Leases.removeLease(Leases.java:231)
>>>>>>>>>>> 
>>>>>>>>>>> 
>>>>>>>>>>>     at org.apache.hadoop.hbase.regionserver.HRegionServer.
>>>>>>>>>>> next(HRegionServer.java:2482)
>>>>>>>>>>>     at sun.reflect.NativeMethodAccessorImpl.invoke0(Native
>> Method)
>>>>>>>>>>>     at sun.reflect.NativeMethodAccessorImpl.invoke(
>>>>>>>>>>> NativeMethodAccessorImpl.java:39)
>>>>>>>>>>>     at sun.reflect.DelegatingMethodAccessorImpl.invoke(
>>>>>>>>>>> DelegatingMethodAccessorImpl.java:25)
>>>>>>>>>>>     at java.lang.reflect.Method.invoke(Method.java:597)
>>>>>>>>>>>     at
>> org.apache.hadoop.hbase.ipc.WritableRpcEngine$Server.call(
>>>>>>>>>>> WritableRpcEngine.java:320)
>>>>>>>>>>>     at org.apache.hadoop.hbase.ipc.HBaseServer$Handler.run(
>>>>>>>>>>> HBaseServer.java:1428)
>>>>>>>>>>> 
>>>>>>>>>>> I have read about increase the lease time and rpc time, but it's
>> not
>>>>>>>>>>> working.. what else could I try?? The table isn't too big. I have
>>>>>>>>>>> been
>>>>>>>>>>> checking the logs from GC, HMaster and some RegionServers and I
>>>>>>>>>>> didn't see
>>>>>>>>>>> anything weird. I tried as well to try with a couple of caching
>>>>>>>>>>> values.
>>>>>>>>>>> 
>>>>>>>>>>> 
>>>>>>>>> --
>>>>>>>> *Guillermo Ortiz*
>>>>>>>> /Big Data Developer/
>>>>>>>> 
>>>>>>>> Telf.: +34 917 680 490
>>>>>>>> Fax: +34 913 833 301
>>>>>>>> C/ Manuel Tovar, 49-53 - 28034 Madrid - Spain
>>>>>>>> 
>>>>>>>> _http://www.bidoop.es_
>>>>>>>> 
>>>>>>>> 
>>>>>>>> 
>>>>> --
>>>>> *Guillermo Ortiz*
>>>>> /Big Data Developer/
>>>>> 
>>>>> Telf.: +34 917 680 490
>>>>> Fax: +34 913 833 301
>>>>> C/ Manuel Tovar, 49-53 - 28034 Madrid - Spain
>>>>> 
>>>>> _http://www.bidoop.es_
>>>>> 
>>>>> 
>>> 
>>> 
>>> --
>>> *Guillermo Ortiz*
>>> /Big Data Developer/
>>> 
>>> Telf.: +34 917 680 490
>>> Fax: +34 913 833 301
>>> C/ Manuel Tovar, 49-53 - 28034 Madrid - Spain
>>> 
>>> _http://www.bidoop.es_
>>> 
>> 
>> 


Re: Lease exception when I execute large scan with filters.

Posted by Guillermo Ortiz <ko...@gmail.com>.
Well, it was just an example of why I might keep a thousand versions of a
cell. I didn't know that HBase checks every version when I do a scan; that
seems a little odd when the data is sorted.

You got my attention with your comment that it's better to store data over
time with new columns than with versions. Why is it better?
Versions look very convenient for that use case. So, does a rowkey with
3600 columns work better than a rowkey whose column has 3600 versions?
What's the reason for avoiding a massive use of versions?


Re: Lease exception when I execute large scan with filters.

Posted by Michael Segel <mi...@hotmail.com>.
Silly question… 

Why does the idea of using versioning to capture temporal changes to data keep being propagated? 

Seriously this issue keeps popping up… 

If you want to capture data over time… use a timestamp as part of the column name.  Don’t abuse the cell’s version.
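A rough sketch of that layout with the Java client (table and family names
are invented for the example; the sample time is encoded into the column
qualifier instead of the cell version):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Put;
import org.apache.hadoop.hbase.util.Bytes;

public class TimeSeriesPut {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        HTable table = new HTable(conf, "metrics");
        long ts = System.currentTimeMillis();
        Put put = new Put(Bytes.toBytes("sensor-42"));
        // the timestamp goes into the qualifier, so each sample becomes a
        // new column instead of a new version of the same cell
        put.add(Bytes.toBytes("d"), Bytes.toBytes("v_" + ts), Bytes.toBytes("5"));
        table.put(put);
        table.close();
    }
}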



Re: Lease exception when I execute large scan with filters.

Posted by gortiz <go...@pragsis.com>.
Yes, I have tried two different values for the maximum number of versions:
1000 and the maximum integer value.

But I want to keep those versions; I don't want to keep just 3. Imagine
that I want to record a new version each minute and store a whole day:
that's 1440 versions.

Why is HBase going to read all the versions? I thought that if you don't
ask for any versions it just reads the newest and skips the rest. It
doesn't make much sense to read all of them if the data is sorted,
especially since the newest version is stored at the top.
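(For context, this is roughly how a column family with that many versions
gets declared through the Java admin API; a sketch, with made-up table and
family names:)

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.HBaseAdmin;

public class CreateVersionedTable {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        HTableDescriptor desc = new HTableDescriptor("minutely");
        HColumnDescriptor cf = new HColumnDescriptor("d");
        cf.setMaxVersions(1440); // one version per minute for a whole day
        desc.addFamily(cf);
        admin.createTable(desc);
        admin.close();
    }
}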


Re: Lease exception when I execute large scan with filters.

Posted by Anoop John <an...@gmail.com>.
What is the max versions setting you have applied to your table's column
family? When you set such a value, HBase has to keep all those versions,
and during a scan it will read all of them. In 0.94 the default value for
max versions is 3. I guess you have set some bigger value. If you have
not, would you mind testing after a major compaction?
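You can double-check what the table is actually set to from the client; a
quick sketch against the 0.94 admin API (assuming the table is named
'table1'):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HColumnDescriptor;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.HBaseAdmin;
import org.apache.hadoop.hbase.util.Bytes;

public class ShowMaxVersions {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        HBaseAdmin admin = new HBaseAdmin(conf);
        // print the VERSIONS setting of every column family in the table
        HTableDescriptor desc = admin.getTableDescriptor(Bytes.toBytes("table1"));
        for (HColumnDescriptor cf : desc.getFamilies()) {
            System.out.println(cf.getNameAsString() + " VERSIONS=" + cf.getMaxVersions());
        }
        admin.close();
    }
}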

-Anoop-


Re: Lease exception when I execute large scan with filters.

Posted by gortiz <go...@pragsis.com>.
The last test I have done is to reduce the number of versions to 100.
So, right now, I have 100 rows with 100 versions each.
Times are (I got the same times for block sizes of 64KB and 1MB):
100 rows, 1000 versions + block cache -> 80s.
100 rows, 1000 versions + no block cache -> 70s.

100 rows, *100* versions + block cache -> 7.3s.
100 rows, *100* versions + no block cache -> 6.1s.

What's the reason for this? I assumed HBase was smart enough not to
consider old versions and would just check the newest one. But I reduced
the size (in versions) by 10x and got a 10x performance improvement.

The filter is scan 'filters', {FILTER => "ValueFilter(=,
'binary:5')", STARTROW => '1010000000000000000000000000000000000101',
STOPROW => '6010000000000000000000000000000000000201'}
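For reference, roughly the same scan through the Java client, with the
number of requested versions made explicit (an untested sketch):

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.client.HTable;
import org.apache.hadoop.hbase.client.Result;
import org.apache.hadoop.hbase.client.ResultScanner;
import org.apache.hadoop.hbase.client.Scan;
import org.apache.hadoop.hbase.filter.BinaryComparator;
import org.apache.hadoop.hbase.filter.CompareFilter;
import org.apache.hadoop.hbase.filter.ValueFilter;
import org.apache.hadoop.hbase.util.Bytes;

public class FilterScan {
    public static void main(String[] args) throws Exception {
        Configuration conf = HBaseConfiguration.create();
        HTable table = new HTable(conf, "filters");
        Scan scan = new Scan(Bytes.toBytes("1010000000000000000000000000000000000101"),
                Bytes.toBytes("6010000000000000000000000000000000000201"));
        scan.setMaxVersions(1); // ask only for the newest version (the default)
        scan.setCaching(100);   // rows per RPC; worth tuning against the lease period
        scan.setFilter(new ValueFilter(CompareFilter.CompareOp.EQUAL,
                new BinaryComparator(Bytes.toBytes("5"))));
        ResultScanner scanner = table.getScanner(scan);
        for (Result r : scanner) {
            System.out.println(Bytes.toString(r.getRow()));
        }
        scanner.close();
        table.close();
    }
}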


Re: Lease exception when I execute large scan with filters.

Posted by gortiz <go...@pragsis.com>.
Well, I guessed that, but then it doesn't make much sense that it's so
slow. Right now I only have 100 rows with 1000 versions each.
I have checked the size of the dataset and each row is about 700KB
(around 7GB for 100 rows x 1000 versions). So, it should only check
100 rows x 700KB = 70MB, since it just checks the newest version. How can
it spend so much time checking that quantity of data?

I'm generating the dataset again with a bigger block size (previously it
was 64KB; now it's going to be 1MB). I could try tuning the scanner
caching and batching parameters, but I don't think they're going to have
much effect.

Another test I want to do is to generate the same dataset with just
100 versions. It should take around the same time, right? Or am I wrong?


Re: Lease exception when I execute large scan with filters.

Posted by Ted Yu <yu...@gmail.com>.
It should be the newest version of each value.

Cheers


Re: Lease exception when I execute large scan with filters.

Posted by gortiz <go...@pragsis.com>.
Another little question: with the filter I'm using, do I check all the
versions or just the newest? I'm wondering whether, when I scan the whole
table, I look for the value "5" across the whole dataset or only in the
newest version of each value.


Re: Lease exception when I execute large scan with filters.

Posted by gortiz <go...@pragsis.com>.
I was trying to check the behaviour of HBase. The cluster is a group of
old computers: one master and five slaves, each with 2GB, so 12GB in
total.
The table has a column family with 1000 columns and each column with 100
versions.
There's another column family with four columns and one image of 100KB.
(I've tried without this column family as well.)
The table is partitioned manually across all the slaves, so data are
balanced in the cluster.

I'm executing the statement *scan 'table1', {FILTER => "ValueFilter(=,
'binary:5')"}* in HBase 0.94.6.
My lease and RPC times are three minutes.
Since it's a full scan of the table, I have been playing with the
BLOCKCACHE as well (just disabling and enabling it, not changing its
size). I thought that it was going to cause too many calls to the GC;
I'm not sure about this point.

I know that this isn't the best way to use HBase, it's just a test. I
think that it's not working because the hardware isn't enough, although
I would like to try some kind of tuning to improve it.


Re: Lease exception when I execute large scan with filters.

Posted by Ted Yu <yu...@gmail.com>.
Can you give us a bit more information:

HBase release you're running
What filters are used for the scan

Thanks


Lease exception when I execute large scan with filters.

Posted by gortiz <go...@pragsis.com>.
I got this error when I execute a full scan with filters over a table.

Caused by: java.lang.RuntimeException: org.apache.hadoop.hbase.regionserver.LeaseException: org.apache.hadoop.hbase.regionserver.LeaseException: lease '-4165751462641113359' does not exist
     at org.apache.hadoop.hbase.regionserver.Leases.removeLease(Leases.java:231)
     at org.apache.hadoop.hbase.regionserver.HRegionServer.next(HRegionServer.java:2482)
     at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method)
     at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:39)
     at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:25)
     at java.lang.reflect.Method.invoke(Method.java:597)
     at org.apache.hadoop.hbase.ipc.WritableRpcEngine$Server.call(WritableRpcEngine.java:320)
     at org.apache.hadoop.hbase.ipc.HBaseServer$Handler.run(HBaseServer.java:1428)

I have read about increasing the lease time and RPC time, but it's not
working. What else could I try? The table isn't too big. I have been
checking the GC, HMaster and some RegionServer logs and I didn't see
anything weird. I also tried a couple of caching values.
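(For reference, these are the two settings I mean; a sketch using what I
believe are the 0.94 property names. The lease period is read by the
region servers, so it has to go into their hbase-site.xml and needs a
restart; setting it on the client alone is not enough.)

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;

public class TimeoutConf {
    public static void main(String[] args) {
        Configuration conf = HBaseConfiguration.create();
        // scanner lease period, i.e. how long a scanner may sit idle
        // between next() calls before the server expires it
        conf.setLong("hbase.regionserver.lease.period", 180000L); // 3 minutes
        // RPC timeout; usually kept >= the lease period
        conf.setLong("hbase.rpc.timeout", 180000L);
        System.out.println("rpc timeout = " + conf.get("hbase.rpc.timeout"));
    }
}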

Re: This server is in the failed servers list

Posted by Margusja <ma...@roo.ee>.
Hi

Found the solution. I had used a non-Hortonworks Hadoop lib,
"hadoop-core-1.2.1.jar", which is a Hadoop 1 jar and does not match this
Hadoop 2 based cluster. I removed hadoop-core-1.2.1.jar and copied the
matching common jar instead:
cp /usr/lib/hadoop/hadoop-common-2.2.0.2.0.6.0-76.jar ./libs/
[hbase@sandbox hbase_connect]$ javac -cp 
./libs/*:./libs/hbase-0.96.2-hadoop2/lib/* Hbase_connect.java
[hbase@sandbox hbase_connect]$ java -cp 
./:./libs/*:./libs/hbase-0.96.2-hadoop2/lib/* Hbase_connect
2014-04-05 12:09:32,795 WARN  [main] util.NativeCodeLoader 
(NativeCodeLoader.java:<clinit>(62)) - Unable to load native-hadoop 
library for your platform... using builtin-java classes where applicable
2014-04-05 12:09:34,378 INFO  [main] zookeeper.ZooKeeper 
(Environment.java:logEnv(100)) - Client 
environment:zookeeper.version=3.4.5-1392090, built on 09/30/2012 17:52 GMT
2014-04-05 12:09:34,381 INFO  [main] zookeeper.ZooKeeper 
(Environment.java:logEnv(100)) - Client 
environment:host.name=sandbox.hortonworks.com
2014-04-05 12:09:34,381 INFO  [main] zookeeper.ZooKeeper 
(Environment.java:logEnv(100)) - Client environment:java.version=1.6.0_30
2014-04-05 12:09:34,382 INFO  [main] zookeeper.ZooKeeper 
(Environment.java:logEnv(100)) - Client environment:java.vendor=Sun 
Microsystems Inc.
2014-04-05 12:09:34,382 INFO  [main] zookeeper.ZooKeeper 
(Environment.java:logEnv(100)) - Client 
environment:java.home=/usr/lib/jvm/java-1.6.0-openjdk-1.6.0.0.x86_64/jre
2014-04-05 12:09:34,382 INFO  [main] zookeeper.ZooKeeper 
(Environment.java:logEnv(100)) - Client 
environment:java.class.path=./:./libs/hadoop-client-2.2.0.jar:./libs/hadoop-common-2.2.0.2.0.6.0-76.jar:./libs/hbase-0.96.2-hadoop2/lib/management-api-3.0.0-b012.jar:./libs/hbase-0.96.2-hadoop2/lib/jackson-core-asl-1.8.8.jar:./libs/hbase-0.96.2-hadoop2/lib/slf4j-log4j12-1.6.4.jar:./libs/hbase-0.96.2-hadoop2/lib/hbase-server-0.96.2-hadoop2.jar:./libs/hbase-0.96.2-hadoop2/lib/jsp-2.1-6.1.14.jar:./libs/hbase-0.96.2-hadoop2/lib/log4j-1.2.17.jar:./libs/hbase-0.96.2-hadoop2/lib/hadoop-mapreduce-client-core-2.2.0.jar:./libs/hbase-0.96.2-hadoop2/lib/commons-codec-1.7.jar:./libs/hbase-0.96.2-hadoop2/lib/hadoop-mapreduce-client-jobclient-2.2.0.jar:./libs/hbase-0.96.2-hadoop2/lib/hbase-common-0.96.2-hadoop2.jar:./libs/hbase-0.96.2-hadoop2/lib/jersey-server-1.8.jar:./libs/hbase-0.96.2-hadoop2/lib/hbase-it-0.96.2-hadoop2.jar:./libs/hbase-0.96.2-hadoop2/lib/commons-el-1.0.jar:./libs/hbase-0.96.2-hadoop2/lib/commons-collections-3.2.1.jar:./libs/hbase-0.96.2-hadoop2/lib/hadoop-mapreduce-client-common-2.2.0.jar:./libs/hbase-0.96.2-hadoop2/lib/jersey-grizzly2-1.9.jar:./libs/hbase-0.96.2-hadoop2/lib/protobuf-java-2.5.0.jar:./libs/hbase-0.96.2-hadoop2/lib/hadoop-client-2.2.0.jar:./libs/hbase-0.96.2-hadoop2/lib/hadoop-common-2.2.0.jar:./libs/hbase-0.96.2-hadoop2/lib/hadoop-yarn-api-2.2.0.jar:./libs/hbase-0.96.2-hadoop2/lib/hadoop-mapreduce-client-app-2.2.0.jar:./libs/hbase-0.96.2-hadoop2/lib/hamcrest-core-1.3.jar:./libs/hbase-0.96.2-hadoop2/lib/commons-beanutils-core-1.8.0.jar:./libs/hbase-0.96.2-hadoop2/lib/hbase-client-0.96.2-hadoop2.jar:./libs/hbase-0.96.2-hadoop2/lib/slf4j-api-1.6.4.jar:./libs/hbase-0.96.2-hadoop2/lib/commons-compress-1.4.1.jar:./libs/hbase-0.96.2-hadoop2/lib/xmlenc-0.52.jar:./libs/hbase-0.96.2-hadoop2/lib/javax.servlet-api-3.0.1.jar:./libs/hbase-0.96.2-hadoop2/lib/hbase-hadoop-compat-0.96.2-hadoop2.jar:./libs/hbase-0.96.2-hadoop2/lib/grizzly-http-servlet-2.1.2.jar:./libs/hbase-0.96.2-hadoop2/lib/commons-io-2.4.jar:./libs/hbase-0.96.2-hadoop2/lib/jersey-guice-1.9.jar:./libs/hbase-0.96.2-hadoop2/lib/findbugs-annotations-1.3.9-1.jar:./libs/hbase-0.96.2-hadoop2/lib/avro-1.7.4.jar:./libs/hbase-0.96.2-hadoop2/lib/hbase-testing-util-0.96.2-hadoop2.jar:./libs/hbase-0.96.2-hadoop2/lib/commons-configuration-1.6.jar:./libs/hbase-0.96.2-hadoop2/lib/zookeeper-3.4.5.jar:./libs/hbase-0.96.2-hadoop2/lib/hbase-server-0.96.2-hadoop2-tests.jar:./libs/hbase-0.96.2-hadoop2/lib/jersey-json-1.8.jar:./libs/hbase-0.96.2-hadoop2/lib/commons-logging-1.1.1.jar:./libs/hbase-0.96.2-hadoop2/lib/hadoop-yarn-server-common-2.2.0.jar:./libs/hbase-0.96.2-hadoop2/lib/gmbal-api-only-3.0.0-b023.jar:./libs/hbase-0.96.2-hadoop2/lib/jasper-compiler-5.5.23.jar:./libs/hbase-0.96.2-hadoop2/lib/jasper-runtime-5.5.23.jar:./libs/hbase-0.96.2-hadoop2/lib/guava-12.0.1.jar:./libs/hbase-0.96.2-hadoop2/lib/hbase-prefix-tree-0.96.2-hadoop2.jar:./libs/hbase-0.96.2-hadoop2/lib/jetty-6.1.26.jar:./libs/hbase-0.96.2-hadoop2/lib/commons-math-2.1.jar:./libs/hbase-0.96.2-hadoop2/lib/hadoop-hdfs-2.2.0-tests.jar:./libs/hbase-0.96.2-hadoop2/lib/jersey-test-framework-grizzly2-1.9.jar:./libs/hbase-0.96.2-hadoop2/lib/jersey-client-1.9.jar:./libs/hbase-0.96.2-hadoop2/lib/jaxb-api-2.2.2.jar:./libs/hbase-0.96.2-hadoop2/lib/jackson-jaxrs-1.8.8.jar:./libs/hbase-0.96.2-hadoop2/lib/grizzly-http-server-2.1.2.jar:./libs/hbase-0.96.2-hadoop2/lib/metrics-core-2.1.2.jar:./libs/hbase-0.96.2-hadoop2/lib/jersey-core-1.8.jar:./libs/hbase-0.96.2-hadoop2/lib/jruby-complete-1.6.8.jar:./libs/hbase-0.96.2-hadoop2/lib/javax.servlet-3.1.jar:./libs/hbase-0.96.2-hadoop2/lib/common
s-httpclient-3.1.jar:./libs/hbase-0.96.2-hadoop2/lib/hadoop-hdfs-2.2.0.jar:./libs/hbase-0.96.2-hadoop2/lib/hadoop-mapreduce-client-shuffle-2.2.0.jar:./libs/hbase-0.96.2-hadoop2/lib/jaxb-impl-2.2.3-1.jar:./libs/hbase-0.96.2-hadoop2/lib/paranamer-2.3.jar:./libs/hbase-0.96.2-hadoop2/lib/xz-1.0.jar:./libs/hbase-0.96.2-hadoop2/lib/hbase-shell-0.96.2-hadoop2.jar:./libs/hbase-0.96.2-hadoop2/lib/jsp-api-2.1-6.1.14.jar:./libs/hbase-0.96.2-hadoop2/lib/commons-cli-1.2.jar:./libs/hbase-0.96.2-hadoop2/lib/aopalliance-1.0.jar:./libs/hbase-0.96.2-hadoop2/lib/grizzly-rcm-2.1.2.jar:./libs/hbase-0.96.2-hadoop2/lib/hadoop-auth-2.2.0.jar:./libs/hbase-0.96.2-hadoop2/lib/snappy-java-1.0.4.1.jar:./libs/hbase-0.96.2-hadoop2/lib/commons-lang-2.6.jar:./libs/hbase-0.96.2-hadoop2/lib/commons-daemon-1.0.13.jar:./libs/hbase-0.96.2-hadoop2/lib/hadoop-mapreduce-client-jobclient-2.2.0-tests.jar:./libs/hbase-0.96.2-hadoop2/lib/jersey-test-framework-core-1.9.jar:./libs/hbase-0.96.2-hadoop2/lib/jettison-1.3.1.jar:./libs/hbase-0.96.2-hadoop2/lib/jackson-mapper-asl-1.8.8.jar:./libs/hbase-0.96.2-hadoop2/lib/commons-net-3.1.jar:./libs/hbase-0.96.2-hadoop2/lib/hadoop-yarn-common-2.2.0.jar:./libs/hbase-0.96.2-hadoop2/lib/hadoop-annotations-2.2.0.jar:./libs/hbase-0.96.2-hadoop2/lib/hbase-protocol-0.96.2-hadoop2.jar:./libs/hbase-0.96.2-hadoop2/lib/commons-beanutils-1.7.0.jar:./libs/hbase-0.96.2-hadoop2/lib/servlet-api-2.5-6.1.14.jar:./libs/hbase-0.96.2-hadoop2/lib/hbase-hadoop2-compat-0.96.2-hadoop2.jar:./libs/hbase-0.96.2-hadoop2/lib/hadoop-yarn-client-2.2.0.jar:./libs/hbase-0.96.2-hadoop2/lib/hadoop-yarn-server-nodemanager-2.2.0.jar:./libs/hbase-0.96.2-hadoop2/lib/jsr305-1.3.9.jar:./libs/hbase-0.96.2-hadoop2/lib/jetty-sslengine-6.1.26.jar:./libs/hbase-0.96.2-hadoop2/lib/libthrift-0.9.0.jar:./libs/hbase-0.96.2-hadoop2/lib/jsch-0.1.42.jar:./libs/hbase-0.96.2-hadoop2/lib/guice-servlet-3.0.jar:./libs/hbase-0.96.2-hadoop2/lib/jackson-xc-1.8.8.jar:./libs/hbase-0.96.2-hadoop2/lib/netty-3.6.6.Final.jar:./libs/hbase-0.96.2-hadoop2/lib/commons-digester-1.8.jar:./libs/hbase-0.96.2-hadoop2/lib/httpcore-4.1.3.jar:./libs/hbase-0.96.2-hadoop2/lib/javax.inject-1.jar:./libs/hbase-0.96.2-hadoop2/lib/activation-1.1.jar:./libs/hbase-0.96.2-hadoop2/lib/hbase-thrift-0.96.2-hadoop2.jar:./libs/hbase-0.96.2-hadoop2/lib/jamon-runtime-2.3.1.jar:./libs/hbase-0.96.2-hadoop2/lib/htrace-core-2.04.jar:./libs/hbase-0.96.2-hadoop2/lib/guice-3.0.jar:./libs/hbase-0.96.2-hadoop2/lib/junit-4.11.jar:./libs/hbase-0.96.2-hadoop2/lib/jets3t-0.6.1.jar:./libs/hbase-0.96.2-hadoop2/lib/asm-3.1.jar:./libs/hbase-0.96.2-hadoop2/lib/hbase-examples-0.96.2-hadoop2.jar:./libs/hbase-0.96.2-hadoop2/lib/grizzly-http-2.1.2.jar:./libs/hbase-0.96.2-hadoop2/lib/grizzly-framework-2.1.2.jar:./libs/hbase-0.96.2-hadoop2/lib/jetty-util-6.1.26.jar:./libs/hbase-0.96.2-hadoop2/lib/httpclient-4.1.3.jar
2014-04-05 12:09:34,384 INFO  [main] zookeeper.ZooKeeper 
(Environment.java:logEnv(100)) - Client 
environment:java.library.path=/usr/lib/jvm/java-1.6.0-openjdk-1.6.0.0.x86_64/jre/lib/amd64/server:/usr/lib/jvm/java-1.6.0-openjdk-1.6.0.0.x86_64/jre/lib/amd64:/usr/lib/jvm/java-1.6.0-openjdk-1.6.0.0.x86_64/jre/../lib/amd64:/usr/java/packages/lib/amd64:/usr/lib64:/lib64:/lib:/usr/lib
2014-04-05 12:09:34,386 INFO  [main] zookeeper.ZooKeeper 
(Environment.java:logEnv(100)) - Client environment:java.io.tmpdir=/tmp
2014-04-05 12:09:34,387 INFO  [main] zookeeper.ZooKeeper 
(Environment.java:logEnv(100)) - Client environment:java.compiler=<NA>
2014-04-05 12:09:34,387 INFO  [main] zookeeper.ZooKeeper 
(Environment.java:logEnv(100)) - Client environment:os.name=Linux
2014-04-05 12:09:34,388 INFO  [main] zookeeper.ZooKeeper 
(Environment.java:logEnv(100)) - Client environment:os.arch=amd64
2014-04-05 12:09:34,388 INFO  [main] zookeeper.ZooKeeper 
(Environment.java:logEnv(100)) - Client 
environment:os.version=2.6.32-431.11.2.el6.x86_64
2014-04-05 12:09:34,389 INFO  [main] zookeeper.ZooKeeper 
(Environment.java:logEnv(100)) - Client environment:user.name=hbase
2014-04-05 12:09:34,389 INFO  [main] zookeeper.ZooKeeper 
(Environment.java:logEnv(100)) - Client environment:user.home=/home/hbase
2014-04-05 12:09:34,390 INFO  [main] zookeeper.ZooKeeper 
(Environment.java:logEnv(100)) - Client 
environment:user.dir=/home/hbase/hbase_connect
2014-04-05 12:09:34,410 INFO  [main] zookeeper.ZooKeeper 
(ZooKeeper.java:<init>(438)) - Initiating client connection, 
connectString=sandbox.hortonworks.com:2181 sessionTimeout=90000 
watcher=hconnection-0x3e8a5356, quorum=sandbox.hortonworks.com:2181, 
baseZNode=/hbase-unsecure
2014-04-05 12:09:34,606 INFO  [main] zookeeper.RecoverableZooKeeper 
(RecoverableZooKeeper.java:<init>(120)) - Process 
identifier=hconnection-0x3e8a5356 connecting to ZooKeeper 
ensemble=sandbox.hortonworks.com:2181
2014-04-05 12:09:34,615 INFO 
[main-SendThread(sandbox.hortonworks.com:2181)] zookeeper.ClientCnxn 
(ClientCnxn.java:logStartConnect(966)) - Opening socket connection to 
server sandbox.hortonworks.com/10.0.2.15:2181. Will not attempt to 
authenticate using SASL (unknown error)
2014-04-05 12:09:34,663 INFO 
[main-SendThread(sandbox.hortonworks.com:2181)] zookeeper.ClientCnxn 
(ClientCnxn.java:primeConnection(849)) - Socket connection established 
to sandbox.hortonworks.com/10.0.2.15:2181, initiating session
2014-04-05 12:09:34,741 INFO 
[main-SendThread(sandbox.hortonworks.com:2181)] zookeeper.ClientCnxn 
(ClientCnxn.java:onConnected(1207)) - Session establishment complete on 
server sandbox.hortonworks.com/10.0.2.15:2181, sessionid = 
0x1453145e9500058, negotiated timeout = 40000
2014-04-05 12:09:36,539 INFO  [main] Configuration.deprecation 
(Configuration.java:warnOnceIfDeprecated(840)) - hadoop.native.lib is 
deprecated. Instead, use io.native.lib.available
Table = ambarismoketest
Table = mytable
Table = simple_hcat_load_table
Table = users
Table = weblogs

The problem was mixed Hadoop jars: hadoop-core-1.2.1.jar (Hadoop 1) sat on the classpath next to the hadoop2 build of HBase 0.96, and its org.apache.hadoop.net.NetUtils lacks the getInputStream signature the HBase client was compiled against, hence the NoSuchMethodError quoted below. After replacing it with the Hadoop 2 jar (hadoop-common-2.2.0.2.0.6.0-76.jar, visible in the classpath above), the connection succeeds and the tables are listed. Maybe my mistake will help somebody :)
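
For anyone reusing the code, here is a minimal cleaned-up sketch of the same listing program. The configuration values are the ones from this thread; the try/finally close and getNameAsString() are my additions (both exist in the 0.96 client API), and hbase.rootdir is omitted because the client locates the cluster through ZooKeeper alone:

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hbase.HBaseConfiguration;
import org.apache.hadoop.hbase.HTableDescriptor;
import org.apache.hadoop.hbase.client.HBaseAdmin;

public class Hbase_connect {

        public static void main(String[] args) throws Exception {
                Configuration conf = HBaseConfiguration.create();
                // Point the client at the sandbox ZooKeeper and the
                // non-default znode parent the sandbox uses.
                conf.set("hbase.zookeeper.quorum", "sandbox.hortonworks.com");
                conf.set("hbase.zookeeper.property.clientPort", "2181");
                conf.set("zookeeper.znode.parent", "/hbase-unsecure");
                HBaseAdmin admin = new HBaseAdmin(conf);
                try {
                        // listTables() fetches one HTableDescriptor per
                        // table from the master.
                        for (HTableDescriptor desc : admin.listTables()) {
                                System.out.println("Table = " + desc.getNameAsString());
                        }
                } finally {
                        admin.close(); // releases the ZooKeeper session
                }
        }
}

Compile and run it with the corrected classpath shown above.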

Best regards, Margus (Margusja) Roo
+372 51 48 780
http://margus.roo.ee
http://ee.linkedin.com/in/margusroo
skype: margusja
ldapsearch -x -h ldap.sk.ee -b c=EE "(serialNumber=37303140314)"

On 10/04/14 11:12, Margusja wrote:
> Hi
> I have java code:
>
> import org.apache.hadoop.conf.Configuration;
> import org.apache.hadoop.hbase.HBaseConfiguration;
> import org.apache.hadoop.hbase.HColumnDescriptor;
> import org.apache.hadoop.hbase.HTableDescriptor;
> import org.apache.hadoop.hbase.client.HBaseAdmin;
> import org.apache.hadoop.hbase.util.Bytes;
>
> public class Hbase_connect {
>
>         public static void main(String[] args) throws Exception {
>                 Configuration conf = HBaseConfiguration.create();
>                 conf.set("hbase.zookeeper.quorum", "sandbox.hortonworks.com");
>                 conf.set("hbase.zookeeper.property.clientPort", "2181");
>                 conf.set("hbase.rootdir", "hdfs://sandbox.hortonworks.com:8020/apps/hbase/data");
>                 conf.set("zookeeper.znode.parent", "/hbase-unsecure");
>                 HBaseAdmin admin = new HBaseAdmin(conf);
>                 HTableDescriptor[] tabdesc = admin.listTables();
>                 for (int i = 0; i < tabdesc.length; i++) {
>                         System.out.println("Table = " + new String(tabdesc[i].getName()));
>                 }
>         }
> }
>
> ^C[hbase@sandbox hbase_connect]$ ls -lah libs/
> total 80M
> drwxr-xr-x 3 hbase hadoop 4.0K Apr  5 10:42 .
> drwxr-xr-x 3 hbase hadoop 4.0K Apr  5 11:02 ..
> -rw-r--r-- 1 hbase hadoop 2.5K Oct  6 23:39 hadoop-client-2.2.0.jar
> -rw-r--r-- 1 hbase hadoop 4.1M Jul 24  2013 hadoop-core-1.2.1.jar
> drwxr-xr-x 4 hbase hadoop 4.0K Apr  5 09:40 hbase-0.96.2-hadoop2
> -rw-r--r-- 1 hbase hadoop  76M Apr  3 16:18 hbase-0.96.2-hadoop2-bin.tar.gz
>
> [hbase@sandbox hbase_connect]$ java -cp 
> ./:./libs/*:./libs/hbase-0.96.2-hadoop2/lib/* Hbase_connect
> 14/04/05 11:03:03 INFO zookeeper.ZooKeeper: Client 
> environment:zookeeper.version=3.4.5-1392090, built on 09/30/2012 17:52 
> GMT
> 14/04/05 11:03:03 INFO zookeeper.ZooKeeper: Client 
> environment:host.name=sandbox.hortonworks.com
> 14/04/05 11:03:03 INFO zookeeper.ZooKeeper: Client 
> environment:java.version=1.6.0_30
> 14/04/05 11:03:03 INFO zookeeper.ZooKeeper: Client 
> environment:java.vendor=Sun Microsystems Inc.
> 14/04/05 11:03:03 INFO zookeeper.ZooKeeper: Client 
> environment:java.home=/usr/lib/jvm/java-1.6.0-openjdk-1.6.0.0.x86_64/jre
> 14/04/05 11:03:03 INFO zookeeper.ZooKeeper: Client 
> environment:java.class.path=./:./libs/hadoop-client-2.2.0.jar:./libs/hadoop-core-1.2.1.jar:./libs/hbase-0.96.2-hadoop2/lib/management-api-3.0.0-b012.jar:./libs/hbase-0.96.2-hadoop2/lib/jackson-core-asl-1.8.8.jar:./libs/hbase-0.96.2-hadoop2/lib/slf4j-log4j12-1.6.4.jar:./libs/hbase-0.96.2-hadoop2/lib/hbase-server-0.96.2-hadoop2.jar:./libs/hbase-0.96.2-hadoop2/lib/jsp-2.1-6.1.14.jar:./libs/hbase-0.96.2-hadoop2/lib/log4j-1.2.17.jar:./libs/hbase-0.96.2-hadoop2/lib/hadoop-mapreduce-client-core-2.2.0.jar:./libs/hbase-0.96.2-hadoop2/lib/commons-codec-1.7.jar:./libs/hbase-0.96.2-hadoop2/lib/hadoop-mapreduce-client-jobclient-2.2.0.jar:./libs/hbase-0.96.2-hadoop2/lib/hbase-common-0.96.2-hadoop2.jar:./libs/hbase-0.96.2-hadoop2/lib/jersey-server-1.8.jar:./libs/hbase-0.96.2-hadoop2/lib/hbase-it-0.96.2-hadoop2.jar:./libs/hbase-0.96.2-hadoop2/lib/commons-el-1.0.jar:./libs/hbase-0.96.2-hadoop2/lib/commons-collections-3.2.1.jar:./libs/hbase-0.96.2-hadoop2/lib/hadoop-mapreduce-client-common-2.2.0.jar:./libs/hbase-0.96.2-hadoop2/lib/jersey-grizzly2-1.9.jar:./libs/hbase-0.96.2-hadoop2/lib/protobuf-java-2.5.0.jar:./libs/hbase-0.96.2-hadoop2/lib/hadoop-client-2.2.0.jar:./libs/hbase-0.96.2-hadoop2/lib/hadoop-common-2.2.0.jar:./libs/hbase-0.96.2-hadoop2/lib/hadoop-yarn-api-2.2.0.jar:./libs/hbase-0.96.2-hadoop2/lib/hadoop-mapreduce-client-app-2.2.0.jar:./libs/hbase-0.96.2-hadoop2/lib/hamcrest-core-1.3.jar:./libs/hbase-0.96.2-hadoop2/lib/commons-beanutils-core-1.8.0.jar:./libs/hbase-0.96.2-hadoop2/lib/hbase-client-0.96.2-hadoop2.jar:./libs/hbase-0.96.2-hadoop2/lib/slf4j-api-1.6.4.jar:./libs/hbase-0.96.2-hadoop2/lib/commons-compress-1.4.1.jar:./libs/hbase-0.96.2-hadoop2/lib/xmlenc-0.52.jar:./libs/hbase-0.96.2-hadoop2/lib/javax.servlet-api-3.0.1.jar:./libs/hbase-0.96.2-hadoop2/lib/hbase-hadoop-compat-0.96.2-hadoop2.jar:./libs/hbase-0.96.2-hadoop2/lib/grizzly-http-servlet-2.1.2.jar:./libs/hbase-0.96.2-hadoop2/lib/commons-io-2.4.jar:./libs/hbase-0.96.2-hadoop2/lib/jersey-guice-1.9.jar:./libs/hbase-0.96.2-hadoop2/lib/findbugs-annotations-1.3.9-1.jar:./libs/hbase-0.96.2-hadoop2/lib/avro-1.7.4.jar:./libs/hbase-0.96.2-hadoop2/lib/hbase-testing-util-0.96.2-hadoop2.jar:./libs/hbase-0.96.2-hadoop2/lib/commons-configuration-1.6.jar:./libs/hbase-0.96.2-hadoop2/lib/zookeeper-3.4.5.jar:./libs/hbase-0.96.2-hadoop2/lib/hbase-server-0.96.2-hadoop2-tests.jar:./libs/hbase-0.96.2-hadoop2/lib/jersey-json-1.8.jar:./libs/hbase-0.96.2-hadoop2/lib/commons-logging-1.1.1.jar:./libs/hbase-0.96.2-hadoop2/lib/hadoop-yarn-server-common-2.2.0.jar:./libs/hbase-0.96.2-hadoop2/lib/gmbal-api-only-3.0.0-b023.jar:./libs/hbase-0.96.2-hadoop2/lib/jasper-compiler-5.5.23.jar:./libs/hbase-0.96.2-hadoop2/lib/jasper-runtime-5.5.23.jar:./libs/hbase-0.96.2-hadoop2/lib/guava-12.0.1.jar:./libs/hbase-0.96.2-hadoop2/lib/hbase-prefix-tree-0.96.2-hadoop2.jar:./libs/hbase-0.96.2-hadoop2/lib/jetty-6.1.26.jar:./libs/hbase-0.96.2-hadoop2/lib/commons-math-2.1.jar:./libs/hbase-0.96.2-hadoop2/lib/hadoop-hdfs-2.2.0-tests.jar:./libs/hbase-0.96.2-hadoop2/lib/jersey-test-framework-grizzly2-1.9.jar:./libs/hbase-0.96.2-hadoop2/lib/jersey-client-1.9.jar:./libs/hbase-0.96.2-hadoop2/lib/jaxb-api-2.2.2.jar:./libs/hbase-0.96.2-hadoop2/lib/jackson-jaxrs-1.8.8.jar:./libs/hbase-0.96.2-hadoop2/lib/grizzly-http-server-2.1.2.jar:./libs/hbase-0.96.2-hadoop2/lib/metrics-core-2.1.2.jar:./libs/hbase-0.96.2-hadoop2/lib/jersey-core-1.8.jar:./libs/hbase-0.96.2-hadoop2/lib/jruby-complete-1.6.8.jar:./libs/hbase-0.96.2-hadoop2/lib/javax.servlet-3.1.jar:./libs/hbase-0.96.2-hadoop2/lib/commons-httpclien
t-3.1.jar:./libs/hbase-0.96.2-hadoop2/lib/hadoop-hdfs-2.2.0.jar:./libs/hbase-0.96.2-hadoop2/lib/hadoop-mapreduce-client-shuffle-2.2.0.jar:./libs/hbase-0.96.2-hadoop2/lib/jaxb-impl-2.2.3-1.jar:./libs/hbase-0.96.2-hadoop2/lib/paranamer-2.3.jar:./libs/hbase-0.96.2-hadoop2/lib/xz-1.0.jar:./libs/hbase-0.96.2-hadoop2/lib/hbase-shell-0.96.2-hadoop2.jar:./libs/hbase-0.96.2-hadoop2/lib/jsp-api-2.1-6.1.14.jar:./libs/hbase-0.96.2-hadoop2/lib/commons-cli-1.2.jar:./libs/hbase-0.96.2-hadoop2/lib/aopalliance-1.0.jar:./libs/hbase-0.96.2-hadoop2/lib/grizzly-rcm-2.1.2.jar:./libs/hbase-0.96.2-hadoop2/lib/hadoop-auth-2.2.0.jar:./libs/hbase-0.96.2-hadoop2/lib/snappy-java-1.0.4.1.jar:./libs/hbase-0.96.2-hadoop2/lib/commons-lang-2.6.jar:./libs/hbase-0.96.2-hadoop2/lib/commons-daemon-1.0.13.jar:./libs/hbase-0.96.2-hadoop2/lib/hadoop-mapreduce-client-jobclient-2.2.0-tests.jar:./libs/hbase-0.96.2-hadoop2/lib/jersey-test-framework-core-1.9.jar:./libs/hbase-0.96.2-hadoop2/lib/jettison-1.3.1.jar:./libs/hbase-0.96.2-hadoop2/lib/jackson-mapper-asl-1.8.8.jar:./libs/hbase-0.96.2-hadoop2/lib/commons-net-3.1.jar:./libs/hbase-0.96.2-hadoop2/lib/hadoop-yarn-common-2.2.0.jar:./libs/hbase-0.96.2-hadoop2/lib/hadoop-annotations-2.2.0.jar:./libs/hbase-0.96.2-hadoop2/lib/hbase-protocol-0.96.2-hadoop2.jar:./libs/hbase-0.96.2-hadoop2/lib/commons-beanutils-1.7.0.jar:./libs/hbase-0.96.2-hadoop2/lib/servlet-api-2.5-6.1.14.jar:./libs/hbase-0.96.2-hadoop2/lib/hbase-hadoop2-compat-0.96.2-hadoop2.jar:./libs/hbase-0.96.2-hadoop2/lib/hadoop-yarn-client-2.2.0.jar:./libs/hbase-0.96.2-hadoop2/lib/hadoop-yarn-server-nodemanager-2.2.0.jar:./libs/hbase-0.96.2-hadoop2/lib/jsr305-1.3.9.jar:./libs/hbase-0.96.2-hadoop2/lib/jetty-sslengine-6.1.26.jar:./libs/hbase-0.96.2-hadoop2/lib/libthrift-0.9.0.jar:./libs/hbase-0.96.2-hadoop2/lib/jsch-0.1.42.jar:./libs/hbase-0.96.2-hadoop2/lib/guice-servlet-3.0.jar:./libs/hbase-0.96.2-hadoop2/lib/jackson-xc-1.8.8.jar:./libs/hbase-0.96.2-hadoop2/lib/netty-3.6.6.Final.jar:./libs/hbase-0.96.2-hadoop2/lib/commons-digester-1.8.jar:./libs/hbase-0.96.2-hadoop2/lib/httpcore-4.1.3.jar:./libs/hbase-0.96.2-hadoop2/lib/javax.inject-1.jar:./libs/hbase-0.96.2-hadoop2/lib/activation-1.1.jar:./libs/hbase-0.96.2-hadoop2/lib/hbase-thrift-0.96.2-hadoop2.jar:./libs/hbase-0.96.2-hadoop2/lib/jamon-runtime-2.3.1.jar:./libs/hbase-0.96.2-hadoop2/lib/htrace-core-2.04.jar:./libs/hbase-0.96.2-hadoop2/lib/guice-3.0.jar:./libs/hbase-0.96.2-hadoop2/lib/junit-4.11.jar:./libs/hbase-0.96.2-hadoop2/lib/jets3t-0.6.1.jar:./libs/hbase-0.96.2-hadoop2/lib/asm-3.1.jar:./libs/hbase-0.96.2-hadoop2/lib/hbase-examples-0.96.2-hadoop2.jar:./libs/hbase-0.96.2-hadoop2/lib/grizzly-http-2.1.2.jar:./libs/hbase-0.96.2-hadoop2/lib/grizzly-framework-2.1.2.jar:./libs/hbase-0.96.2-hadoop2/lib/jetty-util-6.1.26.jar:./libs/hbase-0.96.2-hadoop2/lib/httpclient-4.1.3.jar
> 14/04/05 11:03:03 INFO zookeeper.ZooKeeper: Client 
> environment:java.library.path=/usr/lib/jvm/java-1.6.0-openjdk-1.6.0.0.x86_64/jre/lib/amd64/server:/usr/lib/jvm/java-1.6.0-openjdk-1.6.0.0.x86_64/jre/lib/amd64:/usr/lib/jvm/java-1.6.0-openjdk-1.6.0.0.x86_64/jre/../lib/amd64:/usr/java/packages/lib/amd64:/usr/lib64:/lib64:/lib:/usr/lib
> 14/04/05 11:03:03 INFO zookeeper.ZooKeeper: Client 
> environment:java.io.tmpdir=/tmp
> 14/04/05 11:03:03 INFO zookeeper.ZooKeeper: Client 
> environment:java.compiler=<NA>
> 14/04/05 11:03:03 INFO zookeeper.ZooKeeper: Client 
> environment:os.name=Linux
> 14/04/05 11:03:03 INFO zookeeper.ZooKeeper: Client 
> environment:os.arch=amd64
> 14/04/05 11:03:03 INFO zookeeper.ZooKeeper: Client 
> environment:os.version=2.6.32-431.11.2.el6.x86_64
> 14/04/05 11:03:03 INFO zookeeper.ZooKeeper: Client 
> environment:user.name=hbase
> 14/04/05 11:03:03 INFO zookeeper.ZooKeeper: Client 
> environment:user.home=/home/hbase
> 14/04/05 11:03:03 INFO zookeeper.ZooKeeper: Client 
> environment:user.dir=/home/hbase/hbase_connect
> 14/04/05 11:03:03 INFO zookeeper.ZooKeeper: Initiating client 
> connection, connectString=sandbox.hortonworks.com:2181 
> sessionTimeout=90000 watcher=hconnection-0x35ec28b7, 
> quorum=sandbox.hortonworks.com:2181, baseZNode=/hbase-unsecure
> 14/04/05 11:03:03 INFO zookeeper.RecoverableZooKeeper: Process 
> identifier=hconnection-0x35ec28b7 connecting to ZooKeeper 
> ensemble=sandbox.hortonworks.com:2181
> 14/04/05 11:03:03 INFO zookeeper.ClientCnxn: Opening socket connection 
> to server sandbox.hortonworks.com/10.0.2.15:2181. Will not attempt to 
> authenticate using SASL (unknown error)
> 14/04/05 11:03:03 INFO zookeeper.ClientCnxn: Socket connection 
> established to sandbox.hortonworks.com/10.0.2.15:2181, initiating session
> 14/04/05 11:03:03 INFO zookeeper.ClientCnxn: Session establishment 
> complete on server sandbox.hortonworks.com/10.0.2.15:2181, sessionid = 
> 0x1453145e9500056, negotiated timeout = 40000
> 14/04/05 11:03:04 INFO 
> client.HConnectionManager$HConnectionImplementation: getMaster attempt 
> 1 of 35 failed; retrying after sleep of 100, 
> exception=com.google.protobuf.ServiceException: 
> org.apache.hadoop.hbase.DoNotRetryIOException: 
> java.lang.NoSuchMethodError: 
> org.apache.hadoop.net.NetUtils.getInputStream(Ljava/net/Socket;)Lorg/apache/hadoop/net/SocketInputWrapper;
> 14/04/05 11:03:04 INFO 
> client.HConnectionManager$HConnectionImplementation: getMaster attempt 
> 2 of 35 failed; retrying after sleep of 201, 
> exception=com.google.protobuf.ServiceException: 
> org.apache.hadoop.hbase.ipc.RpcClient$FailedServerException: This 
> server is in the failed servers list: 
> sandbox.hortonworks.com/10.0.2.15:60000
> 14/04/05 11:03:04 INFO 
> client.HConnectionManager$HConnectionImplementation: getMaster attempt 
> 3 of 35 failed; retrying after sleep of 300, 
> exception=com.google.protobuf.ServiceException: 
> org.apache.hadoop.hbase.ipc.RpcClient$FailedServerException: This 
> server is in the failed servers list: 
> sandbox.hortonworks.com/10.0.2.15:60000
> 14/04/05 11:03:05 INFO 
> client.HConnectionManager$HConnectionImplementation: getMaster attempt 
> 4 of 35 failed; retrying after sleep of 500, 
> exception=com.google.protobuf.ServiceException: 
> org.apache.hadoop.hbase.ipc.RpcClient$FailedServerException: This 
> server is in the failed servers list: 
> sandbox.hortonworks.com/10.0.2.15:60000
> 14/04/05 11:03:05 INFO 
> client.HConnectionManager$HConnectionImplementation: getMaster attempt 
> 5 of 35 failed; retrying after sleep of 1001, 
> exception=com.google.protobuf.ServiceException: 
> org.apache.hadoop.hbase.ipc.RpcClient$FailedServerException: This 
> server is in the failed servers list: 
> sandbox.hortonworks.com/10.0.2.15:60000
> 14/04/05 11:03:06 INFO 
> client.HConnectionManager$HConnectionImplementation: getMaster attempt 
> 6 of 35 failed; retrying after sleep of 2014, 
> exception=com.google.protobuf.ServiceException: 
> org.apache.hadoop.hbase.DoNotRetryIOException: 
> java.lang.NoSuchMethodError: 
> org.apache.hadoop.net.NetUtils.getInputStream(Ljava/net/Socket;)Lorg/apache/hadoop/net/SocketInputWrapper;
> 14/04/05 11:03:08 INFO 
> client.HConnectionManager$HConnectionImplementation: getMaster attempt 
> 7 of 35 failed; retrying after sleep of 4027, 
> exception=com.google.protobuf.ServiceException: 
> org.apache.hadoop.hbase.DoNotRetryIOException: 
> java.lang.NoSuchMethodError: 
> org.apache.hadoop.net.NetUtils.getInputStream(Ljava/net/Socket;)Lorg/apache/hadoop/net/SocketInputWrapper;
>
>
> [hbase@sandbox hbase_connect]$ jps
> 4355 HMaster
> 5335 Jps
> 4711 HRegionServer
> 4715 ThriftServer
> 4717 RESTServer
>
> tcp 0 0 0.0.0.0:2181 0.0.0.0:* LISTEN
> tcp 0 0 10.0.2.15:60000 0.0.0.0:* LISTEN 4355/java
>
> [root@sandbox ~]# cat /etc/hosts
> 127.0.0.1       localhost.localdomain localhost
> 10.0.2.15       sandbox.hortonworks.com sandbox
>
> Any hints?
>
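
What decided the outcome here is which NetUtils class wins on the classpath: with both hadoop-core-1.2.1.jar and the Hadoop 2 jars present, the first entry containing the class is the one the JVM loads. A quick way to check a setup like the one above is to ask the JVM where a class came from. This helper is not from the thread (the class name is made up), just a sketch:

public class WhichNetUtils {
        public static void main(String[] args) throws Exception {
                // Print the jar org.apache.hadoop.net.NetUtils was loaded
                // from; classpath entries are searched left to right, so
                // the first jar containing the class wins.
                Class<?> c = Class.forName("org.apache.hadoop.net.NetUtils");
                System.out.println(c.getProtectionDomain().getCodeSource().getLocation());
        }
}

Run it with the same -cp as the client program; if it prints hadoop-core-1.2.1.jar, the hadoop2 build of HBase will fail with the NoSuchMethodError quoted above.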