You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@phoenix.apache.org by 景涛 <84...@qq.com> on 2016/05/23 07:12:45 UTC

is there a way to Join two big tables?

Now, I have two big tables.
One has about 20 million records.
The other has about 10 million records.
When I join them, I get an error, and the message is the following:


Caused by: org.apache.phoenix.join.MaxServerCacheSizeExceededException: Size of hash cache (104857617 bytes) exceeds the maximum allowed size (104857600 bytes)
        at org.apache.phoenix.join.HashCacheClient.serialize(HashCacheClient.java:109)
        at org.apache.phoenix.join.HashCacheClient.addHashCache(HashCacheClient.java:82)
        at org.apache.phoenix.execute.HashJoinPlan$HashSubPlan.execute(HashJoinPlan.java:353)
        at org.apache.phoenix.execute.HashJoinPlan$1.call(HashJoinPlan.java:145)
        at java.util.concurrent.FutureTask.run(FutureTask.java:262)
        at org.apache.phoenix.job.JobManager$InstrumentedJobFutureTask.run(JobManager.java:183)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
        at java.lang.Thread.run(Thread.java:745)





I also tried the 'USE_SORT_MERGE_JOIN' hint in the query instead of 'HASH_JOIN', but it still fails with an error:


16/05/23 15:01:58 WARN client.ScannerCallable: Ignore, probably already closed
org.apache.hadoop.hbase.regionserver.LeaseException: org.apache.hadoop.hbase.regionserver.LeaseException: lease '44' does not exist
        at org.apache.hadoop.hbase.regionserver.Leases.removeLease(Leases.java:221)
        at org.apache.hadoop.hbase.regionserver.Leases.cancelLease(Leases.java:206)
        at org.apache.hadoop.hbase.regionserver.RSRpcServices.scan(RSRpcServices.java:2379)
        at org.apache.hadoop.hbase.protobuf.generated.ClientProtos$ClientService$2.callBlockingMethod(ClientProtos.java:32205)
        at org.apache.hadoop.hbase.ipc.RpcServer.call(RpcServer.java:2034)
        at org.apache.hadoop.hbase.ipc.CallRunner.run(CallRunner.java:107)
        at org.apache.hadoop.hbase.ipc.RpcExecutor.consumerLoop(RpcExecutor.java:130)
        at org.apache.hadoop.hbase.ipc.RpcExecutor$1.run(RpcExecutor.java:107)
        at java.lang.Thread.run(Thread.java:745)


        at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native Method)
        at sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57)
        at sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)
        at java.lang.reflect.Constructor.newInstance(Constructor.java:526)
        at org.apache.hadoop.ipc.RemoteException.instantiateException(RemoteException.java:106)
        at org.apache.hadoop.ipc.RemoteException.unwrapRemoteException(RemoteException.java:95)
        at org.apache.hadoop.hbase.protobuf.ProtobufUtil.getRemoteException(ProtobufUtil.java:328)
        at org.apache.hadoop.hbase.client.ScannerCallable.close(ScannerCallable.java:357)
        at org.apache.hadoop.hbase.client.ScannerCallable.call(ScannerCallable.java:195)
        at org.apache.hadoop.hbase.client.ScannerCallableWithReplicas.call(ScannerCallableWithReplicas.java:144)
        at org.apache.hadoop.hbase.client.ScannerCallableWithReplicas.call(ScannerCallableWithReplicas.java:63)
        at org.apache.hadoop.hbase.client.RpcRetryingCaller.callWithoutRetries(RpcRetryingCaller.java:200)
        at org.apache.hadoop.hbase.client.ClientScanner.call(ClientScanner.java:314)
        at org.apache.hadoop.hbase.client.ClientScanner.close(ClientScanner.java:712)
        at org.apache.phoenix.iterate.ScanningResultIterator.close(ScanningResultIterator.java:49)
        at org.apache.phoenix.iterate.TableResultIterator.close(TableResultIterator.java:95)
        at org.apache.phoenix.iterate.SpoolingResultIterator.<init>(SpoolingResultIterator.java:154)
        at org.apache.phoenix.iterate.SpoolingResultIterator.<init>(SpoolingResultIterator.java:83)
        at org.apache.phoenix.iterate.SpoolingResultIterator.<init>(SpoolingResultIterator.java:62)
        at org.apache.phoenix.iterate.SpoolingResultIterator$SpoolingResultIteratorFactory.newIterator(SpoolingResultIterator.java:78)
        at org.apache.phoenix.iterate.ParallelIterators$1.call(ParallelIterators.java:109)
        at org.apache.phoenix.iterate.ParallelIterators$1.call(ParallelIterators.java:100)
        at java.util.concurrent.FutureTask.run(FutureTask.java:262)
        at org.apache.phoenix.job.JobManager$InstrumentedJobFutureTask.run(JobManager.java:183)
        at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)
        at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)
        at java.lang.Thread.run(Thread.java:745)
Caused by: org.apache.hadoop.hbase.ipc.RemoteWithExtrasException(org.apache.hadoop.hbase.regionserver.LeaseException): org.apache.hadoop.hbase.regionserver.LeaseException: lease '44' does not exist
        at org.apache.hadoop.hbase.regionserver.Leases.removeLease(Leases.java:221)
        at org.apache.hadoop.hbase.regionserver.Leases.cancelLease(Leases.java:206)
        at org.apache.hadoop.hbase.regionserver.RSRpcServices.scan(RSRpcServices.java:2379)
        at org.apache.hadoop.hbase.protobuf.generated.ClientProtos$ClientService$2.callBlockingMethod(ClientProtos.java:32205)
        at org.apache.hadoop.hbase.ipc.RpcServer.call(RpcServer.java:2034)
        at org.apache.hadoop.hbase.ipc.CallRunner.run(CallRunner.java:107)
        at org.apache.hadoop.hbase.ipc.RpcExecutor.consumerLoop(RpcExecutor.java:130)
        at org.apache.hadoop.hbase.ipc.RpcExecutor$1.run(RpcExecutor.java:107)
        at java.lang.Thread.run(Thread.java:745)


        at org.apache.hadoop.hbase.ipc.RpcClientImpl.call(RpcClientImpl.java:1219)
        at org.apache.hadoop.hbase.ipc.AbstractRpcClient.callBlockingMethod(AbstractRpcClient.java:216)
        at org.apache.hadoop.hbase.ipc.AbstractRpcClient$BlockingRpcChannelImplementation.callBlockingMethod(AbstractRpcClient.java:300)
        at org.apache.hadoop.hbase.protobuf.generated.ClientProtos$ClientService$BlockingStub.scan(ClientProtos.java:32651)
        at org.apache.hadoop.hbase.client



Is there any way to solve it?
Thanks very much!

Re: is there a way to Join two big tables?

Posted by Ciureanu Constantin <ci...@gmail.com>.
Yes, of course it's possible.
Just not using Phoenix — try writing a Spark job (or MapReduce) instead. If you
pick the right join condition, it might actually not be slow at all
(including the time to read the two tables in Spark).

If you still want to do it in Phoenix, try increasing those limits (hash
cache size, lease timeout, and also the maximum allowed TMP size, etc.), but I
think this is too difficult a task for Phoenix.

2016-05-23 9:12 GMT+02:00 景涛 <84...@qq.com>:

> *Now, i have two big table; *
> *One is about 20 million records. *
> *Another one  is about 10 million records. *
> *When i joins,  it is error, and the message is following: *
>
> *Caused by: org.apache.phoenix.join.MaxServerCacheSizeExceededException:
> Size of hash cache (104857617 bytes) exceeds the maximum allowed size
> (104857600 bytes)*
> *        at
> org.apache.phoenix.join.HashCacheClient.serialize(HashCacheClient.java:109)*
> *        at
> org.apache.phoenix.join.HashCacheClient.addHashCache(HashCacheClient.java:82)*
> *        at
> org.apache.phoenix.execute.HashJoinPlan$HashSubPlan.execute(HashJoinPlan.java:353)*
> *        at
> org.apache.phoenix.execute.HashJoinPlan$1.call(HashJoinPlan.java:145)*
> *        at java.util.concurrent.FutureTask.run(FutureTask.java:262)*
> *        at
> org.apache.phoenix.job.JobManager$InstrumentedJobFutureTask.run(JobManager.java:183)*
> *        at
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)*
> *        at
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)*
> *        at java.lang.Thread.run(Thread.java:745)*
>
>
> *I also try to hint 'USE_SORT_MERGE_JOIN' in query  insteading of
> 'HASH_JOIN', but it is yet error....*
>
> *16/05/23 15:01:58 WARN client.ScannerCallable: Ignore, probably already
> closed*
> *org.apache.hadoop.hbase.regionserver.LeaseException:
> org.apache.hadoop.hbase.regionserver.LeaseException: lease '44' does not
> exist*
> *        at
> org.apache.hadoop.hbase.regionserver.Leases.removeLease(Leases.java:221)*
> *        at
> org.apache.hadoop.hbase.regionserver.Leases.cancelLease(Leases.java:206)*
> *        at
> org.apache.hadoop.hbase.regionserver.RSRpcServices.scan(RSRpcServices.java:2379)*
> *        at
> org.apache.hadoop.hbase.protobuf.generated.ClientProtos$ClientService$2.callBlockingMethod(ClientProtos.java:32205)*
> *        at
> org.apache.hadoop.hbase.ipc.RpcServer.call(RpcServer.java:2034)*
> *        at
> org.apache.hadoop.hbase.ipc.CallRunner.run(CallRunner.java:107)*
> *        at
> org.apache.hadoop.hbase.ipc.RpcExecutor.consumerLoop(RpcExecutor.java:130)*
> *        at
> org.apache.hadoop.hbase.ipc.RpcExecutor$1.run(RpcExecutor.java:107)*
> *        at java.lang.Thread.run(Thread.java:745)*
>
> *        at sun.reflect.NativeConstructorAccessorImpl.newInstance0(Native
> Method)*
> *        at
> sun.reflect.NativeConstructorAccessorImpl.newInstance(NativeConstructorAccessorImpl.java:57)*
> *        at
> sun.reflect.DelegatingConstructorAccessorImpl.newInstance(DelegatingConstructorAccessorImpl.java:45)*
> *        at
> java.lang.reflect.Constructor.newInstance(Constructor.java:526)*
> *        at
> org.apache.hadoop.ipc.RemoteException.instantiateException(RemoteException.java:106)*
> *        at
> org.apache.hadoop.ipc.RemoteException.unwrapRemoteException(RemoteException.java:95)*
> *        at
> org.apache.hadoop.hbase.protobuf.ProtobufUtil.getRemoteException(ProtobufUtil.java:328)*
> *        at
> org.apache.hadoop.hbase.client.ScannerCallable.close(ScannerCallable.java:357)*
> *        at
> org.apache.hadoop.hbase.client.ScannerCallable.call(ScannerCallable.java:195)*
> *        at
> org.apache.hadoop.hbase.client.ScannerCallableWithReplicas.call(ScannerCallableWithReplicas.java:144)*
> *        at
> org.apache.hadoop.hbase.client.ScannerCallableWithReplicas.call(ScannerCallableWithReplicas.java:63)*
> *        at
> org.apache.hadoop.hbase.client.RpcRetryingCaller.callWithoutRetries(RpcRetryingCaller.java:200)*
> *        at
> org.apache.hadoop.hbase.client.ClientScanner.call(ClientScanner.java:314)*
> *        at
> org.apache.hadoop.hbase.client.ClientScanner.close(ClientScanner.java:712)*
> *        at
> org.apache.phoenix.iterate.ScanningResultIterator.close(ScanningResultIterator.java:49)*
> *        at
> org.apache.phoenix.iterate.TableResultIterator.close(TableResultIterator.java:95)*
> *        at
> org.apache.phoenix.iterate.SpoolingResultIterator.<init>(SpoolingResultIterator.java:154)*
> *        at
> org.apache.phoenix.iterate.SpoolingResultIterator.<init>(SpoolingResultIterator.java:83)*
> *        at
> org.apache.phoenix.iterate.SpoolingResultIterator.<init>(SpoolingResultIterator.java:62)*
> *        at
> org.apache.phoenix.iterate.SpoolingResultIterator$SpoolingResultIteratorFactory.newIterator(SpoolingResultIterator.java:78)*
> *        at
> org.apache.phoenix.iterate.ParallelIterators$1.call(ParallelIterators.java:109)*
> *        at
> org.apache.phoenix.iterate.ParallelIterators$1.call(ParallelIterators.java:100)*
> *        at java.util.concurrent.FutureTask.run(FutureTask.java:262)*
> *        at
> org.apache.phoenix.job.JobManager$InstrumentedJobFutureTask.run(JobManager.java:183)*
> *        at
> java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1145)*
> *        at
> java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:615)*
> *        at java.lang.Thread.run(Thread.java:745)*
> *Caused by:
> org.apache.hadoop.hbase.ipc.RemoteWithExtrasException(org.apache.hadoop.hbase.regionserver.LeaseException):
> org.apache.hadoop.hbase.regionserver.LeaseException: lease '44' does not
> exist*
> *        at
> org.apache.hadoop.hbase.regionserver.Leases.removeLease(Leases.java:221)*
> *        at
> org.apache.hadoop.hbase.regionserver.Leases.cancelLease(Leases.java:206)*
> *        at
> org.apache.hadoop.hbase.regionserver.RSRpcServices.scan(RSRpcServices.java:2379)*
> *        at
> org.apache.hadoop.hbase.protobuf.generated.ClientProtos$ClientService$2.callBlockingMethod(ClientProtos.java:32205)*
> *        at
> org.apache.hadoop.hbase.ipc.RpcServer.call(RpcServer.java:2034)*
> *        at
> org.apache.hadoop.hbase.ipc.CallRunner.run(CallRunner.java:107)*
> *        at
> org.apache.hadoop.hbase.ipc.RpcExecutor.consumerLoop(RpcExecutor.java:130)*
> *        at
> org.apache.hadoop.hbase.ipc.RpcExecutor$1.run(RpcExecutor.java:107)*
> *        at java.lang.Thread.run(Thread.java:745)*
>
> *        at
> org.apache.hadoop.hbase.ipc.RpcClientImpl.call(RpcClientImpl.java:1219)*
> *        at
> org.apache.hadoop.hbase.ipc.AbstractRpcClient.callBlockingMethod(AbstractRpcClient.java:216)*
> *        at
> org.apache.hadoop.hbase.ipc.AbstractRpcClient$BlockingRpcChannelImplementation.callBlockingMethod(AbstractRpcClient.java:300)*
> *        at
> org.apache.hadoop.hbase.protobuf.generated.ClientProtos$ClientService$BlockingStub.scan(ClientProtos.java:32651)*
> *        at org.apache.hadoop.hbase.client*
>
> *Is there any way to solve it?*
> *Thanks very much!*
>