You are viewing a plain text version of this content. The canonical link for it is here.
Posted to issues@hbase.apache.org by "Elliott Clark (JIRA)" <ji...@apache.org> on 2012/11/26 20:10:58 UTC
[jira] [Commented] (HBASE-7220) Creating a table with 3000 regions on 2 nodes fails after 1 hour

    [ https://issues.apache.org/jira/browse/HBASE-7220?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=13503993#comment-13503993 ] 

Elliott Clark commented on HBASE-7220:
--------------------------------------

I just tried this locally. The reason this takes so long is that whenever a new MetricsRegion is created, the whole metrics system is stopped and then started again (to clear all JMX caches).  This takes a long time, and there is no need to do it on the main thread.  All that matters is that the metrics system is restarted some time after the regions are added or removed.

I'll add a patch soon that moves this off the main thread and into the metrics executor.
Additionally we can merge several of the reset requests.
                
> Creating a table with 3000 regions on 2 nodes fails after 1 hour
> ----------------------------------------------------------------
>
>                 Key: HBASE-7220
>                 URL: https://issues.apache.org/jira/browse/HBASE-7220
>             Project: HBase
>          Issue Type: Bug
>          Components: metrics, Performance, regionserver
>    Affects Versions: 0.96.0
>         Environment: small test cluster. not reproducible on a local cluster (needs a network).
>            Reporter: nkeywal
>            Assignee: Elliott Clark
>
> I'm trying to create a table with 3000 regions on two region servers, from the shell.
> It's ok on trunk with a standalone config.
> It's ok on 0.94
> It's not ok on trunk: it fails after around 1 hour.
> If I remove all the code related to metrics in HRegion, the 3000 regions are created in 3 minutes (twice as fast as 0.94).
> On trunk, the region server spends its time in "waitForWork", while the master is in the tcp connection related code. It's a 1Gb network.
> I haven't looked at the metric code itself.
> Patch used to remove the metrics from HRegion:
> {noformat}
> index c70e9ab..6677e65 100644
> --- a/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
> +++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
> @@ -364,7 +364,7 @@ public class HRegion implements HeapSize { // , Writable{
>    private HTableDescriptor htableDescriptor = null;
>    private RegionSplitPolicy splitPolicy;
>  
> -  private final MetricsRegion metricsRegion;
> +  private final MetricsRegion metricsRegion = null;
>  
>    /**
>     * Should only be used for testing purposes
> @@ -388,7 +388,7 @@ public class HRegion implements HeapSize { // , Writable{
>      this.coprocessorHost = null;
>      this.scannerReadPoints = new ConcurrentHashMap<RegionScanner, Long>();
>  
> -    this.metricsRegion = new MetricsRegion(new MetricsRegionWrapperImpl(this));
> +    //this.metricsRegion = new MetricsRegion(new MetricsRegionWrapperImpl(this));
>    }
>  
>    /**
> @@ -451,7 +451,7 @@ public class HRegion implements HeapSize { // , Writable{
>      this.regiondir = getRegionDir(this.tableDir, encodedNameStr);
>      this.scannerReadPoints = new ConcurrentHashMap<RegionScanner, Long>();
>  
> -    this.metricsRegion = new MetricsRegion(new MetricsRegionWrapperImpl(this));
> +    //this.metricsRegion = new MetricsRegion(new MetricsRegionWrapperImpl(this));
>  
>      /*
>       * timestamp.slop provides a server-side constraint on the timestamp. This
> @@ -1024,7 +1024,7 @@ public class HRegion implements HeapSize { // , Writable{
>          status.setStatus("Running coprocessor post-close hooks");
>          this.coprocessorHost.postClose(abort);
>        }
> -      this.metricsRegion.close();
> +      //this.metricsRegion.close();
>        status.markComplete("Closed");
>        LOG.info("Closed " + this);
>        return result;
> @@ -2331,11 +2331,11 @@ public class HRegion implements HeapSize { // , Writable{
>        if (noOfPuts > 0) {
>          // There were some Puts in the batch.
>          double noOfMutations = noOfPuts + noOfDeletes;
> -        this.metricsRegion.updatePut();
> +        //this.metricsRegion.updatePut();
>        }
>        if (noOfDeletes > 0) {
>          // There were some Deletes in the batch.
> -        this.metricsRegion.updateDelete();
> +        //this.metricsRegion.updateDelete();
>        }
>        if (!success) {
>          for (int i = firstIndex; i < lastIndexExclusive; i++) {
> @@ -4270,7 +4270,7 @@ public class HRegion implements HeapSize { // , Writable{
>  
>      // do after lock
>  
> -    this.metricsRegion.updateGet();
> +    //this.metricsRegion.updateGet();
>  
>      return results;
>    }
> @@ -4657,7 +4657,7 @@ public class HRegion implements HeapSize { // , Writable{
>        closeRegionOperation();
>      }
>  
> -    this.metricsRegion.updateAppend();
> +    //this.metricsRegion.updateAppend();
>  
>  
>      if (flush) {
> @@ -4795,7 +4795,7 @@ public class HRegion implements HeapSize { // , Writable{
>          mvcc.completeMemstoreInsert(w);
>        }
>        closeRegionOperation();
> -      this.metricsRegion.updateIncrement();
> +      //this.metricsRegion.updateIncrement();
>      }
>  
>      if (flush) {
> {noformat}

--
This message is automatically generated by JIRA.
If you think it was sent incorrectly, please contact your JIRA administrators
For more information on JIRA, see: http://www.atlassian.com/software/jira