You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by mb...@apache.org on 2012/09/20 14:50:19 UTC

svn commit: r1387998 - in /hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase: client/Result.java regionserver/HRegion.java regionserver/HRegionServer.java regionserver/StoreScanner.java

Author: mbautin
Date: Thu Sep 20 12:50:19 2012
New Revision: 1387998

URL: http://svn.apache.org/viewvc?rev=1387998&view=rev
Log:
[jira] [HBASE-6066] [89-fb] Some read performance improvements

Author: michalgr

Summary: Changes suggested in HBASE-6066

Test Plan: I run unit tests.

Reviewers: kranganathan, kannan, stack, mbautin, liyintang

Reviewed By: kannan

CC: HBase Diffs Facebook Group, lhofhansl, JIRA, tedyu

Differential Revision: https://reviews.facebook.net/D4191

Modified:
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/client/Result.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
    hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/client/Result.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/client/Result.java?rev=1387998&r1=1387997&r2=1387998&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/client/Result.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/client/Result.java Thu Sep 20 12:50:19 2012
@@ -90,12 +90,14 @@ public class Result implements Writable,
     }
   }
 
+  private static final KeyValue[] EMPTY_KEY_VALUE_ARRAY = new KeyValue[0];
+
   /**
    * Instantiate a Result with the specified List of KeyValues.
    * @param kvs List of KeyValues
    */
   public Result(List<KeyValue> kvs) {
-    this(kvs.toArray(new KeyValue[0]));
+    this(kvs.toArray(EMPTY_KEY_VALUE_ARRAY));
   }
 
   /**

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java?rev=1387998&r1=1387997&r2=1387998&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegion.java Thu Sep 20 12:50:19 2012
@@ -2985,8 +2985,7 @@ public class HRegion implements HeapSize
     KeyValueHeap storeHeap = null;
     private final byte [] stopRow;
     private Filter filter;
-    private List<KeyValue> results = new ArrayList<KeyValue>();
-    private int batch;
+    private final int batch;
     private int isScan;
     private boolean filterClosed = false;
     private long readPt;
@@ -3043,7 +3042,7 @@ public class HRegion implements HeapSize
     }
 
     @Override
-    public synchronized boolean next(List<KeyValue> outResults, int limit)
+    public boolean next(List<KeyValue> outResults, int limit)
         throws IOException {
       return next(outResults, limit, null);
     }
@@ -3066,10 +3065,17 @@ public class HRegion implements HeapSize
       // This could be a new thread from the last time we called next().
       MultiVersionConsistencyControl.setThreadReadPoint(this.readPt);
 
-      results.clear();
-      boolean returnResult = nextInternal(limit, metric);
+      boolean returnResult;
+      if (outResults.isEmpty()) {
+         // Usually outResults is empty. This is true when next is called
+         // to handle scan or get operation.
+        returnResult = nextInternal(outResults, limit, metric);
+      } else {
+        List<KeyValue> tmpList = new ArrayList<KeyValue>();
+        returnResult = nextInternal(tmpList, limit, metric);
+        outResults.addAll(tmpList);
+      }
 
-      outResults.addAll(results);
       resetFilters();
       if (isFilterDone()) {
         return false;
@@ -3078,14 +3084,14 @@ public class HRegion implements HeapSize
     }
 
     @Override
-    public synchronized boolean next(List<KeyValue> outResults)
+    public boolean next(List<KeyValue> outResults)
         throws IOException {
       // apply the batching limit by default
       return next(outResults, batch, null);
     }
 
     @Override
-    public synchronized boolean next(List<KeyValue> outResults, String metric)
+    public boolean next(List<KeyValue> outResults, String metric)
         throws IOException {
       // apply the batching limit by default
       return next(outResults, batch, metric);
@@ -3098,7 +3104,16 @@ public class HRegion implements HeapSize
       return this.filter != null && this.filter.filterAllRemaining();
     }
 
-    private boolean nextInternal(int limit, String metric) throws IOException {
+    /**
+     * @param results empty list in which results will be stored
+     */
+    private boolean nextInternal(List<KeyValue> results, int limit, String metric)
+        throws IOException {
+
+      if (!results.isEmpty()) {
+        throw new IllegalArgumentException("First parameter should be an empty list");
+      }
+
       while (true) {
         byte [] currentRow = peekRow();
         if (isStopRow(currentRow)) {
@@ -3112,6 +3127,7 @@ public class HRegion implements HeapSize
           return false;
         } else if (filterRowKey(currentRow)) {
           nextRow(currentRow);
+          results.clear();
         } else {
           byte [] nextRow;
           do {
@@ -3133,12 +3149,8 @@ public class HRegion implements HeapSize
           }
 
           if (results.isEmpty() || filterRow()) {
-            // this seems like a redundant step - we already consumed the row
-            // there're no left overs.
-            // the reasons for calling this method are:
-            // 1. reset the filters.
-            // 2. provide a hook to fast forward the row (used by subclasses)
             nextRow(currentRow);
+            results.clear();
 
             // This row was totally filtered out, if this is NOT the last row,
             // we should continue on.
@@ -3163,7 +3175,6 @@ public class HRegion implements HeapSize
       while (Bytes.equals(currentRow, peekRow())) {
         this.storeHeap.next(MOCKED_LIST);
       }
-      results.clear();
       resetFilters();
     }
 

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java?rev=1387998&r1=1387997&r2=1387998&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/HRegionServer.java Thu Sep 20 12:50:19 2012
@@ -2539,13 +2539,16 @@ public class HRegionServer implements HR
       List<Result> results = new ArrayList<Result>(nbRows);
       long currentScanResultSize = 0;
       List<KeyValue> values = new ArrayList<KeyValue>();
-      for (int i = 0; i < nbRows && currentScanResultSize < maxScannerResultSize; i++) {
-        requestCount.incrementAndGet();
+
+      int i = 0;
+      for (; i < nbRows && currentScanResultSize < maxScannerResultSize; i++) {
         // Collect values to be returned here
         boolean moreRows = s.next(values, HRegion.METRIC_NEXTSIZE);
         if (!values.isEmpty()) {
-          for (KeyValue kv : values) {
-            currentScanResultSize += kv.heapSize();
+          if (maxScannerResultSize < Long.MAX_VALUE){
+            for (KeyValue kv : values) {
+              currentScanResultSize += kv.heapSize();
+            }
           }
           results.add(new Result(values));
         }
@@ -2554,6 +2557,8 @@ public class HRegionServer implements HR
         }
         values.clear();
       }
+      requestCount.addAndGet(i);
+
       // Below is an ugly hack where we cast the InternalScanner to be a
       // HRegion.RegionScanner.  The alternative is to change InternalScanner
       // interface but its used everywhere whereas we just need a bit of info

Modified: hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java
URL: http://svn.apache.org/viewvc/hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java?rev=1387998&r1=1387997&r2=1387998&view=diff
==============================================================================
--- hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java (original)
+++ hbase/branches/0.89-fb/src/main/java/org/apache/hadoop/hbase/regionserver/StoreScanner.java Thu Sep 20 12:50:19 2012
@@ -23,6 +23,7 @@ package org.apache.hadoop.hbase.regionse
 import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
+import java.util.ListIterator;
 import java.util.NavigableSet;
 
 import org.apache.commons.logging.Log;
@@ -329,8 +330,7 @@ class StoreScanner extends NonLazyKeyVal
     }
     KeyValue kv;
     KeyValue prevKV = null;
-    List<KeyValue> results = new ArrayList<KeyValue>();
-
+    int numNewKeyValues = 0;
 
     Call call = HRegionServer.callContext.get();
     long quotaRemaining = (call == null) ? Long.MAX_VALUE
@@ -363,7 +363,6 @@ class StoreScanner extends NonLazyKeyVal
                 this.countPerRow > (storeLimit + storeOffset)) {
               // do what SEEK_NEXT_ROW does.
               if (!matcher.moreRowsMayExistAfter(kv)) {
-                outResult.addAll(results);
                 return false;
               }
               reseek(matcher.getKeyForNextRow(kv));
@@ -381,12 +380,12 @@ class StoreScanner extends NonLazyKeyVal
                 throw new DoNotRetryIOException("Result too large");
               }
 
-              results.add(copyKv);
+              outResult.add(copyKv);
+              numNewKeyValues++;
             }
 
             if (qcode == ScanQueryMatcher.MatchCode.INCLUDE_AND_SEEK_NEXT_ROW) {
               if (!matcher.moreRowsMayExistAfter(kv)) {
-                outResult.addAll(results);
                 return false;
               }
               reseek(matcher.getKeyForNextRow(kv));
@@ -396,29 +395,23 @@ class StoreScanner extends NonLazyKeyVal
               this.heap.next();
             }
 
-            if (limit > 0 && (results.size() == limit)) {
+            if (limit > 0 && (numNewKeyValues == limit)) {
               break LOOP;
             }
             continue;
 
           case DONE:
-            // copy jazz
-            outResult.addAll(results);
             return true;
 
           case DONE_SCAN:
             close();
 
-            // copy jazz
-            outResult.addAll(results);
-
             return false;
 
           case SEEK_NEXT_ROW:
             // This is just a relatively simple end of scan fix, to short-cut end
             // us if there is an endKey in the scan.
             if (!matcher.moreRowsMayExistAfter(kv)) {
-              outResult.addAll(results);
               return false;
             }
 
@@ -446,6 +439,31 @@ class StoreScanner extends NonLazyKeyVal
             throw new RuntimeException("UNEXPECTED");
         }
       }
+    } catch (IOException e) {
+      /*
+       * Function should not modify its outResult argument if
+       * exception was thrown. In ths case we should remove
+       * last numNewKeyValues elements from outResults.
+       */
+
+      final int length = outResult.size();
+
+      // this should be rare situation, we can use reflection
+      if (outResult instanceof ArrayList<?>){
+        // efficient code for ArrayList
+        ArrayList<?> asArrayList = (ArrayList<?>)outResult;
+        asArrayList.subList(length - numNewKeyValues, length).clear();
+      } else {
+        // generic case
+        ListIterator<?> iterator = outResult.listIterator(length);
+        while (numNewKeyValues-- > 0) {
+          iterator.previous();
+          iterator.remove();
+        }
+      }
+
+      throw e;
+
     } finally { 
       // update the counter 
       if (addedResultsSize > 0 && metric != null) {
@@ -459,9 +477,7 @@ class StoreScanner extends NonLazyKeyVal
       }
     }
 
-    if (!results.isEmpty()) {
-      // copy jazz
-      outResult.addAll(results);
+    if (numNewKeyValues > 0) {
       return true;
     }