You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@hbase.apache.org by te...@apache.org on 2014/12/18 14:09:08 UTC

hbase git commit: HBASE-12223 MultiTableInputFormatBase.getSplits is too slow (Yuanbo Peng)

Repository: hbase
Updated Branches:
  refs/heads/master 83e4bfaf7 -> 15cf0a6e7


HBASE-12223 MultiTableInputFormatBase.getSplits is too slow (Yuanbo Peng)


Project: http://git-wip-us.apache.org/repos/asf/hbase/repo
Commit: http://git-wip-us.apache.org/repos/asf/hbase/commit/15cf0a6e
Tree: http://git-wip-us.apache.org/repos/asf/hbase/tree/15cf0a6e
Diff: http://git-wip-us.apache.org/repos/asf/hbase/diff/15cf0a6e

Branch: refs/heads/master
Commit: 15cf0a6e7b10882f7fd205b65c2ef265a690597d
Parents: 83e4bfa
Author: tedyu <yu...@gmail.com>
Authored: Thu Dec 18 05:09:01 2014 -0800
Committer: tedyu <yu...@gmail.com>
Committed: Thu Dec 18 05:09:01 2014 -0800

----------------------------------------------------------------------
 .../mapreduce/MultiTableInputFormatBase.java    | 101 +++++++++++--------
 1 file changed, 60 insertions(+), 41 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/hbase/blob/15cf0a6e/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatBase.java
----------------------------------------------------------------------
diff --git a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatBase.java b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatBase.java
index 5c253cb..b9a2db7 100644
--- a/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatBase.java
+++ b/hbase-server/src/main/java/org/apache/hadoop/hbase/mapreduce/MultiTableInputFormatBase.java
@@ -46,6 +46,9 @@ import org.apache.hadoop.mapreduce.JobContext;
 import org.apache.hadoop.mapreduce.RecordReader;
 import org.apache.hadoop.mapreduce.TaskAttemptContext;
 
+import java.util.Map;
+import java.util.HashMap;
+import java.util.Iterator;
 /**
  * A base for {@link MultiTableInputFormat}s. Receives a list of
  * {@link Scan} instances that define the input tables and
@@ -129,67 +132,82 @@ public abstract class MultiTableInputFormatBase extends
     if (scans.isEmpty()) {
       throw new IOException("No scans were provided.");
     }
-    List<InputSplit> splits = new ArrayList<InputSplit>();
 
+    Map<TableName, List<Scan>> tableMaps = new HashMap<TableName, List<Scan>>();
     for (Scan scan : scans) {
       byte[] tableNameBytes = scan.getAttribute(Scan.SCAN_ATTRIBUTES_TABLE_NAME);
       if (tableNameBytes == null)
         throw new IOException("A scan object did not have a table name");
 
       TableName tableName = TableName.valueOf(tableNameBytes);
+
+      List<Scan> scanList = tableMaps.get(tableName);
+      if (scanList == null) {
+        scanList = new ArrayList<Scan>();
+        tableMaps.put(tableName, scanList);
+      }
+      scanList.add(scan);
+    }
+
+    List<InputSplit> splits = new ArrayList<InputSplit>();
+    Iterator iter = tableMaps.entrySet().iterator();
+    while (iter.hasNext()) {
+      Map.Entry<TableName, List<Scan>> entry = (Map.Entry<TableName, List<Scan>>) iter.next();
+      TableName tableName = entry.getKey();
+      List<Scan> scanList = entry.getValue();
       Table table = null;
       RegionLocator regionLocator = null;
       Connection conn = null;
-      try {
+
+      try{
         conn = ConnectionFactory.createConnection(context.getConfiguration());
         table = conn.getTable(tableName);
-        regionLocator = conn.getRegionLocator(tableName);
         regionLocator = (RegionLocator) table;
+        RegionSizeCalculator sizeCalculator = new RegionSizeCalculator(
+                regionLocator, conn.getAdmin());
         Pair<byte[][], byte[][]> keys = regionLocator.getStartEndKeys();
-        if (keys == null || keys.getFirst() == null ||
-            keys.getFirst().length == 0) {
-          throw new IOException("Expecting at least one region for table : "
-              + tableName.getNameAsString());
-        }
-        int count = 0;
+        for (Scan scan : scanList) {
+          if (keys == null || keys.getFirst() == null || keys.getFirst().length == 0) {
+            throw new IOException("Expecting at least one region for table : "
+                    + tableName.getNameAsString());
+          }
+          int count = 0;
 
-        byte[] startRow = scan.getStartRow();
-        byte[] stopRow = scan.getStopRow();
+          byte[] startRow = scan.getStartRow();
+          byte[] stopRow = scan.getStopRow();
 
-        RegionSizeCalculator sizeCalculator = new RegionSizeCalculator(
-            regionLocator, conn.getAdmin());
+          for (int i = 0; i < keys.getFirst().length; i++) {
+            if (!includeRegionInSplit(keys.getFirst()[i], keys.getSecond()[i])) {
+              continue;
+            }
 
-        for (int i = 0; i < keys.getFirst().length; i++) {
-          if (!includeRegionInSplit(keys.getFirst()[i], keys.getSecond()[i])) {
-            continue;
-          }
-          HRegionLocation hregionLocation = regionLocator.getRegionLocation(
-              keys.getFirst()[i], false);
-          String regionHostname = hregionLocation.getHostname();
-          HRegionInfo regionInfo = hregionLocation.getRegionInfo();
+            if ((startRow.length == 0 || keys.getSecond()[i].length == 0 ||
+                    Bytes.compareTo(startRow, keys.getSecond()[i]) < 0) &&
+                    (stopRow.length == 0 || Bytes.compareTo(stopRow,
+                            keys.getFirst()[i]) > 0)) {
+              byte[] splitStart = startRow.length == 0 ||
+                      Bytes.compareTo(keys.getFirst()[i], startRow) >= 0 ?
+                      keys.getFirst()[i] : startRow;
+              byte[] splitStop = (stopRow.length == 0 ||
+                      Bytes.compareTo(keys.getSecond()[i], stopRow) <= 0) &&
+                      keys.getSecond()[i].length > 0 ?
+                      keys.getSecond()[i] : stopRow;
+
+              HRegionLocation hregionLocation = regionLocator.getRegionLocation(
+                      keys.getFirst()[i], false);
+              String regionHostname = hregionLocation.getHostname();
+              HRegionInfo regionInfo = hregionLocation.getRegionInfo();
+              long regionSize = sizeCalculator.getRegionSize(
+                      regionInfo.getRegionName());
 
-          // determine if the given start and stop keys fall into the range
-          if ((startRow.length == 0 || keys.getSecond()[i].length == 0 ||
-              Bytes.compareTo(startRow, keys.getSecond()[i]) < 0) &&
-              (stopRow.length == 0 ||
-                  Bytes.compareTo(stopRow, keys.getFirst()[i]) > 0)) {
-            byte[] splitStart =
-                startRow.length == 0 ||
-                    Bytes.compareTo(keys.getFirst()[i], startRow) >= 0 ? keys
-                    .getFirst()[i] : startRow;
-            byte[] splitStop =
-                (stopRow.length == 0 || Bytes.compareTo(keys.getSecond()[i],
-                    stopRow) <= 0) && keys.getSecond()[i].length > 0 ? keys
-                    .getSecond()[i] : stopRow;
+              TableSplit split = new TableSplit(table.getName(),
+                      scan, splitStart, splitStop, regionHostname, regionSize);
 
-            long regionSize = sizeCalculator.getRegionSize(regionInfo.getRegionName());
-            TableSplit split =
-                new TableSplit(regionLocator.getName(),
-                    scan, splitStart, splitStop, regionHostname, regionSize);
+              splits.add(split);
 
-            splits.add(split);
-            if (LOG.isDebugEnabled())
-              LOG.debug("getSplits: split -> " + (count++) + " -> " + split);
+              if (LOG.isDebugEnabled())
+                LOG.debug("getSplits: split -> " + (count++) + " -> " + split);
+            }
           }
         }
       } finally {
@@ -198,6 +216,7 @@ public abstract class MultiTableInputFormatBase extends
         if (null != conn) conn.close();
       }
     }
+
     return splits;
   }