You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tajo.apache.org by jh...@apache.org on 2014/02/05 07:38:18 UTC

git commit: TAJO-582: Invalid split calculation. (jinho)

Updated Branches:
  refs/heads/master 5177dcfa4 -> 2761436b2


TAJO-582: Invalid split calculation. (jinho)


Project: http://git-wip-us.apache.org/repos/asf/incubator-tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-tajo/commit/2761436b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-tajo/tree/2761436b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-tajo/diff/2761436b

Branch: refs/heads/master
Commit: 2761436b2725ee983d28ad44c7f83475abd09628
Parents: 5177dcf
Author: jinossy <ji...@gmail.com>
Authored: Wed Feb 5 15:37:36 2014 +0900
Committer: jinossy <ji...@gmail.com>
Committed: Wed Feb 5 15:37:36 2014 +0900

----------------------------------------------------------------------
 CHANGES.txt                                     |  2 +
 .../java/org/apache/tajo/conf/TajoConf.java     |  3 +-
 .../tajo/storage/AbstractStorageManager.java    | 42 ++++++--------------
 .../apache/tajo/storage/TestFileSystems.java    | 22 +++++-----
 .../src/test/resources/storage-default.xml      | 15 -------
 5 files changed, 27 insertions(+), 57 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/2761436b/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index ac08b75..9cffeee 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -246,6 +246,8 @@ Release 0.8.0 - unreleased
 
   BUG FIXES
 
+    TAJO-582: Invalid split calculation. (jinho)
+
     TAJO-581: Inline view on column partitioned table causes NPE. (hyunsik)
 
     TAJO-577: Support S3FileSystem split. (Yongjun Park via jihoon)

http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/2761436b/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java
----------------------------------------------------------------------
diff --git a/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java b/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java
index 1699848..d465ca3 100644
--- a/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java
+++ b/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java
@@ -142,8 +142,7 @@ public class TajoConf extends Configuration {
     // Storage Configuration
     //////////////////////////////////
     RAWFILE_SYNC_INTERVAL("rawfile.sync.interval", null),
-    MINIMUM_SPLIT_SIZE("tajo.min.split.size", (long) 536870912),
-    MAXIMUM_SPLIT_SIZE("tajo.max.split.size", (long) 67108864),
+    MINIMUM_SPLIT_SIZE("tajo.min.split.size", (long) 1),
     // for RCFile
     HIVEUSEEXPLICITRCFILEHEADER("tajo.exec.rcfile.use.explicit.header", true),
 

http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/2761436b/tajo-storage/src/main/java/org/apache/tajo/storage/AbstractStorageManager.java
----------------------------------------------------------------------
diff --git a/tajo-storage/src/main/java/org/apache/tajo/storage/AbstractStorageManager.java b/tajo-storage/src/main/java/org/apache/tajo/storage/AbstractStorageManager.java
index 122e639..a7ed981 100644
--- a/tajo-storage/src/main/java/org/apache/tajo/storage/AbstractStorageManager.java
+++ b/tajo-storage/src/main/java/org/apache/tajo/storage/AbstractStorageManager.java
@@ -384,15 +384,6 @@ public abstract class AbstractStorageManager {
   }
 
   /**
-   * Get the lower bound on split size imposed by the format.
-   *
-   * @return the number of bytes of the minimal split for this format
-   */
-  protected long getFormatMinSplitSize() {
-    return 1;
-  }
-
-  /**
    * Is the given filename splitable? Usually, true, but if the file is
    * stream compressed, it will not be.
    * <p/>
@@ -485,15 +476,6 @@ public abstract class AbstractStorageManager {
   }
 
   /**
-   * Get the maximum split size.
-   *
-   * @return the maximum number of bytes a split can include
-   */
-  public long getMaxSplitSize() {
-    return conf.getLongVar(TajoConf.ConfVars.MAXIMUM_SPLIT_SIZE);
-  }
-
-  /**
    * Get the minimum split size
    *
    * @return the minimum number of bytes that can be in a split
@@ -556,9 +538,6 @@ public abstract class AbstractStorageManager {
   public List<FileFragment> getSplits(String tableName, TableMeta meta, Schema schema, Path inputPath) throws IOException {
     // generate splits'
 
-    long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize());
-    long maxSize = getMaxSplitSize();
-
     List<FileFragment> splits = new ArrayList<FileFragment>();
     FileSystem fs = inputPath.getFileSystem(conf);
     List<FileStatus> files;
@@ -596,20 +575,23 @@ public abstract class AbstractStorageManager {
 
         } else {
           if (splittable) {
-            // for s3
-            long blockSize = file.getBlockSize();
-            long splitSize = computeSplitSize(blockSize, minSize, maxSize);
 
+            long minSize = Math.max(getMinSplitSize(), 1);
+
+            long blockSize = file.getBlockSize(); // s3n rest api contained block size but blockLocations is one
+            long splitSize = Math.max(minSize, blockSize);
             long bytesRemaining = length;
-            while (((double) bytesRemaining)/splitSize > SPLIT_SLOP) {
-              int blkIndex = getBlockIndex(blkLocations, length-bytesRemaining);
-              splits.add(makeSplit(tableName, meta, path, length-bytesRemaining, splitSize,
+
+            // for s3
+            while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
+              int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
+              splits.add(makeSplit(tableName, meta, path, length - bytesRemaining, splitSize,
                   blkLocations[blkIndex].getHosts()));
               bytesRemaining -= splitSize;
             }
-            if (bytesRemaining != 0) {
-              int blkIndex = getBlockIndex(blkLocations, length-bytesRemaining);
-              splits.add(makeSplit(tableName, meta, path, length-bytesRemaining, bytesRemaining,
+            if (bytesRemaining > 0) {
+              int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
+              splits.add(makeSplit(tableName, meta, path, length - bytesRemaining, bytesRemaining,
                   blkLocations[blkIndex].getHosts()));
             }
           } else { // Non splittable

http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/2761436b/tajo-storage/src/test/java/org/apache/tajo/storage/TestFileSystems.java
----------------------------------------------------------------------
diff --git a/tajo-storage/src/test/java/org/apache/tajo/storage/TestFileSystems.java b/tajo-storage/src/test/java/org/apache/tajo/storage/TestFileSystems.java
index 974cc9f..302e0da 100644
--- a/tajo-storage/src/test/java/org/apache/tajo/storage/TestFileSystems.java
+++ b/tajo-storage/src/test/java/org/apache/tajo/storage/TestFileSystems.java
@@ -21,6 +21,8 @@ package org.apache.tajo.storage;
 import org.apache.hadoop.fs.FileStatus;
 import org.apache.hadoop.fs.FileSystem;
 import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.s3.S3FileSystem;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
 import org.apache.tajo.catalog.CatalogUtil;
 import org.apache.tajo.catalog.Schema;
 import org.apache.tajo.catalog.TableMeta;
@@ -32,7 +34,6 @@ import org.apache.tajo.datum.DatumFactory;
 import org.apache.tajo.storage.fragment.FileFragment;
 import org.apache.tajo.storage.s3.InMemoryFileSystemStore;
 import org.apache.tajo.storage.s3.SmallBlockS3FileSystem;
-import org.junit.After;
 import org.junit.Test;
 import org.junit.runner.RunWith;
 import org.junit.runners.Parameterized;
@@ -44,6 +45,7 @@ import java.util.Collection;
 import java.util.List;
 
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
 
 @RunWith(Parameterized.class)
 public class TestFileSystems {
@@ -58,10 +60,13 @@ public class TestFileSystems {
 
   public TestFileSystems(FileSystem fs) throws IOException {
     conf = new TajoConf();
-    sm = StorageManagerFactory.getStorageManager(conf);
 
+    if(fs instanceof S3FileSystem){
+      conf.set(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, "10");
+      fs.initialize(URI.create(fs.getScheme() + ":///"), conf);
+    }
     this.fs = fs;
-    fs.initialize(URI.create(fs.getScheme() + ":///"), conf);
+    sm = StorageManagerFactory.getStorageManager(conf);
     testDir = getTestDir(this.fs, TEST_PATH);
   }
 
@@ -75,10 +80,6 @@ public class TestFileSystems {
     return fs.makeQualified(path);
   }
 
-  @After
-  public void tearDown() throws Exception {
-  }
-
   @Parameterized.Parameters
   public static Collection<Object[]> generateParameters() {
     return Arrays.asList(new Object[][] {
@@ -119,9 +120,10 @@ public class TestFileSystems {
 
     List<FileFragment> splits = sm.getSplits("table", meta, schema, path);
     int splitSize = (int) Math.ceil(fileStatus.getLen() / (double) fileStatus.getBlockSize());
-    assertEquals(splits.size(), splitSize);
+    assertEquals(splitSize, splits.size());
 
+    for (FileFragment fragment : splits) {
+      assertTrue(fragment.getEndKey() <= fileStatus.getBlockSize());
+    }
   }
-
-
 }

http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/2761436b/tajo-storage/src/test/resources/storage-default.xml
----------------------------------------------------------------------
diff --git a/tajo-storage/src/test/resources/storage-default.xml b/tajo-storage/src/test/resources/storage-default.xml
index d825c4b..304af10 100644
--- a/tajo-storage/src/test/resources/storage-default.xml
+++ b/tajo-storage/src/test/resources/storage-default.xml
@@ -38,21 +38,6 @@
   </property>
 
   <property>
-    <name>fs.block.size</name>
-    <value>10</value>
-  </property>
-
-  <property>
-    <name>fs.local.block.size</name>
-    <value>10</value>
-  </property>
-
-  <property>
-    <name>tajo.min.split.size</name>
-    <value>10</value>
-  </property>
-
-  <property>
     <name>fs.s3.impl</name>
     <value>org.apache.tajo.storage.s3.SmallBlockS3FileSystem</value>
   </property>