You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tajo.apache.org by jh...@apache.org on 2014/02/05 07:38:18 UTC
git commit: TAJO-582: Invalid split calculation. (jinho)
Updated Branches:
refs/heads/master 5177dcfa4 -> 2761436b2
TAJO-582: Invalid split calculation. (jinho)
Project: http://git-wip-us.apache.org/repos/asf/incubator-tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-tajo/commit/2761436b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-tajo/tree/2761436b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-tajo/diff/2761436b
Branch: refs/heads/master
Commit: 2761436b2725ee983d28ad44c7f83475abd09628
Parents: 5177dcf
Author: jinossy <ji...@gmail.com>
Authored: Wed Feb 5 15:37:36 2014 +0900
Committer: jinossy <ji...@gmail.com>
Committed: Wed Feb 5 15:37:36 2014 +0900
----------------------------------------------------------------------
CHANGES.txt | 2 +
.../java/org/apache/tajo/conf/TajoConf.java | 3 +-
.../tajo/storage/AbstractStorageManager.java | 42 ++++++--------------
.../apache/tajo/storage/TestFileSystems.java | 22 +++++-----
.../src/test/resources/storage-default.xml | 15 -------
5 files changed, 27 insertions(+), 57 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/2761436b/CHANGES.txt
----------------------------------------------------------------------
diff --git a/CHANGES.txt b/CHANGES.txt
index ac08b75..9cffeee 100644
--- a/CHANGES.txt
+++ b/CHANGES.txt
@@ -246,6 +246,8 @@ Release 0.8.0 - unreleased
BUG FIXES
+ TAJO-582: Invalid split calculation. (jinho)
+
TAJO-581: Inline view on column partitioned table causes NPE. (hyunsik)
TAJO-577: Support S3FileSystem split. (Yongjun Park via jihoon)
http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/2761436b/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java
----------------------------------------------------------------------
diff --git a/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java b/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java
index 1699848..d465ca3 100644
--- a/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java
+++ b/tajo-common/src/main/java/org/apache/tajo/conf/TajoConf.java
@@ -142,8 +142,7 @@ public class TajoConf extends Configuration {
// Storage Configuration
//////////////////////////////////
RAWFILE_SYNC_INTERVAL("rawfile.sync.interval", null),
- MINIMUM_SPLIT_SIZE("tajo.min.split.size", (long) 536870912),
- MAXIMUM_SPLIT_SIZE("tajo.max.split.size", (long) 67108864),
+ MINIMUM_SPLIT_SIZE("tajo.min.split.size", (long) 1),
// for RCFile
HIVEUSEEXPLICITRCFILEHEADER("tajo.exec.rcfile.use.explicit.header", true),
http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/2761436b/tajo-storage/src/main/java/org/apache/tajo/storage/AbstractStorageManager.java
----------------------------------------------------------------------
diff --git a/tajo-storage/src/main/java/org/apache/tajo/storage/AbstractStorageManager.java b/tajo-storage/src/main/java/org/apache/tajo/storage/AbstractStorageManager.java
index 122e639..a7ed981 100644
--- a/tajo-storage/src/main/java/org/apache/tajo/storage/AbstractStorageManager.java
+++ b/tajo-storage/src/main/java/org/apache/tajo/storage/AbstractStorageManager.java
@@ -384,15 +384,6 @@ public abstract class AbstractStorageManager {
}
/**
- * Get the lower bound on split size imposed by the format.
- *
- * @return the number of bytes of the minimal split for this format
- */
- protected long getFormatMinSplitSize() {
- return 1;
- }
-
- /**
* Is the given filename splitable? Usually, true, but if the file is
* stream compressed, it will not be.
* <p/>
@@ -485,15 +476,6 @@ public abstract class AbstractStorageManager {
}
/**
- * Get the maximum split size.
- *
- * @return the maximum number of bytes a split can include
- */
- public long getMaxSplitSize() {
- return conf.getLongVar(TajoConf.ConfVars.MAXIMUM_SPLIT_SIZE);
- }
-
- /**
* Get the minimum split size
*
* @return the minimum number of bytes that can be in a split
@@ -556,9 +538,6 @@ public abstract class AbstractStorageManager {
public List<FileFragment> getSplits(String tableName, TableMeta meta, Schema schema, Path inputPath) throws IOException {
// generate splits'
- long minSize = Math.max(getFormatMinSplitSize(), getMinSplitSize());
- long maxSize = getMaxSplitSize();
-
List<FileFragment> splits = new ArrayList<FileFragment>();
FileSystem fs = inputPath.getFileSystem(conf);
List<FileStatus> files;
@@ -596,20 +575,23 @@ public abstract class AbstractStorageManager {
} else {
if (splittable) {
- // for s3
- long blockSize = file.getBlockSize();
- long splitSize = computeSplitSize(blockSize, minSize, maxSize);
+ long minSize = Math.max(getMinSplitSize(), 1);
+
+ long blockSize = file.getBlockSize(); // s3n rest api contained block size but blockLocations is one
+ long splitSize = Math.max(minSize, blockSize);
long bytesRemaining = length;
- while (((double) bytesRemaining)/splitSize > SPLIT_SLOP) {
- int blkIndex = getBlockIndex(blkLocations, length-bytesRemaining);
- splits.add(makeSplit(tableName, meta, path, length-bytesRemaining, splitSize,
+
+ // for s3
+ while (((double) bytesRemaining) / splitSize > SPLIT_SLOP) {
+ int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
+ splits.add(makeSplit(tableName, meta, path, length - bytesRemaining, splitSize,
blkLocations[blkIndex].getHosts()));
bytesRemaining -= splitSize;
}
- if (bytesRemaining != 0) {
- int blkIndex = getBlockIndex(blkLocations, length-bytesRemaining);
- splits.add(makeSplit(tableName, meta, path, length-bytesRemaining, bytesRemaining,
+ if (bytesRemaining > 0) {
+ int blkIndex = getBlockIndex(blkLocations, length - bytesRemaining);
+ splits.add(makeSplit(tableName, meta, path, length - bytesRemaining, bytesRemaining,
blkLocations[blkIndex].getHosts()));
}
} else { // Non splittable
http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/2761436b/tajo-storage/src/test/java/org/apache/tajo/storage/TestFileSystems.java
----------------------------------------------------------------------
diff --git a/tajo-storage/src/test/java/org/apache/tajo/storage/TestFileSystems.java b/tajo-storage/src/test/java/org/apache/tajo/storage/TestFileSystems.java
index 974cc9f..302e0da 100644
--- a/tajo-storage/src/test/java/org/apache/tajo/storage/TestFileSystems.java
+++ b/tajo-storage/src/test/java/org/apache/tajo/storage/TestFileSystems.java
@@ -21,6 +21,8 @@ package org.apache.tajo.storage;
import org.apache.hadoop.fs.FileStatus;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.fs.s3.S3FileSystem;
+import org.apache.hadoop.hdfs.DFSConfigKeys;
import org.apache.tajo.catalog.CatalogUtil;
import org.apache.tajo.catalog.Schema;
import org.apache.tajo.catalog.TableMeta;
@@ -32,7 +34,6 @@ import org.apache.tajo.datum.DatumFactory;
import org.apache.tajo.storage.fragment.FileFragment;
import org.apache.tajo.storage.s3.InMemoryFileSystemStore;
import org.apache.tajo.storage.s3.SmallBlockS3FileSystem;
-import org.junit.After;
import org.junit.Test;
import org.junit.runner.RunWith;
import org.junit.runners.Parameterized;
@@ -44,6 +45,7 @@ import java.util.Collection;
import java.util.List;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertTrue;
@RunWith(Parameterized.class)
public class TestFileSystems {
@@ -58,10 +60,13 @@ public class TestFileSystems {
public TestFileSystems(FileSystem fs) throws IOException {
conf = new TajoConf();
- sm = StorageManagerFactory.getStorageManager(conf);
+ if(fs instanceof S3FileSystem){
+ conf.set(DFSConfigKeys.DFS_BLOCK_SIZE_KEY, "10");
+ fs.initialize(URI.create(fs.getScheme() + ":///"), conf);
+ }
this.fs = fs;
- fs.initialize(URI.create(fs.getScheme() + ":///"), conf);
+ sm = StorageManagerFactory.getStorageManager(conf);
testDir = getTestDir(this.fs, TEST_PATH);
}
@@ -75,10 +80,6 @@ public class TestFileSystems {
return fs.makeQualified(path);
}
- @After
- public void tearDown() throws Exception {
- }
-
@Parameterized.Parameters
public static Collection<Object[]> generateParameters() {
return Arrays.asList(new Object[][] {
@@ -119,9 +120,10 @@ public class TestFileSystems {
List<FileFragment> splits = sm.getSplits("table", meta, schema, path);
int splitSize = (int) Math.ceil(fileStatus.getLen() / (double) fileStatus.getBlockSize());
- assertEquals(splits.size(), splitSize);
+ assertEquals(splitSize, splits.size());
+ for (FileFragment fragment : splits) {
+ assertTrue(fragment.getEndKey() <= fileStatus.getBlockSize());
+ }
}
-
-
}
http://git-wip-us.apache.org/repos/asf/incubator-tajo/blob/2761436b/tajo-storage/src/test/resources/storage-default.xml
----------------------------------------------------------------------
diff --git a/tajo-storage/src/test/resources/storage-default.xml b/tajo-storage/src/test/resources/storage-default.xml
index d825c4b..304af10 100644
--- a/tajo-storage/src/test/resources/storage-default.xml
+++ b/tajo-storage/src/test/resources/storage-default.xml
@@ -38,21 +38,6 @@
</property>
<property>
- <name>fs.block.size</name>
- <value>10</value>
- </property>
-
- <property>
- <name>fs.local.block.size</name>
- <value>10</value>
- </property>
-
- <property>
- <name>tajo.min.split.size</name>
- <value>10</value>
- </property>
-
- <property>
<name>fs.s3.impl</name>
<value>org.apache.tajo.storage.s3.SmallBlockS3FileSystem</value>
</property>