You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@tajo.apache.org by jh...@apache.org on 2015/08/10 07:29:33 UTC
tajo git commit: TAJO-1743: Improve calculation of intermediate table statistics.
Repository: tajo
Updated Branches:
refs/heads/master f868c0e23 -> f87f66729
TAJO-1743: Improve calculation of intermediate table statistics.
Closes #678
Project: http://git-wip-us.apache.org/repos/asf/tajo/repo
Commit: http://git-wip-us.apache.org/repos/asf/tajo/commit/f87f6672
Tree: http://git-wip-us.apache.org/repos/asf/tajo/tree/f87f6672
Diff: http://git-wip-us.apache.org/repos/asf/tajo/diff/f87f6672
Branch: refs/heads/master
Commit: f87f6672991cabfcb80332d0a0fb9869751ea665
Parents: f868c0e
Author: Jinho Kim <jh...@apache.org>
Authored: Mon Aug 10 14:28:57 2015 +0900
Committer: Jinho Kim <jh...@apache.org>
Committed: Mon Aug 10 14:28:57 2015 +0900
----------------------------------------------------------------------
CHANGES | 2 ++
.../apache/tajo/storage/StorageConstants.java | 3 +++
.../java/org/apache/tajo/util/StringUtils.java | 22 ++++++++++++++++++++
.../engine/planner/PhysicalPlannerImpl.java | 5 ++++-
.../physical/RangeShuffleFileWriteExec.java | 19 +++++++++++------
.../tajo/engine/query/TestCreateTable.java | 2 +-
.../org/apache/tajo/plan/util/PlannerUtil.java | 13 +++++++++++-
.../java/org/apache/tajo/storage/RawFile.java | 9 +++++++-
.../java/org/apache/tajo/storage/RowFile.java | 9 +++++++-
.../apache/tajo/storage/avro/AvroAppender.java | 6 +-----
.../tajo/storage/parquet/ParquetAppender.java | 5 -----
.../storage/rawfile/DirectRawFileWriter.java | 18 ++++++++++------
.../org/apache/tajo/storage/rcfile/RCFile.java | 14 -------------
.../sequencefile/SequenceFileAppender.java | 22 --------------------
14 files changed, 86 insertions(+), 63 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/tajo/blob/f87f6672/CHANGES
----------------------------------------------------------------------
diff --git a/CHANGES b/CHANGES
index b478ffb..2b873b4 100644
--- a/CHANGES
+++ b/CHANGES
@@ -33,6 +33,8 @@ Release 0.11.0 - unreleased
IMPROVEMENT
+ TAJO-1743: Improve calculation of intermediate table statistics. (jinho)
+
TAJO-1699: Tajo Java Client version 2. (hyunsik)
TAJO-1721: Separate routine for CREATE TABLE from DDLExecutor. (hyunsik)
http://git-wip-us.apache.org/repos/asf/tajo/blob/f87f6672/tajo-common/src/main/java/org/apache/tajo/storage/StorageConstants.java
----------------------------------------------------------------------
diff --git a/tajo-common/src/main/java/org/apache/tajo/storage/StorageConstants.java b/tajo-common/src/main/java/org/apache/tajo/storage/StorageConstants.java
index 6df6228..75af292 100644
--- a/tajo-common/src/main/java/org/apache/tajo/storage/StorageConstants.java
+++ b/tajo-common/src/main/java/org/apache/tajo/storage/StorageConstants.java
@@ -96,6 +96,9 @@ public class StorageConstants {
public static final String AVRO_SCHEMA_LITERAL = "avro.schema.literal";
public static final String AVRO_SCHEMA_URL = "avro.schema.url";
+ // Internal storage properties -------------------------------------------------
+ public static final String SHUFFLE_TYPE = "shuffle.type";
+
static {
PARQUET_DEFAULT_BLOCK_SIZE = Integer.toString(DEFAULT_BLOCK_SIZE);
PARQUET_DEFAULT_PAGE_SIZE = Integer.toString(DEFAULT_PAGE_SIZE);
http://git-wip-us.apache.org/repos/asf/tajo/blob/f87f6672/tajo-common/src/main/java/org/apache/tajo/util/StringUtils.java
----------------------------------------------------------------------
diff --git a/tajo-common/src/main/java/org/apache/tajo/util/StringUtils.java b/tajo-common/src/main/java/org/apache/tajo/util/StringUtils.java
index 018c62a..61dc855 100644
--- a/tajo-common/src/main/java/org/apache/tajo/util/StringUtils.java
+++ b/tajo-common/src/main/java/org/apache/tajo/util/StringUtils.java
@@ -444,4 +444,26 @@ public class StringUtils {
return sb.toString();
}
+
+ /**
+ * <p>Checks if a String is empty ("") or null.</p>
+ *
+ * <pre>
+ * StringUtils.isEmpty(null) = true
+ * StringUtils.isEmpty("") = true
+ * StringUtils.isEmpty(" ") = false
+ * StringUtils.isEmpty("bob") = false
+ * StringUtils.isEmpty(" bob ") = false
+ * </pre>
+ *
+ * <p>NOTE: As in Apache Commons Lang since version 2.0, this method
+ * does not trim the String, so a whitespace-only String is not empty.
+ * Commons Lang offers the trimming check as isBlank().</p>
+ *
+ * @param str the String to check, may be null
+ * @return <code>true</code> if the String is empty or null
+ */
+ public static boolean isEmpty(String str) {
+ return str == null || str.length() == 0;
+ }
}
http://git-wip-us.apache.org/repos/asf/tajo/blob/f87f6672/tajo-core/src/main/java/org/apache/tajo/engine/planner/PhysicalPlannerImpl.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/PhysicalPlannerImpl.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/PhysicalPlannerImpl.java
index ef3d039..7b1b1d7 100644
--- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/PhysicalPlannerImpl.java
+++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/PhysicalPlannerImpl.java
@@ -770,6 +770,9 @@ public class PhysicalPlannerImpl implements PhysicalPlanner {
*/
public PhysicalExec createShuffleFileWritePlan(TaskAttemptContext ctx,
ShuffleFileWriteNode plan, PhysicalExec subOp) throws IOException {
+ plan.getOptions().set(StorageConstants.SHUFFLE_TYPE,
+ PlannerUtil.getShuffleType(ctx.getDataChannel().getShuffleType()));
+
switch (plan.getShuffleType()) {
case HASH_SHUFFLE:
case SCATTERED_HASH_SHUFFLE:
@@ -788,7 +791,7 @@ public class PhysicalPlannerImpl implements PhysicalPlanner {
specs[i] = new SortSpec(columns[i]);
}
}
- return new RangeShuffleFileWriteExec(ctx, subOp, plan.getInSchema(), plan.getInSchema(), sortSpecs);
+ return new RangeShuffleFileWriteExec(ctx, plan, subOp, sortSpecs);
case NONE_SHUFFLE:
// if there is no given NULL CHAR property in the table property and the query is neither CTAS or INSERT,
http://git-wip-us.apache.org/repos/asf/tajo/blob/f87f6672/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/RangeShuffleFileWriteExec.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/RangeShuffleFileWriteExec.java b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/RangeShuffleFileWriteExec.java
index ac3d1b2..4d01b00 100644
--- a/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/RangeShuffleFileWriteExec.java
+++ b/tajo-core/src/main/java/org/apache/tajo/engine/planner/physical/RangeShuffleFileWriteExec.java
@@ -30,6 +30,7 @@ import org.apache.tajo.catalog.SortSpec;
import org.apache.tajo.catalog.TableMeta;
import org.apache.tajo.conf.TajoConf;
import org.apache.tajo.engine.planner.KeyProjector;
+import org.apache.tajo.plan.logical.ShuffleFileWriteNode;
import org.apache.tajo.plan.util.PlannerUtil;
import org.apache.tajo.storage.*;
import org.apache.tajo.storage.index.bst.BSTIndex;
@@ -56,14 +57,19 @@ public class RangeShuffleFileWriteExec extends UnaryPhysicalExec {
private KeyProjector keyProjector;
public RangeShuffleFileWriteExec(final TaskAttemptContext context,
- final PhysicalExec child, final Schema inSchema, final Schema outSchema,
- final SortSpec[] sortSpecs) throws IOException {
- super(context, inSchema, outSchema, child);
+ final ShuffleFileWriteNode plan,
+ final PhysicalExec child, final SortSpec[] sortSpecs) throws IOException {
+ super(context, plan.getInSchema(), plan.getInSchema(), child);
this.sortSpecs = sortSpecs;
+
+ if (plan.hasOptions()) {
+ this.meta = CatalogUtil.newTableMeta(plan.getStorageType(), plan.getOptions());
+ } else {
+ this.meta = CatalogUtil.newTableMeta(plan.getStorageType());
+ }
}
public void init() throws IOException {
- super.init();
keySchema = PlannerUtil.sortSpecsToSchema(sortSpecs);
keyProjector = new KeyProjector(inSchema, keySchema.toArray());
@@ -72,8 +78,7 @@ public class RangeShuffleFileWriteExec extends UnaryPhysicalExec {
this.comp = new BaseTupleComparator(keySchema, sortSpecs);
Path storeTablePath = new Path(context.getWorkDir(), "output");
LOG.info("Output data directory: " + storeTablePath);
- this.meta = CatalogUtil.newTableMeta(context.getDataChannel() != null ?
- context.getDataChannel().getStoreType() : "RAW");
+
FileSystem fs = new RawLocalFileSystem();
fs.mkdirs(storeTablePath);
this.appender = (FileAppender) ((FileTablespace) TablespaceManager.getDefault())
@@ -84,6 +89,8 @@ public class RangeShuffleFileWriteExec extends UnaryPhysicalExec {
BSTIndex.TWO_LEVEL_INDEX, keySchema, comp);
this.indexWriter.setLoadNum(100);
this.indexWriter.open();
+
+ super.init();
}
@Override
http://git-wip-us.apache.org/repos/asf/tajo/blob/f87f6672/tajo-core/src/test/java/org/apache/tajo/engine/query/TestCreateTable.java
----------------------------------------------------------------------
diff --git a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestCreateTable.java b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestCreateTable.java
index 08aabf4..f34cbce 100644
--- a/tajo-core/src/test/java/org/apache/tajo/engine/query/TestCreateTable.java
+++ b/tajo-core/src/test/java/org/apache/tajo/engine/query/TestCreateTable.java
@@ -448,7 +448,7 @@ public class TestCreateTable extends QueryTestCaseBase {
res = executeString(
"INSERT INTO LOCATION '/testCreateExternalTable1FromOnlyPath' SELECT * FROM default.lineitem");
res = executeString(
- "CREATE EXTERNAL TABLE table1 (col1 INTEGER) USING CSV LOCATION '/testCreateExternalTable1FromOnlyPath';");
+ "CREATE EXTERNAL TABLE table1 (col1 INTEGER) USING TEXT LOCATION '/testCreateExternalTable1FromOnlyPath';");
} catch (Throwable t) {
if (res != null) {
res.close();
http://git-wip-us.apache.org/repos/asf/tajo/blob/f87f6672/tajo-plan/src/main/java/org/apache/tajo/plan/util/PlannerUtil.java
----------------------------------------------------------------------
diff --git a/tajo-plan/src/main/java/org/apache/tajo/plan/util/PlannerUtil.java b/tajo-plan/src/main/java/org/apache/tajo/plan/util/PlannerUtil.java
index c4a4367..445dc8a 100644
--- a/tajo-plan/src/main/java/org/apache/tajo/plan/util/PlannerUtil.java
+++ b/tajo-plan/src/main/java/org/apache/tajo/plan/util/PlannerUtil.java
@@ -29,14 +29,15 @@ import org.apache.tajo.exception.TajoException;
import org.apache.tajo.exception.TajoInternalError;
import org.apache.tajo.plan.InvalidQueryException;
import org.apache.tajo.plan.LogicalPlan;
-import org.apache.tajo.plan.PlanningException;
import org.apache.tajo.plan.Target;
import org.apache.tajo.plan.expr.*;
import org.apache.tajo.plan.logical.*;
+import org.apache.tajo.plan.serder.PlanProto.ShuffleType;
import org.apache.tajo.plan.visitor.BasicLogicalPlanVisitor;
import org.apache.tajo.plan.visitor.ExplainLogicalPlanVisitor;
import org.apache.tajo.plan.visitor.SimpleAlgebraVisitor;
import org.apache.tajo.util.KeyValueSet;
+import org.apache.tajo.util.StringUtils;
import org.apache.tajo.util.TUtil;
import java.io.IOException;
@@ -864,6 +865,16 @@ public class PlannerUtil {
return explains.toString();
}
+ public static String getShuffleType(ShuffleType shuffleType) {
+ if (shuffleType == null) return ShuffleType.NONE_SHUFFLE.toString();
+ return shuffleType.toString();
+ }
+
+ public static ShuffleType getShuffleType(String shuffleType) {
+ if (StringUtils.isEmpty(shuffleType)) return ShuffleType.NONE_SHUFFLE;
+ return ShuffleType.valueOf(shuffleType);
+ }
+
public static boolean isFileStorageType(String storageType) {
if (storageType.equalsIgnoreCase("hbase")) {
return false;
http://git-wip-us.apache.org/repos/asf/tajo/blob/f87f6672/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/RawFile.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/RawFile.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/RawFile.java
index e3594f9..20c5d6d 100644
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/RawFile.java
+++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/RawFile.java
@@ -35,6 +35,8 @@ import org.apache.tajo.datum.NullDatum;
import org.apache.tajo.datum.ProtobufDatumFactory;
import org.apache.tajo.exception.UnsupportedException;
import org.apache.tajo.plan.expr.EvalNode;
+import org.apache.tajo.plan.serder.PlanProto.ShuffleType;
+import org.apache.tajo.plan.util.PlannerUtil;
import org.apache.tajo.storage.fragment.Fragment;
import org.apache.tajo.unit.StorageUnit;
import org.apache.tajo.util.BitArray;
@@ -472,6 +474,7 @@ public class RawFile {
private int headerSize = 0;
private static final int RECORD_SIZE = 4;
private long pos;
+ private ShuffleType shuffleType;
private TableStatistics stats;
@@ -511,6 +514,9 @@ public class RawFile {
if (enabledStats) {
this.stats = new TableStatistics(this.schema);
+ this.shuffleType = PlannerUtil.getShuffleType(
+ meta.getOption(StorageConstants.SHUFFLE_TYPE,
+ PlannerUtil.getShuffleType(ShuffleType.NONE_SHUFFLE)));
}
super.init();
@@ -640,7 +646,8 @@ public class RawFile {
// reset the null flags
nullFlags.clear();
for (int i = 0; i < schema.size(); i++) {
- if (enabledStats) {
+ if (shuffleType == ShuffleType.RANGE_SHUFFLE) {
+ // Gather per-field min/max statistics; they are needed only for range-shuffle intermediate files.
stats.analyzeField(i, t);
}
http://git-wip-us.apache.org/repos/asf/tajo/blob/f87f6672/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/RowFile.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/RowFile.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/RowFile.java
index ef597ea..dbe438c 100644
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/RowFile.java
+++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/RowFile.java
@@ -36,6 +36,8 @@ import org.apache.tajo.datum.Datum;
import org.apache.tajo.datum.DatumFactory;
import org.apache.tajo.exception.UnsupportedException;
import org.apache.tajo.plan.expr.EvalNode;
+import org.apache.tajo.plan.serder.PlanProto.ShuffleType;
+import org.apache.tajo.plan.util.PlannerUtil;
import org.apache.tajo.storage.exception.AlreadyExistsStorageException;
import org.apache.tajo.storage.fragment.Fragment;
import org.apache.tajo.util.BitArray;
@@ -322,6 +324,7 @@ public class RowFile {
private BitArray nullFlags;
// statistics
private TableStatistics stats;
+ private ShuffleType shuffleType;
public RowFileAppender(Configuration conf, final TaskAttemptId taskAttemptId,
final Schema schema, final TableMeta meta, final Path workDir)
@@ -364,6 +367,9 @@ public class RowFile {
if (enabledStats) {
this.stats = new TableStatistics(this.schema);
+ this.shuffleType = PlannerUtil.getShuffleType(
+ meta.getOption(StorageConstants.SHUFFLE_TYPE,
+ PlannerUtil.getShuffleType(ShuffleType.NONE_SHUFFLE)));
}
}
@@ -383,7 +389,8 @@ public class RowFile {
nullFlags.clear();
for (int i = 0; i < schema.size(); i++) {
- if (enabledStats) {
+ if (shuffleType == ShuffleType.RANGE_SHUFFLE) {
+ // Gather per-field min/max statistics; they are needed only for range-shuffle intermediate files.
stats.analyzeField(i, t);
}
http://git-wip-us.apache.org/repos/asf/tajo/blob/f87f6672/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/avro/AvroAppender.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/avro/AvroAppender.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/avro/AvroAppender.java
index 2782955..0c67320 100644
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/avro/AvroAppender.java
+++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/avro/AvroAppender.java
@@ -30,7 +30,6 @@ import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.IOUtils;
import org.apache.tajo.TaskAttemptId;
-import org.apache.tajo.catalog.Column;
import org.apache.tajo.catalog.TableMeta;
import org.apache.tajo.catalog.statistics.TableStats;
import org.apache.tajo.storage.FileAppender;
@@ -138,10 +137,7 @@ public class AvroAppender extends FileAppender {
public void addTuple(Tuple tuple) throws IOException {
GenericRecord record = new GenericData.Record(avroSchema);
for (int i = 0; i < schema.size(); ++i) {
- Column column = schema.getColumn(i);
- if (enabledStats) {
- stats.analyzeField(i, tuple);
- }
+
Object value;
Schema.Field avroField = avroFields.get(i);
Schema.Type avroType = avroField.schema().getType();
http://git-wip-us.apache.org/repos/asf/tajo/blob/f87f6672/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/parquet/ParquetAppender.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/parquet/ParquetAppender.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/parquet/ParquetAppender.java
index 4a8b256..6cb99d1 100644
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/parquet/ParquetAppender.java
+++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/parquet/ParquetAppender.java
@@ -106,11 +106,6 @@ public class ParquetAppender extends FileAppender {
*/
@Override
public void addTuple(Tuple tuple) throws IOException {
- if (enabledStats) {
- for (int i = 0; i < schema.size(); ++i) {
- stats.analyzeField(i, tuple);
- }
- }
writer.write(tuple);
if (enabledStats) {
stats.incrementRow();
http://git-wip-us.apache.org/repos/asf/tajo/blob/f87f6672/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/rawfile/DirectRawFileWriter.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/rawfile/DirectRawFileWriter.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/rawfile/DirectRawFileWriter.java
index bb81d6e..912649f 100644
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/rawfile/DirectRawFileWriter.java
+++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/rawfile/DirectRawFileWriter.java
@@ -31,16 +31,17 @@ import org.apache.tajo.catalog.Schema;
import org.apache.tajo.catalog.TableMeta;
import org.apache.tajo.catalog.statistics.TableStats;
import org.apache.tajo.common.TajoDataTypes;
-import org.apache.tajo.storage.FileAppender;
-import org.apache.tajo.storage.RowStoreUtil;
-import org.apache.tajo.storage.TableStatistics;
-import org.apache.tajo.storage.Tuple;
+import org.apache.tajo.plan.serder.PlanProto.ShuffleType;
+import org.apache.tajo.plan.util.PlannerUtil;
+import org.apache.tajo.storage.*;
import org.apache.tajo.tuple.BaseTupleBuilder;
import org.apache.tajo.tuple.offheap.OffHeapRowBlock;
import org.apache.tajo.tuple.offheap.UnSafeTuple;
import org.apache.tajo.unit.StorageUnit;
-import java.io.*;
+import java.io.File;
+import java.io.IOException;
+import java.io.RandomAccessFile;
import java.nio.ByteBuffer;
import java.nio.channels.FileChannel;
@@ -56,6 +57,7 @@ public class DirectRawFileWriter extends FileAppender {
private long pos;
private TableStatistics stats;
+ private ShuffleType shuffleType;
private BaseTupleBuilder builder;
@@ -96,6 +98,9 @@ public class DirectRawFileWriter extends FileAppender {
if (enabledStats) {
this.stats = new TableStatistics(this.schema);
+ this.shuffleType = PlannerUtil.getShuffleType(
+ meta.getOption(StorageConstants.SHUFFLE_TYPE,
+ PlannerUtil.getShuffleType(ShuffleType.NONE_SHUFFLE)));
}
builder = new BaseTupleBuilder(schema);
@@ -149,7 +154,8 @@ public class DirectRawFileWriter extends FileAppender {
@Override
public void addTuple(Tuple t) throws IOException {
- if (enabledStats) {
+ if (shuffleType == ShuffleType.RANGE_SHUFFLE) {
+ // Gather per-field min/max statistics; they are needed only for range-shuffle intermediate files.
for (int i = 0; i < schema.size(); i++) {
stats.analyzeField(i, t);
}
http://git-wip-us.apache.org/repos/asf/tajo/blob/f87f6672/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/rcfile/RCFile.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/rcfile/RCFile.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/rcfile/RCFile.java
index 99c727c..4cd51e6 100644
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/rcfile/RCFile.java
+++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/rcfile/RCFile.java
@@ -32,7 +32,6 @@ import org.apache.hadoop.util.ReflectionUtils;
import org.apache.tajo.TaskAttemptId;
import org.apache.tajo.catalog.Schema;
import org.apache.tajo.catalog.TableMeta;
-import org.apache.tajo.catalog.proto.CatalogProtos;
import org.apache.tajo.catalog.statistics.TableStats;
import org.apache.tajo.conf.TajoConf;
import org.apache.tajo.datum.Datum;
@@ -731,15 +730,6 @@ public class RCFile {
throw new FileNotFoundException(path.toString());
}
- //determine the intermediate file type
- String store = conf.get(TajoConf.ConfVars.SHUFFLE_FILE_FORMAT.varname,
- TajoConf.ConfVars.SHUFFLE_FILE_FORMAT.defaultVal);
- if (enabledStats && CatalogProtos.StoreType.RCFILE == CatalogProtos.StoreType.valueOf(store.toUpperCase())) {
- isShuffle = true;
- } else {
- isShuffle = false;
- }
-
if (this.meta.containsOption(StorageConstants.COMPRESSION_CODEC)) {
String codecClassname = this.meta.getOption(StorageConstants.COMPRESSION_CODEC);
try {
@@ -900,10 +890,6 @@ public class RCFile {
for (int i = 0; i < size; i++) {
int length = columnBuffers[i].append(tuple, i);
columnBufferSize += length;
- if (isShuffle) {
- // it is to calculate min/max values, and it is only used for the intermediate file.
- stats.analyzeField(i, tuple);
- }
}
if (size < columnNumber) {
http://git-wip-us.apache.org/repos/asf/tajo/blob/f87f6672/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/sequencefile/SequenceFileAppender.java
----------------------------------------------------------------------
diff --git a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/sequencefile/SequenceFileAppender.java b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/sequencefile/SequenceFileAppender.java
index 9b09d78..ad622fe 100644
--- a/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/sequencefile/SequenceFileAppender.java
+++ b/tajo-storage/tajo-storage-hdfs/src/main/java/org/apache/tajo/storage/sequencefile/SequenceFileAppender.java
@@ -32,9 +32,7 @@ import org.apache.hadoop.io.compress.CompressionCodecFactory;
import org.apache.tajo.TaskAttemptId;
import org.apache.tajo.catalog.Schema;
import org.apache.tajo.catalog.TableMeta;
-import org.apache.tajo.catalog.proto.CatalogProtos;
import org.apache.tajo.catalog.statistics.TableStats;
-import org.apache.tajo.conf.TajoConf;
import org.apache.tajo.datum.NullDatum;
import org.apache.tajo.datum.ProtobufDatum;
import org.apache.tajo.storage.*;
@@ -86,15 +84,6 @@ public class SequenceFileAppender extends FileAppender {
this.fs = path.getFileSystem(conf);
- //determine the intermediate file type
- String store = conf.get(TajoConf.ConfVars.SHUFFLE_FILE_FORMAT.varname,
- TajoConf.ConfVars.SHUFFLE_FILE_FORMAT.defaultVal);
- if (enabledStats && CatalogProtos.StoreType.SEQUENCEFILE == CatalogProtos.StoreType.valueOf(store.toUpperCase())) {
- isShuffle = true;
- } else {
- isShuffle = false;
- }
-
this.delimiter = StringEscapeUtils.unescapeJava(this.meta.getOption(StorageConstants.SEQUENCEFILE_DELIMITER,
StorageConstants.DEFAULT_FIELD_DELIMITER)).charAt(0);
this.columnNum = schema.size();
@@ -194,11 +183,6 @@ public class SequenceFileAppender extends FileAppender {
}
serde.serialize(j, tuple, os, nullChars);
-
- if (isShuffle) {
- // it is to calculate min/max values, and it is only used for the intermediate file.
- stats.analyzeField(j, tuple);
- }
}
lasti = i + 1;
nullByte = 0;
@@ -216,12 +200,6 @@ public class SequenceFileAppender extends FileAppender {
if (columnNum -1 > i) {
os.write((byte) delimiter);
}
-
- if (isShuffle) {
- // it is to calculate min/max values, and it is only used for the intermediate file.
- stats.analyzeField(i, tuple);
- }
-
}
writer.append(EMPTY_KEY, new Text(os.toByteArray()));
}