You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ma...@apache.org on 2018/10/25 09:03:58 UTC
carbondata git commit: [CARBONDATA-2977] Write uncompress_size to
ChunkCompressMeta in the file
Repository: carbondata
Updated Branches:
refs/heads/master 33a6dc2ac -> e19c5da6d
[CARBONDATA-2977] Write uncompress_size to ChunkCompressMeta in the file
Currently, total_uncompressed_size and total_compressed_size in the ChunkCompressionMeta of the carbondata file are always 0. This PR writes the
correct values to the file.
This closes #2772
Project: http://git-wip-us.apache.org/repos/asf/carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/carbondata/commit/e19c5da6
Tree: http://git-wip-us.apache.org/repos/asf/carbondata/tree/e19c5da6
Diff: http://git-wip-us.apache.org/repos/asf/carbondata/diff/e19c5da6
Branch: refs/heads/master
Commit: e19c5da6dbb07056b1053319d48a64a4b0715129
Parents: 33a6dc2
Author: Jacky Li <ja...@qq.com>
Authored: Thu Sep 27 00:39:29 2018 +0800
Committer: manishgupta88 <to...@gmail.com>
Committed: Thu Oct 25 14:38:28 2018 +0530
----------------------------------------------------------------------
.../core/datastore/page/ColumnPage.java | 39 +++++++++++
.../datastore/page/LocalDictColumnPage.java | 9 +++
.../page/UnsafeFixLengthColumnPage.java | 7 ++
.../datastore/page/VarLengthColumnPageBase.java | 5 ++
.../page/encoding/ColumnPageEncoder.java | 7 +-
.../core/util/CarbonMetadataUtil.java | 10 +--
.../apache/carbondata/tool/CarbonCliTest.java | 69 ++++----------------
7 files changed, 81 insertions(+), 65 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/carbondata/blob/e19c5da6/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java
index 8b9a9a5..e8097da 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/ColumnPage.java
@@ -724,6 +724,45 @@ public abstract class ColumnPage {
}
/**
+ * Return total page data length in bytes
+ */
+ public long getPageLengthInBytes() throws IOException {
+ DataType dataType = columnPageEncoderMeta.getStoreDataType();
+ if (dataType == DataTypes.BOOLEAN) {
+ return getBooleanPage().length;
+ } else if (dataType == DataTypes.BYTE) {
+ return getBytePage().length;
+ } else if (dataType == DataTypes.SHORT) {
+ return getShortPage().length * SHORT.getSizeInBytes();
+ } else if (dataType == DataTypes.SHORT_INT) {
+ return getShortIntPage().length;
+ } else if (dataType == DataTypes.INT) {
+ return getIntPage().length * INT.getSizeInBytes();
+ } else if (dataType == DataTypes.LONG) {
+ return getLongPage().length * LONG.getSizeInBytes();
+ } else if (dataType == DataTypes.FLOAT) {
+ return getFloatPage().length * FLOAT.getSizeInBytes();
+ } else if (dataType == DataTypes.DOUBLE) {
+ return getDoublePage().length * DOUBLE.getSizeInBytes();
+ } else if (DataTypes.isDecimal(dataType)) {
+ return getDecimalPage().length;
+ } else if (dataType == DataTypes.BYTE_ARRAY
+ && columnPageEncoderMeta.getColumnSpec().getColumnType() == ColumnType.COMPLEX_PRIMITIVE) {
+ return getComplexChildrenLVFlattenedBytePage().length;
+ } else if (dataType == DataTypes.BYTE_ARRAY
+ && (columnPageEncoderMeta.getColumnSpec().getColumnType() == ColumnType.COMPLEX_STRUCT
+ || columnPageEncoderMeta.getColumnSpec().getColumnType() == ColumnType.COMPLEX_ARRAY
+ || columnPageEncoderMeta.getColumnSpec().getColumnType() == ColumnType.PLAIN_LONG_VALUE
+ || columnPageEncoderMeta.getColumnSpec().getColumnType() == ColumnType.PLAIN_VALUE)) {
+ return getComplexParentFlattenedBytePage().length;
+ } else if (dataType == DataTypes.BYTE_ARRAY) {
+ return getLVFlattenedBytePage().length;
+ } else {
+ throw new UnsupportedOperationException("unsupport compress column page: " + dataType);
+ }
+ }
+
+ /**
* Compress page data using specified compressor
*/
public byte[] compress(Compressor compressor) throws MemoryException, IOException {
http://git-wip-us.apache.org/repos/asf/carbondata/blob/e19c5da6/core/src/main/java/org/apache/carbondata/core/datastore/page/LocalDictColumnPage.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/LocalDictColumnPage.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/LocalDictColumnPage.java
index 3da154a..5cf2130 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/page/LocalDictColumnPage.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/LocalDictColumnPage.java
@@ -344,4 +344,13 @@ public class LocalDictColumnPage extends ColumnPage {
@Override public void convertValue(ColumnPageValueConverter codec) {
throw new UnsupportedOperationException("Operation not supported");
}
+
+ @Override
+ public long getPageLengthInBytes() throws IOException {
+ if (null != pageLevelDictionary) {
+ return encodedDataColumnPage.getPageLengthInBytes();
+ } else {
+ return actualDataColumnPage.getPageLengthInBytes();
+ }
+ }
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/e19c5da6/core/src/main/java/org/apache/carbondata/core/datastore/page/UnsafeFixLengthColumnPage.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/UnsafeFixLengthColumnPage.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/UnsafeFixLengthColumnPage.java
index 7df29df..da0e487 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/page/UnsafeFixLengthColumnPage.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/UnsafeFixLengthColumnPage.java
@@ -534,6 +534,13 @@ public class UnsafeFixLengthColumnPage extends ColumnPage {
}
}
+ @Override
+ public long getPageLengthInBytes() {
+ // For unsafe column page, we are always tracking the total length
+ // so return it directly instead of calculate it again (super class implementation)
+ return totalLength;
+ }
+
@Override public byte[] compress(Compressor compressor) throws MemoryException, IOException {
if (UnsafeMemoryManager.isOffHeap() && compressor.supportUnsafe()) {
// use raw compression and copy to byte[]
http://git-wip-us.apache.org/repos/asf/carbondata/blob/e19c5da6/core/src/main/java/org/apache/carbondata/core/datastore/page/VarLengthColumnPageBase.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/VarLengthColumnPageBase.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/VarLengthColumnPageBase.java
index 35d0009..39b8282 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/page/VarLengthColumnPageBase.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/VarLengthColumnPageBase.java
@@ -518,4 +518,9 @@ public abstract class VarLengthColumnPageBase extends ColumnPage {
rowOffset = null;
}
}
+
+ @Override
+ public long getPageLengthInBytes() throws IOException {
+ return totalLength;
+ }
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/e19c5da6/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoder.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoder.java b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoder.java
index f38aef2..32eb5ab 100644
--- a/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoder.java
+++ b/core/src/main/java/org/apache/carbondata/core/datastore/page/encoding/ColumnPageEncoder.java
@@ -109,9 +109,10 @@ public abstract class ColumnPageEncoder {
return dataChunk;
}
- private void fillBasicFields(ColumnPage inputPage, DataChunk2 dataChunk) {
- dataChunk.setChunk_meta(
- CarbonMetadataUtil.getChunkCompressorMeta(inputPage.getColumnCompressorName()));
+ private void fillBasicFields(ColumnPage inputPage, DataChunk2 dataChunk)
+ throws IOException {
+ dataChunk.setChunk_meta(CarbonMetadataUtil.getChunkCompressorMeta(inputPage,
+ dataChunk.getData_page_length()));
dataChunk.setNumberOfRowsInpage(inputPage.getPageSize());
dataChunk.setRowMajor(false);
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/e19c5da6/core/src/main/java/org/apache/carbondata/core/util/CarbonMetadataUtil.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/carbondata/core/util/CarbonMetadataUtil.java b/core/src/main/java/org/apache/carbondata/core/util/CarbonMetadataUtil.java
index b156ae6..9c82fa4 100644
--- a/core/src/main/java/org/apache/carbondata/core/util/CarbonMetadataUtil.java
+++ b/core/src/main/java/org/apache/carbondata/core/util/CarbonMetadataUtil.java
@@ -26,6 +26,7 @@ import org.apache.carbondata.common.logging.LogServiceFactory;
import org.apache.carbondata.core.datastore.blocklet.BlockletEncodedColumnPage;
import org.apache.carbondata.core.datastore.blocklet.EncodedBlocklet;
import org.apache.carbondata.core.datastore.compression.CompressorFactory;
+import org.apache.carbondata.core.datastore.page.ColumnPage;
import org.apache.carbondata.core.datastore.page.encoding.EncodedColumnPage;
import org.apache.carbondata.core.datastore.page.statistics.SimpleStatsResult;
import org.apache.carbondata.core.datastore.page.statistics.TablePageStatistics;
@@ -313,15 +314,16 @@ public class CarbonMetadataUtil {
* before 1.5.0, we set a enum 'compression_codec';
* after 1.5.0, we use string 'compressor_name' instead
*/
- public static ChunkCompressionMeta getChunkCompressorMeta(String compressorName) {
+ public static ChunkCompressionMeta getChunkCompressorMeta(
+ ColumnPage inputPage, long encodedDataLength) throws IOException {
ChunkCompressionMeta chunkCompressionMeta = new ChunkCompressionMeta();
// we will not use this field any longer and will use compressor_name instead,
// but in thrift definition, this field is required so we cannot set it to null, otherwise
// it will cause deserialization error in runtime (required field cannot be null).
chunkCompressionMeta.setCompression_codec(CompressionCodec.DEPRECATED);
- chunkCompressionMeta.setCompressor_name(compressorName);
- chunkCompressionMeta.setTotal_compressed_size(0);
- chunkCompressionMeta.setTotal_uncompressed_size(0);
+ chunkCompressionMeta.setCompressor_name(inputPage.getColumnCompressorName());
+ chunkCompressionMeta.setTotal_compressed_size(encodedDataLength);
+ chunkCompressionMeta.setTotal_uncompressed_size(inputPage.getPageLengthInBytes());
return chunkCompressionMeta;
}
http://git-wip-us.apache.org/repos/asf/carbondata/blob/e19c5da6/tools/cli/src/test/java/org/apache/carbondata/tool/CarbonCliTest.java
----------------------------------------------------------------------
diff --git a/tools/cli/src/test/java/org/apache/carbondata/tool/CarbonCliTest.java b/tools/cli/src/test/java/org/apache/carbondata/tool/CarbonCliTest.java
index e526131..d53e3d9 100644
--- a/tools/cli/src/test/java/org/apache/carbondata/tool/CarbonCliTest.java
+++ b/tools/cli/src/test/java/org/apache/carbondata/tool/CarbonCliTest.java
@@ -77,7 +77,7 @@ public class CarbonCliTest {
output.contains(
"Input Folder: ./CarbonCliTest\n"
+ "## Summary\n"
- + "total: 6 blocks, 2 shards, 14 blocklets, 314 pages, 10,000,000 rows, 32.26MB\n"
+ + "total: 6 blocks, 2 shards, 14 blocklets, 314 pages, 10,000,000 rows, 32.27MB\n"
+ "avg: 5.38MB/block, 2.30MB/blocklet, 1,666,666 rows/block, 714,285 rows/blocklet"));
String[] args2 = {"-cmd", "summary", "-p", path, "-s"};
@@ -116,7 +116,7 @@ public class CarbonCliTest {
+ "1 1 25 800,000 2.58MB \n"
+ "2 0 25 800,000 2.58MB \n"
+ "2 1 25 800,000 2.58MB \n"
- + "2 2 7 200,000 660.74KB "));
+ + "2 2 7 200,000 660.79KB "));
String[] args5 = {"-cmd", "summary", "-p", path, "-c", "name"};
out = new ByteArrayOutputStream();
@@ -126,61 +126,13 @@ public class CarbonCliTest {
Assert.assertTrue(
output.contains(
"BLK BLKLT Meta Size Data Size LocalDict DictEntries DictSize AvgPageSize Min% Max% \n"
- + "0 0 1.72KB 295.89KB false 0 0.0B 11.77KB robot0 robot1 \n"
- + "0 1 1.72KB 295.89KB false 0 0.0B 11.77KB robot1 robot3 \n"
- + "1 0 1.72KB 295.89KB false 0 0.0B 11.77KB robot3 robot4 \n"
- + "1 1 1.72KB 295.89KB false 0 0.0B 11.77KB robot4 robot6 \n"
- + "2 0 1.72KB 295.89KB false 0 0.0B 11.77KB robot6 robot7 \n"
- + "2 1 1.72KB 295.89KB false 0 0.0B 11.77KB robot8 robot9 \n"
- + "2 2 492.0B 74.03KB false 0 0.0B 10.51KB robot9 robot9 "));
- }
-
- @Test
- public void testSummaryOutputAll() {
- String[] args = {"-cmd", "summary", "-p", path, "-a", "-c", "age"};
- ByteArrayOutputStream out = new ByteArrayOutputStream();
- PrintStream stream = new PrintStream(out);
- CarbonCli.run(args, stream);
- String output = new String(out.toByteArray());
- Assert.assertTrue(
- output.contains(
- "Input Folder: ./CarbonCliTest\n"
- + "## Summary\n"
- + "total: 6 blocks, 2 shards, 14 blocklets, 314 pages, 10,000,000 rows, 32.26MB\n"
- + "avg: 5.38MB/block, 2.30MB/blocklet, 1,666,666 rows/block, 714,285 rows/blocklet\n"));
-
- Assert.assertTrue(
- output.contains(
- "Column Name Data Type Column Type SortColumn Encoding Ordinal Id \n"
- + "name STRING dimension true [INVERTED_INDEX] 0 NA \n"
- + "age INT measure false [] 1 NA \n"));
-
- Assert.assertTrue(
- output.contains(
- "## Table Properties\n"
- + "schema file not found"));
-
- Assert.assertTrue(
- output.contains(
- "BLK BLKLT NumPages NumRows Size \n"
- + "0 0 25 800,000 2.58MB \n"
- + "0 1 25 800,000 2.58MB \n"
- + "1 0 25 800,000 2.58MB \n"
- + "1 1 25 800,000 2.58MB \n"
- + "2 0 25 800,000 2.58MB \n"
- + "2 1 25 800,000 2.58MB \n"
- + "2 2 7 200,000 660.74KB "));
-
- Assert.assertTrue(
- output.contains(
- "BLK BLKLT Meta Size Data Size LocalDict DictEntries DictSize AvgPageSize Min% Max% \n"
- + "0 0 2.90KB 4.87MB false 0 0.0B 93.76KB 0.0 100.0 \n"
- + "0 1 2.90KB 2.29MB false 0 0.0B 93.76KB 0.0 100.0 \n"
- + "1 0 2.90KB 4.87MB false 0 0.0B 93.76KB 0.0 100.0 \n"
- + "1 1 2.90KB 2.29MB false 0 0.0B 93.76KB 0.0 100.0 \n"
- + "2 0 2.90KB 5.52MB false 0 0.0B 93.76KB 0.0 100.0 \n"
- + "2 1 2.90KB 2.94MB false 0 0.0B 93.76KB 0.0 100.0 \n"
- + "2 2 830.0B 586.81KB false 0 0.0B 83.71KB 0.0 100.0 "));
+ + "0 0 1.81KB 295.98KB false 0 0.0B 11.77KB robot0 robot1 \n"
+ + "0 1 1.81KB 295.99KB false 0 0.0B 11.77KB robot1 robot3 \n"
+ + "1 0 1.81KB 295.98KB false 0 0.0B 11.77KB robot3 robot4 \n"
+ + "1 1 1.81KB 295.99KB false 0 0.0B 11.77KB robot4 robot6 \n"
+ + "2 0 1.81KB 295.98KB false 0 0.0B 11.77KB robot6 robot7 \n"
+ + "2 1 1.81KB 295.98KB false 0 0.0B 11.77KB robot8 robot9 \n"
+ + "2 2 519.0B 74.06KB false 0 0.0B 10.51KB robot9 robot9 "));
}
@Test
@@ -190,10 +142,11 @@ public class CarbonCliTest {
PrintStream stream = new PrintStream(out);
CarbonCli.run(args, stream);
String output = new String(out.toByteArray());
+ System.out.println(output);
Assert.assertTrue(
output.contains(
"Blocklet 0:\n"
- + "Page 0 (offset 0, length 12049): DataChunk2(chunk_meta:ChunkCompressionMeta(compression_codec:DEPRECATED, total_uncompressed_size:0, total_compressed_size:0, compressor_name:snappy), rowMajor:false, data_page_length:12039, rowid_page_length:10, presence:PresenceMeta(represents_presence:false, present_bit_stream:00), sort_state:SORT_EXPLICIT, encoders:[INVERTED_INDEX], encoder_meta:[], min_max:BlockletMinMaxIndex(min_values:[72 6F 62 6F 74 30], max_values:[72 6F 62 6F 74 30], min_max_presence:[true]), numberOfRowsInpage:32000)"));
+ + "Page 0 (offset 0, length 12049): DataChunk2(chunk_meta:ChunkCompressionMeta(compression_codec:DEPRECATED, total_uncompressed_size:256000, total_compressed_size:12049, compressor_name:snappy), rowMajor:false, data_page_length:12039, rowid_page_length:10, presence:PresenceMeta(represents_presence:false, present_bit_stream:00), sort_state:SORT_EXPLICIT, encoders:[INVERTED_INDEX], encoder_meta:[], min_max:BlockletMinMaxIndex(min_values:[72 6F 62 6F 74 30], max_values:[72 6F 62 6F 74 30], min_max_presence:[true]), numberOfRowsInpage:32000)"));
}
@Test