You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ra...@apache.org on 2019/05/16 19:05:45 UTC
[carbondata] 12/22: [CARBONDATA-3345] A growing streaming ROW_V1
carbondata file would have some InputSplits ignored
This is an automated email from the ASF dual-hosted git repository.
ravipesala pushed a commit to branch branch-1.5
in repository https://gitbox.apache.org/repos/asf/carbondata.git
commit f80a28dd9fee8c5d355b30d4e422b854a981b796
Author: junyan-zg <27...@qq.com>
AuthorDate: Wed Apr 24 22:46:51 2019 +0800
[CARBONDATA-3345] A growing streaming ROW_V1 carbondata file would have some InputSplits ignored
Inspection of the carbondata segments shows that when a file grows beyond about 150 MB (possibly 128 MB),
Presto splits it into several smaller pieces (input splits) when it initiates a query, and this includes files in ROW_V1 format.
Because of this bug, some of those smaller ROW_V1 pieces are ignored, which makes query results inaccurate.
So for carbondata ROW_V1 inputSplits, I append 'carbonInput.getStart()' to the grouping map key (Java) so that every required inputSplit is kept.
This closes #3186
---
.../org/apache/carbondata/presto/impl/CarbonTableReader.java | 9 ++++++++-
1 file changed, 8 insertions(+), 1 deletion(-)
diff --git a/integration/presto/src/main/java/org/apache/carbondata/presto/impl/CarbonTableReader.java b/integration/presto/src/main/java/org/apache/carbondata/presto/impl/CarbonTableReader.java
index 57d8d5e..7ffe053 100755
--- a/integration/presto/src/main/java/org/apache/carbondata/presto/impl/CarbonTableReader.java
+++ b/integration/presto/src/main/java/org/apache/carbondata/presto/impl/CarbonTableReader.java
@@ -46,6 +46,7 @@ import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
import org.apache.carbondata.core.metadata.schema.table.TableInfo;
import org.apache.carbondata.core.reader.ThriftReader;
import org.apache.carbondata.core.scan.expression.Expression;
+import org.apache.carbondata.core.statusmanager.FileFormat;
import org.apache.carbondata.core.statusmanager.LoadMetadataDetails;
import org.apache.carbondata.core.statusmanager.SegmentStatusManager;
import org.apache.carbondata.core.util.CarbonProperties;
@@ -291,7 +292,13 @@ public class CarbonTableReader {
// Use block distribution
List<List<CarbonLocalInputSplit>> inputSplits = new ArrayList(
result.stream().map(x -> (CarbonLocalInputSplit) x).collect(Collectors.groupingBy(
- carbonInput -> carbonInput.getSegmentId().concat(carbonInput.getPath()))).values());
+ carbonInput -> {
+ if (FileFormat.ROW_V1.equals(carbonInput.getFileFormat())) {
+ return carbonInput.getSegmentId().concat(carbonInput.getPath())
+ .concat(carbonInput.getStart() + "");
+ }
+ return carbonInput.getSegmentId().concat(carbonInput.getPath());
+ })).values());
if (inputSplits != null) {
for (int j = 0; j < inputSplits.size(); j++) {
multiBlockSplitList.add(new CarbonLocalMultiBlockSplit(inputSplits.get(j),