You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ra...@apache.org on 2016/08/01 10:05:26 UTC
[28/47] incubator-carbondata git commit: [CARBONDATA-95] Problem
Columns values with numeric data types are not getting parsed when included
in dictionary_include (#869)
[CARBONDATA-95] Problem: Column values with numeric data types are not getting parsed when included in dictionary_include (#869)
Analysis: When a numeric data type, say Decimal, is defined for a column and that column is listed in dictionary_include, the precision and scale defined by the user are not taken into consideration; every value is accepted as-is and a dictionary entry is generated for it.
Solution: Parse the value as a big decimal both while generating the global dictionary and during dictionary lookup, and apply the precision and scale specified by the user.
Project: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/commit/adba5973
Tree: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/tree/adba5973
Diff: http://git-wip-us.apache.org/repos/asf/incubator-carbondata/diff/adba5973
Branch: refs/heads/master
Commit: adba597364b418156c60c317fa99674dd7ad71d0
Parents: 6dfaefe
Author: manishgupta88 <to...@gmail.com>
Authored: Thu Jul 28 16:09:24 2016 +0530
Committer: Kumar Vishal <ku...@gmail.com>
Committed: Thu Jul 28 16:09:24 2016 +0530
----------------------------------------------------------------------
.../processing/datatypes/PrimitiveDataType.java | 3 +-
.../CarbonCSVBasedDimSurrogateKeyGen.java | 16 +++----
.../csvbased/CarbonCSVBasedSeqGenStep.java | 46 +++++++++++---------
3 files changed, 35 insertions(+), 30 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/adba5973/processing/src/main/java/org/carbondata/processing/datatypes/PrimitiveDataType.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/carbondata/processing/datatypes/PrimitiveDataType.java b/processing/src/main/java/org/carbondata/processing/datatypes/PrimitiveDataType.java
index 3d272d5..d4b9b58 100644
--- a/processing/src/main/java/org/carbondata/processing/datatypes/PrimitiveDataType.java
+++ b/processing/src/main/java/org/carbondata/processing/datatypes/PrimitiveDataType.java
@@ -163,8 +163,7 @@ public class PrimitiveDataType implements GenericDataType {
String[] delimiter, int delimiterIndex, DataOutputStream dataOutputStream,
CarbonCSVBasedDimSurrogateKeyGen surrogateKeyGen) throws KettleException, IOException {
String parsedValue = DataTypeUtil.parseValue(inputString,
- surrogateKeyGen.getDimensionNameToDimensionMapping()
- .get(tableName + CarbonCommonConstants.UNDERSCORE + name));
+ surrogateKeyGen.getDimensionOrdinalToDimensionMapping()[dimensionOrdinal]);
Integer surrogateKey = null;
if (null == parsedValue) {
surrogateKey = CarbonCommonConstants.MEMBER_DEFAULT_VAL_SURROGATE_KEY;
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/adba5973/processing/src/main/java/org/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedDimSurrogateKeyGen.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedDimSurrogateKeyGen.java b/processing/src/main/java/org/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedDimSurrogateKeyGen.java
index 8fc1196..b72de8e 100644
--- a/processing/src/main/java/org/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedDimSurrogateKeyGen.java
+++ b/processing/src/main/java/org/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedDimSurrogateKeyGen.java
@@ -117,9 +117,9 @@ public abstract class CarbonCSVBasedDimSurrogateKeyGen {
new HashMap<String, Map<ArrayWrapper, Integer>>(
CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
/**
- * dimension name to dimension mapping
+ * dimension ordinal to dimension mapping
*/
- private Map<String, CarbonDimension> dimensionNameToDimensionMapping;
+ private CarbonDimension[] dimensionOrdinalToDimensionMapping;
/**
* rwLock2
*/
@@ -520,15 +520,15 @@ public abstract class CarbonCSVBasedDimSurrogateKeyGen {
/**
* @return
*/
- public Map<String, CarbonDimension> getDimensionNameToDimensionMapping() {
- return dimensionNameToDimensionMapping;
+ public CarbonDimension[] getDimensionOrdinalToDimensionMapping() {
+ return dimensionOrdinalToDimensionMapping;
}
/**
- * @param dimensionNameToDimensionMapping
+ * @param dimensionOrdinalToDimensionMapping
*/
- public void setDimensionNameToDimensionMapping(
- Map<String, CarbonDimension> dimensionNameToDimensionMapping) {
- this.dimensionNameToDimensionMapping = dimensionNameToDimensionMapping;
+ public void setDimensionOrdinalToDimensionMapping(
+ CarbonDimension[] dimensionOrdinalToDimensionMapping) {
+ this.dimensionOrdinalToDimensionMapping = dimensionOrdinalToDimensionMapping;
}
}
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/adba5973/processing/src/main/java/org/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedSeqGenStep.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedSeqGenStep.java b/processing/src/main/java/org/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedSeqGenStep.java
index 207e68d..d5ecb37 100644
--- a/processing/src/main/java/org/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedSeqGenStep.java
+++ b/processing/src/main/java/org/carbondata/processing/surrogatekeysgenerator/csvbased/CarbonCSVBasedSeqGenStep.java
@@ -435,7 +435,7 @@ public class CarbonCSVBasedSeqGenStep extends BaseStep {
if (null != getInputRowMeta()) {
generateNoDictionaryAndComplexIndexMapping();
data.getSurrogateKeyGen()
- .setDimensionNameToDimensionMapping(populateNameToCarbonDimensionMap());
+ .setDimensionOrdinalToDimensionMapping(populateNameToCarbonDimensionMap());
}
serializationNullFormat = meta.getTableOptionWrapper().get("serialization_null_format");
}
@@ -1147,10 +1147,8 @@ public class CarbonCSVBasedSeqGenStep extends BaseStep {
surrogateKeyGen.max[m] = Integer.MAX_VALUE;
} else {
- String parsedValue = DataTypeUtil.parseValue(tuple,
- data.getSurrogateKeyGen().getDimensionNameToDimensionMapping().get(
- meta.getTableName() + CarbonCommonConstants.UNDERSCORE + columnName
- .toLowerCase()));
+ String parsedValue = DataTypeUtil.parseValue(tuple, data.getSurrogateKeyGen()
+ .getDimensionOrdinalToDimensionMapping()[memberMapping[i]]);
if(null == parsedValue) {
surrogateKeyForHrrchy[0] = CarbonCommonConstants.MEMBER_DEFAULT_VAL_SURROGATE_KEY;
} else {
@@ -1838,34 +1836,42 @@ public class CarbonCSVBasedSeqGenStep extends BaseStep {
}
}
- private Map<String, CarbonDimension> populateNameToCarbonDimensionMap() {
+ private CarbonDimension[] populateNameToCarbonDimensionMap() {
CarbonTable carbonTable = CarbonMetadata.getInstance().getCarbonTable(
meta.getSchemaName() + CarbonCommonConstants.UNDERSCORE + meta.getTableName());
List<CarbonDimension> dimensionsList = carbonTable.getDimensionByTableName(meta.getTableName());
- Map<String, CarbonDimension> dimensionNameToDimensionMapping =
- new HashMap<>(dimensionsList.size());
- for (CarbonDimension dimension : dimensionsList) {
+ CarbonDimension[] dimensionOrdinalToDimensionMapping =
+ new CarbonDimension[meta.getColumnSchemaDetailsWrapper().getColumnSchemaDetailsMap()
+ .size()];
+ List<CarbonDimension> dimListExcludingNoDictionaryColumn = dimensionsList;
+ if (null != meta.getNoDictionaryDims() && meta.getNoDictionaryDims().length() > 0) {
+ dimListExcludingNoDictionaryColumn =
+ new ArrayList<>(dimensionsList.size() - meta.noDictionaryCols.length);
+ for (CarbonDimension dimension : dimensionsList) {
+ if (!dimension.getEncoder().isEmpty()) {
+ dimListExcludingNoDictionaryColumn.add(dimension);
+ }
+ }
+ }
+ for (int i = 0; i < dimListExcludingNoDictionaryColumn.size(); i++) {
+ CarbonDimension dimension = dimListExcludingNoDictionaryColumn.get(meta.memberMapping[i]);
if (dimension.isComplex()) {
- populateComplexDimension(dimensionNameToDimensionMapping, dimension);
+ populateComplexDimension(dimensionOrdinalToDimensionMapping, dimension);
} else {
- dimensionNameToDimensionMapping.put(
- meta.getTableName() + CarbonCommonConstants.UNDERSCORE + dimension.getColName()
- .toLowerCase(), dimension);
+ dimensionOrdinalToDimensionMapping[meta.memberMapping[i]] = dimension;
}
}
- return dimensionNameToDimensionMapping;
+ return dimensionOrdinalToDimensionMapping;
}
- private void populateComplexDimension(
- Map<String, CarbonDimension> dimensionNameToDimensionMapping, CarbonDimension dimension) {
+ private void populateComplexDimension(CarbonDimension[] dimensionOrdinalToDimensionMapping,
+ CarbonDimension dimension) {
List<CarbonDimension> listOfChildDimensions = dimension.getListOfChildDimensions();
for (CarbonDimension childDimension : listOfChildDimensions) {
if (childDimension.isComplex()) {
- populateComplexDimension(dimensionNameToDimensionMapping, childDimension);
+ populateComplexDimension(dimensionOrdinalToDimensionMapping, childDimension);
} else {
- dimensionNameToDimensionMapping.put(
- meta.getTableName() + CarbonCommonConstants.UNDERSCORE + childDimension.getColName(),
- childDimension);
+ dimensionOrdinalToDimensionMapping[childDimension.getOrdinal()] = childDimension;
}
}
}