You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@carbondata.apache.org by ja...@apache.org on 2017/03/30 05:12:10 UTC
[02/13] incubator-carbondata git commit: Removed kettle related code
and refactored
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/e6b60907/processing/src/main/java/org/apache/carbondata/processing/surrogatekeysgenerator/csvbased/FileStoreSurrogateKeyGenForCSV.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/surrogatekeysgenerator/csvbased/FileStoreSurrogateKeyGenForCSV.java b/processing/src/main/java/org/apache/carbondata/processing/surrogatekeysgenerator/csvbased/FileStoreSurrogateKeyGenForCSV.java
deleted file mode 100644
index eb0f52b..0000000
--- a/processing/src/main/java/org/apache/carbondata/processing/surrogatekeysgenerator/csvbased/FileStoreSurrogateKeyGenForCSV.java
+++ /dev/null
@@ -1,339 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.carbondata.processing.surrogatekeysgenerator.csvbased;
-
-import java.io.File;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Set;
-
-import org.apache.carbondata.core.cache.Cache;
-import org.apache.carbondata.core.cache.CacheProvider;
-import org.apache.carbondata.core.cache.CacheType;
-import org.apache.carbondata.core.cache.dictionary.Dictionary;
-import org.apache.carbondata.core.cache.dictionary.DictionaryColumnUniqueIdentifier;
-import org.apache.carbondata.core.constants.CarbonCommonConstants;
-import org.apache.carbondata.core.keygenerator.KeyGenException;
-import org.apache.carbondata.core.keygenerator.KeyGenerator;
-import org.apache.carbondata.core.metadata.CarbonMetadata;
-import org.apache.carbondata.core.metadata.CarbonTableIdentifier;
-import org.apache.carbondata.core.metadata.ColumnIdentifier;
-import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
-import org.apache.carbondata.core.util.CarbonProperties;
-import org.apache.carbondata.core.util.CarbonTimeStatisticsFactory;
-import org.apache.carbondata.core.writer.ByteArrayHolder;
-import org.apache.carbondata.core.writer.HierarchyValueWriterForCSV;
-import org.apache.carbondata.processing.datatypes.GenericDataType;
-import org.apache.carbondata.processing.mdkeygen.file.FileData;
-import org.apache.carbondata.processing.mdkeygen.file.FileManager;
-import org.apache.carbondata.processing.mdkeygen.file.IFileManagerComposite;
-import org.apache.carbondata.processing.schema.metadata.ColumnSchemaDetails;
-import org.apache.carbondata.processing.schema.metadata.ColumnSchemaDetailsWrapper;
-import org.apache.carbondata.processing.schema.metadata.ColumnsInfo;
-import org.apache.carbondata.processing.util.CarbonDataProcessorUtil;
-
-import org.pentaho.di.core.exception.KettleException;
-
-public class FileStoreSurrogateKeyGenForCSV extends CarbonCSVBasedDimSurrogateKeyGen {
-
- /**
- * hierValueWriter
- */
- private Map<String, HierarchyValueWriterForCSV> hierValueWriter;
-
- /**
- * keyGenerator
- */
- private Map<String, KeyGenerator> keyGenerator;
-
- /**
- * LOAD_FOLDER
- */
- private String loadFolderName;
-
- /**
- * primaryKeyStringArray
- */
- private String[] primaryKeyStringArray;
- /**
- * partitionID
- */
- private String partitionID;
- /**
- * load Id
- */
- private String segmentId;
- /**
- * task id, each spark task has a unique id
- */
- private String taskNo;
-
- /**
- * @param columnsInfo
- * @throws IOException
- */
- public FileStoreSurrogateKeyGenForCSV(ColumnsInfo columnsInfo, String partitionID,
- String segmentId, String taskNo) throws IOException {
- super(columnsInfo);
- populatePrimaryKeyarray(dimInsertFileNames, columnsInfo.getPrimaryKeyMap());
- this.partitionID = partitionID;
- this.segmentId = segmentId;
- this.taskNo = taskNo;
- keyGenerator = new HashMap<String, KeyGenerator>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
-
- setStoreFolderWithLoadNumber(
- checkAndCreateLoadFolderNumber(columnsInfo.getDatabaseName(),
- columnsInfo.getTableName()));
- fileManager = new FileManager();
- fileManager.setName(loadFolderName + CarbonCommonConstants.FILE_INPROGRESS_STATUS);
-
- hierValueWriter = new HashMap<String, HierarchyValueWriterForCSV>(
- CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
-
- for (Entry<String, String> entry : hierInsertFileNames.entrySet()) {
- String hierFileName = entry.getValue().trim();
- hierValueWriter.put(entry.getKey(),
- new HierarchyValueWriterForCSV(hierFileName, getStoreFolderWithLoadNumber()));
- Map<String, KeyGenerator> keyGenerators = columnsInfo.getKeyGenerators();
- keyGenerator.put(entry.getKey(), keyGenerators.get(entry.getKey()));
- FileData fileData = new FileData(hierFileName, getStoreFolderWithLoadNumber());
- fileData.setHierarchyValueWriter(hierValueWriter.get(entry.getKey()));
- fileManager.add(fileData);
- }
- populateCache();
- //Update the primary key surroagate key map
- updatePrimaryKeyMaxSurrogateMap();
- }
-
- private void populatePrimaryKeyarray(String[] dimInsertFileNames, Map<String, Boolean> map) {
- List<String> primaryKeyList = new ArrayList<String>(CarbonCommonConstants.CONSTANT_SIZE_TEN);
- for (String columnName : dimInsertFileNames) {
- if (null != map.get(columnName)) {
- map.put(columnName, false);
- }
- }
- Set<Entry<String, Boolean>> entrySet = map.entrySet();
- for (Entry<String, Boolean> entry : entrySet) {
- if (entry.getValue()) {
- primaryKeyList.add(entry.getKey().trim());
- }
- }
- primaryKeyStringArray = primaryKeyList.toArray(new String[primaryKeyList.size()]);
- }
-
- /**
- * update the
- */
- private void updatePrimaryKeyMaxSurrogateMap() {
- Map<String, Boolean> primaryKeyMap = columnsInfo.getPrimaryKeyMap();
- for (Entry<String, Boolean> entry : primaryKeyMap.entrySet()) {
- if (!primaryKeyMap.get(entry.getKey())) {
- int repeatedPrimaryFromLevels =
- getRepeatedPrimaryFromLevels(dimInsertFileNames, entry.getKey());
-
- if (null == primaryKeysMaxSurroagetMap) {
- primaryKeysMaxSurroagetMap =
- new HashMap<String, Integer>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
- }
- primaryKeysMaxSurroagetMap.put(entry.getKey(), max[repeatedPrimaryFromLevels]);
- }
- }
- }
-
- private int getRepeatedPrimaryFromLevels(String[] columnNames, String primaryKey) {
- for (int j = 0; j < columnNames.length; j++) {
- if (primaryKey.equals(columnNames[j])) {
- return j;
- }
- }
- return -1;
- }
-
- private String checkAndCreateLoadFolderNumber(String databaseName,
- String tableName) throws IOException {
- String carbonDataDirectoryPath = CarbonDataProcessorUtil
- .getLocalDataFolderLocation(databaseName, tableName, taskNo, partitionID, segmentId + "",
- false);
- boolean isDirCreated = new File(carbonDataDirectoryPath).mkdirs();
- if (!isDirCreated) {
- throw new IOException("Unable to create data load directory" + carbonDataDirectoryPath);
- }
- return carbonDataDirectoryPath;
- }
-
- /**
- * This method will update the maxkey information.
- * @param tabColumnName
- * @param maxKey max cardinality of a column
- */
- private void updateMaxKeyInfo(String tabColumnName, int maxKey) {
- checkAndUpdateMap(maxKey, tabColumnName);
- }
-
- /**
- * This method will generate cache for all the global dictionaries during data loading.
- */
- private void populateCache() throws IOException {
- String carbonStorePath =
- CarbonProperties.getInstance().getProperty(CarbonCommonConstants.STORE_LOCATION_HDFS);
- String[] dimColumnNames = columnsInfo.getDimColNames();
- String[] dimColumnIds = columnsInfo.getDimensionColumnIds();
- String databaseName = columnsInfo.getDatabaseName();
- String tableName = columnsInfo.getTableName();
- CarbonTable carbonTable = CarbonMetadata.getInstance()
- .getCarbonTable(databaseName + CarbonCommonConstants.UNDERSCORE + tableName);
- CarbonTableIdentifier carbonTableIdentifier = carbonTable.getCarbonTableIdentifier();
- CacheProvider cacheProvider = CacheProvider.getInstance();
- Cache reverseDictionaryCache =
- cacheProvider.createCache(CacheType.REVERSE_DICTIONARY, carbonStorePath);
- List<String> dictionaryKeys = new ArrayList<>(dimColumnNames.length);
- List<DictionaryColumnUniqueIdentifier> dictionaryColumnUniqueIdentifiers =
- new ArrayList<>(dimColumnNames.length);
- ColumnSchemaDetailsWrapper columnSchemaDetailsWrapper =
- columnsInfo.getColumnSchemaDetailsWrapper();
- // update the member cache for dimension
- for (int i = 0; i < dimColumnNames.length; i++) {
- String dimColName = dimColumnNames[i].substring(tableName.length() + 1);
- ColumnSchemaDetails details = columnSchemaDetailsWrapper.get(dimColumnIds[i]);
- if (details.isDirectDictionary()) {
- continue;
- }
- GenericDataType complexType = columnsInfo.getComplexTypesMap().get(dimColName);
- if (complexType != null) {
- List<GenericDataType> primitiveChild = new ArrayList<GenericDataType>();
- complexType.getAllPrimitiveChildren(primitiveChild);
- for (GenericDataType eachPrimitive : primitiveChild) {
- details = columnSchemaDetailsWrapper.get(eachPrimitive.getColumnId());
- if (details.isDirectDictionary()) {
- continue;
- }
- ColumnIdentifier columnIdentifier = new ColumnIdentifier(eachPrimitive.getColumnId(),
- columnsInfo.getColumnProperties(eachPrimitive.getName()), details.getColumnType());
- String dimColumnName =
- tableName + CarbonCommonConstants.UNDERSCORE + eachPrimitive.getName();
- DictionaryColumnUniqueIdentifier dictionaryColumnUniqueIdentifier =
- new DictionaryColumnUniqueIdentifier(carbonTableIdentifier, columnIdentifier);
- dictionaryColumnUniqueIdentifiers.add(dictionaryColumnUniqueIdentifier);
- dictionaryKeys.add(dimColumnName);
- }
- } else {
- ColumnIdentifier columnIdentifier =
- new ColumnIdentifier(dimColumnIds[i], columnsInfo.getColumnProperties(dimColName),
- details.getColumnType());
- DictionaryColumnUniqueIdentifier dictionaryColumnUniqueIdentifier =
- new DictionaryColumnUniqueIdentifier(carbonTableIdentifier, columnIdentifier);
- dictionaryColumnUniqueIdentifiers.add(dictionaryColumnUniqueIdentifier);
- dictionaryKeys.add(dimColumnNames[i]);
- }
- }
- initDictionaryCacheInfo(dictionaryKeys, dictionaryColumnUniqueIdentifiers,
- reverseDictionaryCache);
- }
-
- /**
- * This method will initial the needed information for a dictionary of one column.
- *
- * @param dictionaryKeys
- * @param dictionaryColumnUniqueIdentifiers
- * @param reverseDictionaryCache
- * @throws KettleException
- */
- private void initDictionaryCacheInfo(List<String> dictionaryKeys,
- List<DictionaryColumnUniqueIdentifier> dictionaryColumnUniqueIdentifiers,
- Cache reverseDictionaryCache) throws IOException {
- long lruCacheStartTime = System.currentTimeMillis();
- List reverseDictionaries = reverseDictionaryCache.getAll(dictionaryColumnUniqueIdentifiers);
- for (int i = 0; i < reverseDictionaries.size(); i++) {
- Dictionary reverseDictionary = (Dictionary) reverseDictionaries.get(i);
- getDictionaryCaches().put(dictionaryKeys.get(i), reverseDictionary);
- updateMaxKeyInfo(dictionaryKeys.get(i), reverseDictionary.getDictionaryChunks().getSize());
- }
- CarbonTimeStatisticsFactory.getLoadStatisticsInstance().recordLruCacheLoadTime(
- (System.currentTimeMillis() - lruCacheStartTime) / 1000.0);
- }
-
- @Override protected int getSurrogateFromStore(String value, int index, Object[] properties)
- throws KettleException {
- max[index]++;
- int key = max[index];
- return key;
- }
-
- @Override
- protected int updateSurrogateToStore(String tuple, String columnName, int index, int key,
- Object[] properties) throws KettleException {
- Map<String, Integer> cache = getTimeDimCache().get(columnName);
- if (cache == null) {
- return key;
- }
- return key;
- }
-
- private void checkAndUpdateMap(int maxKey, String dimInsertFileNames) {
- String[] dimsFiles2 = getDimsFiles();
- for (int i = 0; i < dimsFiles2.length; i++) {
- if (dimInsertFileNames.equalsIgnoreCase(dimsFiles2[i])) {
- if (max[i] < maxKey) {
- max[i] = maxKey;
- break;
- }
- }
- }
-
- }
-
- @Override public boolean isCacheFilled(String[] columns) {
- for (String column : columns) {
- Dictionary dicCache = getDictionaryCaches().get(column);
- if (null == dicCache) {
- return true;
- }
- }
- return false;
- }
-
- public IFileManagerComposite getFileManager() {
- return fileManager;
- }
-
- @Override protected byte[] getNormalizedHierFromStore(int[] val, String hier, int primaryKey,
- HierarchyValueWriterForCSV hierWriter) throws KettleException {
- byte[] bytes;
- try {
- bytes = columnsInfo.getKeyGenerators().get(hier).generateKey(val);
- hierWriter.getByteArrayList().add(new ByteArrayHolder(bytes, primaryKey));
- } catch (KeyGenException e) {
- throw new KettleException(e);
- }
- return bytes;
- }
-
- @Override public int getSurrogateForMeasure(String tuple, String columnName)
- throws KettleException {
- Integer measureSurrogate = null;
- Map<String, Dictionary> dictionaryCaches = getDictionaryCaches();
- Dictionary dicCache = dictionaryCaches.get(columnName);
- measureSurrogate = dicCache.getSurrogateKey(tuple);
- return measureSurrogate;
- }
-
-}
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/e6b60907/processing/src/main/java/org/apache/carbondata/processing/surrogatekeysgenerator/dbbased/messages/messages_en_US.properties
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/surrogatekeysgenerator/dbbased/messages/messages_en_US.properties b/processing/src/main/java/org/apache/carbondata/processing/surrogatekeysgenerator/dbbased/messages/messages_en_US.properties
deleted file mode 100644
index d5b97fa..0000000
--- a/processing/src/main/java/org/apache/carbondata/processing/surrogatekeysgenerator/dbbased/messages/messages_en_US.properties
+++ /dev/null
@@ -1,61 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-CarbonDialog.Shell.Title=Carbon Surrogate Generator
-CarbonDialog.Group.Label=Carbon Settings
-CarbonDialog.dim.Label=Dimensions
-CarbonDialog.dim.Tooltip=Dimension index separated by comma(,)
-CarbonDialog.msr.Label=Measures
-CarbonDialog.msr.Tooltip=Measures index separated by comma(,)
-CarbonDialog.hier.Label=Hierarichies
-CarbonDialog.hier.Tooltip=Hierarichies seperated by &.
-
-CarbonDialog.time.Label=Time Hierarichy
-CarbonDialog.time.Tooltip=Time Hierarichy seperated by ,
-
-CarbonDialog.loc.Label=Location
-CarbonDialog.loc.Tooltip=Location to keep hierarichy files
-
-CarbonDialog.con.Label=Connection
-CarbonDialog.con.Tooltip=Jdbc url
-
-CarbonDialog.Schema.Label=Schema
-
-CarbonDialog.BatchSize.Label=Carbon Batch Size
-CarbonDialog.BatchSize.Tooltip=Carbon Batch Size
-
-CarbonDialog.CarbonProperties.Label=Carbon Properties
-CarbonDialog.CarbonProperties.Tooltip=Carbon Properties
-
-CarbonDialog.MetaHier.Label=Carbon Metadata Hierarichies
-CarbonDialog.MetaHier.Tooltip=Carbon Metadata Hierarichies
-
-CarbonDialog.IsInitialLoad=Initial Load
-
-CarbonDialog.MetadataFolderLocation.Label= Carbon Metadata Files Location
-CarbonDialog.MetadataFolderLocation.Tooltip= Carbon Metadata Files Location
-
-CarbonDialog.DoMapping.UnableToFindInput=Cannot find previous fields
-CarbonStep.Exception.UnexpectedErrorInReadingStepInfo=Error reading step info
-CarbonStep.Exception.UnableToSaveStepInfoToRepository=Error saving step info
-
-CarbonStep.Error.UnableFindField=Unable to find key field
-CarbonStep.Check.StepIsReceivingInfoFromOtherSteps=Step is receiving input from other steps
-CarbonStep.Check.NoInputReceivedFromOtherSteps=Step is not receiving any input from other steps
-CarbonStep.Check.MissingFieldsNotFoundInInput=Required fields not found in input
-CarbonStep.Check.AllFieldsFoundInInput=All required fields found
-CarbonStep.Check.CouldNotReadFromPreviousSteps=Could not read from previous steps
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/e6b60907/processing/src/main/java/org/apache/carbondata/processing/util/CarbonDataProcessorUtil.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/util/CarbonDataProcessorUtil.java b/processing/src/main/java/org/apache/carbondata/processing/util/CarbonDataProcessorUtil.java
index bde8215..27662a4 100644
--- a/processing/src/main/java/org/apache/carbondata/processing/util/CarbonDataProcessorUtil.java
+++ b/processing/src/main/java/org/apache/carbondata/processing/util/CarbonDataProcessorUtil.java
@@ -55,12 +55,6 @@ import org.apache.carbondata.processing.model.CarbonDataLoadSchema;
import org.apache.carbondata.processing.newflow.DataField;
import org.apache.commons.lang3.ArrayUtils;
-import org.pentaho.di.core.CheckResult;
-import org.pentaho.di.core.CheckResultInterface;
-import org.pentaho.di.core.Const;
-import org.pentaho.di.core.row.RowMetaInterface;
-import org.pentaho.di.i18n.BaseMessages;
-import org.pentaho.di.trans.step.StepMeta;
public final class CarbonDataProcessorUtil {
private static final LogService LOGGER =
@@ -149,9 +143,7 @@ public final class CarbonDataProcessorUtil {
String badRecordsInProgressFileName = null;
String changedFileName = null;
- // CHECKSTYLE:OFF
for (CarbonFile badFiles : listFiles) {
- // CHECKSTYLE:ON
badRecordsInProgressFileName = badFiles.getName();
changedFileName = badLogStoreLocation + File.separator + badRecordsInProgressFileName
@@ -164,51 +156,6 @@ public final class CarbonDataProcessorUtil {
LOGGER.error("Unable to delete File : " + badFiles.getName());
}
}
- } // CHECKSTYLE:ON
- }
-
- public static void checkResult(List<CheckResultInterface> remarks, StepMeta stepMeta,
- String[] input) {
- CheckResult cr;
-
- // See if we have input streams leading to this step!
- if (input.length > 0) {
- cr = new CheckResult(CheckResult.TYPE_RESULT_OK, "Step is receiving info from other steps.",
- stepMeta);
- remarks.add(cr);
- } else {
- cr = new CheckResult(CheckResult.TYPE_RESULT_ERROR, "No input received from other steps!",
- stepMeta);
- remarks.add(cr);
- }
- }
-
- public static void check(Class<?> pkg, List<CheckResultInterface> remarks, StepMeta stepMeta,
- RowMetaInterface prev, String[] input) {
- CheckResult cr;
-
- // See if we have input streams leading to this step!
- if (input.length > 0) {
- cr = new CheckResult(CheckResult.TYPE_RESULT_OK,
- BaseMessages.getString(pkg, "CarbonStep.Check.StepIsReceivingInfoFromOtherSteps"),
- stepMeta);
- remarks.add(cr);
- } else {
- cr = new CheckResult(CheckResult.TYPE_RESULT_ERROR,
- BaseMessages.getString(pkg, "CarbonStep.Check.NoInputReceivedFromOtherSteps"), stepMeta);
- remarks.add(cr);
- }
-
- // also check that each expected key fields are acually coming
- if (prev != null && prev.size() > 0) {
- cr = new CheckResult(CheckResultInterface.TYPE_RESULT_OK,
- BaseMessages.getString(pkg, "CarbonStep.Check.AllFieldsFoundInInput"), stepMeta);
- remarks.add(cr);
- } else {
- String errorMessage =
- BaseMessages.getString(pkg, "CarbonStep.Check.CouldNotReadFromPreviousSteps") + Const.CR;
- cr = new CheckResult(CheckResultInterface.TYPE_RESULT_ERROR, errorMessage, stepMeta);
- remarks.add(cr);
}
}
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/e6b60907/processing/src/main/java/org/apache/carbondata/processing/util/CarbonSchemaParser.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/apache/carbondata/processing/util/CarbonSchemaParser.java b/processing/src/main/java/org/apache/carbondata/processing/util/CarbonSchemaParser.java
deleted file mode 100644
index bba40e9..0000000
--- a/processing/src/main/java/org/apache/carbondata/processing/util/CarbonSchemaParser.java
+++ /dev/null
@@ -1,1076 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.carbondata.processing.util;
-
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.HashSet;
-import java.util.Iterator;
-import java.util.LinkedHashMap;
-import java.util.LinkedHashSet;
-import java.util.List;
-import java.util.Map;
-import java.util.Map.Entry;
-import java.util.Set;
-
-import org.apache.carbondata.core.constants.CarbonCommonConstants;
-import org.apache.carbondata.core.keygenerator.factory.KeyGeneratorFactory;
-import org.apache.carbondata.core.metadata.datatype.DataType;
-import org.apache.carbondata.core.metadata.encoder.Encoding;
-import org.apache.carbondata.core.metadata.schema.table.CarbonTable;
-import org.apache.carbondata.core.metadata.schema.table.column.CarbonDimension;
-import org.apache.carbondata.core.metadata.schema.table.column.CarbonMeasure;
-import org.apache.carbondata.core.util.CarbonUtil;
-import org.apache.carbondata.processing.model.CarbonDataLoadSchema;
-import org.apache.carbondata.processing.model.CarbonDataLoadSchema.DimensionRelation;
-import org.apache.carbondata.processing.schema.metadata.ColumnSchemaDetails;
-import org.apache.carbondata.processing.schema.metadata.ColumnSchemaDetailsWrapper;
-
-public final class CarbonSchemaParser {
- /**
- *
- */
- public static final String QUOTES = "\"";
-
- private CarbonSchemaParser() {
-
- }
-
- /**
- * This method Return the dimension queries based on quotest required or not.
- *
- * @param dimensions
- * @return
- */
- public static String getDimensionSQLQueries(List<CarbonDimension> dimensions,
- CarbonDataLoadSchema carbonDataLoadSchema, boolean isQuotesRequired, String quote) {
- if (isQuotesRequired) {
- return getDimensionSQLQueriesWithQuotes(dimensions, carbonDataLoadSchema, quote);
- } else {
- return getDimensionSQLQueries(dimensions, carbonDataLoadSchema);
- }
- }
-
- public static String getDenormColNames(List<CarbonDimension> dimensions,
- CarbonDataLoadSchema carbonDataLoadSchema) {
- //
- List<String> foreignKeys = new ArrayList<String>(CarbonCommonConstants.CONSTANT_SIZE_TEN);
- Set<String> allRelationCols = new HashSet<String>();
-
- for (DimensionRelation dimensionRelation : carbonDataLoadSchema.getDimensionRelationList()) {
- foreignKeys.add(dimensionRelation.getRelation().getFactForeignKeyColumn());
- allRelationCols.addAll(dimensionRelation.getColumns());
- }
-
- StringBuilder columns = new StringBuilder();
-
- for (CarbonDimension dim : dimensions) {
- if (foreignKeys.contains(dim.getColName()) && !allRelationCols.contains(dim.getColName())) {
- columns.append(dim.getColName());
- columns.append(CarbonCommonConstants.HASH_SPC_CHARACTER);
- }
- }
-
- String columnstr = columns.toString();
- if (columnstr.length() > 0 && columnstr.endsWith(CarbonCommonConstants.HASH_SPC_CHARACTER)) {
- columnstr = columnstr
- .substring(0, columnstr.length() - CarbonCommonConstants.HASH_SPC_CHARACTER.length());
- }
-
- return columnstr;
- }
-
- private static String getDimensionSQLQueries(List<CarbonDimension> dimensions,
- CarbonDataLoadSchema carbonDataLoadSchema) {
- //
- List<String> queryList = new ArrayList<String>(CarbonCommonConstants.CONSTANT_SIZE_TEN);
- for (CarbonDimension dim : dimensions) {
-
- String tableName = extractDimensionTableName(dim.getColName(), carbonDataLoadSchema);
- StringBuilder query;
- String factTableName = carbonDataLoadSchema.getCarbonTable().getFactTableName();
- if (factTableName.equals(tableName)) {
- continue;
- }
- String dimName = dim.getColName();
- query =
- new StringBuilder(dimName + '_' + tableName + CarbonCommonConstants.COLON_SPC_CHARACTER);
-
- String primaryKey = null;
- for (DimensionRelation dimensionRelation : carbonDataLoadSchema.getDimensionRelationList()) {
- for (String field : dimensionRelation.getColumns()) {
- if (dimName.equals(field)) {
- primaryKey = dimensionRelation.getRelation().getDimensionPrimaryKeyColumn();
- break;
- }
- }
- if (null != primaryKey) {
- break;
- }
- }
- query.append("SELECT ");
- query.append(primaryKey + ',');
- query.append(dimName);
- query.append(" FROM " + tableName);
- queryList.add(query.toString());
- }
- StringBuilder finalQuryString = new StringBuilder();
-
- for (int i = 0; i < queryList.size() - 1; i++) {
- finalQuryString.append(queryList.get(i));
- finalQuryString.append(CarbonCommonConstants.HASH_SPC_CHARACTER);
- }
- if (queryList.size() > 0) {
- finalQuryString.append(queryList.get(queryList.size() - 1));
- }
- return finalQuryString.toString();
- }
-
- /**
- * @param table
- * @param dimensions
- * @return
- */
-
- private static String getDimensionSQLQueriesWithQuotes(List<CarbonDimension> dimensions,
- CarbonDataLoadSchema carbonDataLoadSchema, String quotes) {
- //
- List<String> queryList = new ArrayList<String>(CarbonCommonConstants.CONSTANT_SIZE_TEN);
- // Property[] properties = null;
- for (CarbonDimension dim : dimensions) {
-
- String tableName = extractDimensionTableName(dim.getColName(), carbonDataLoadSchema);
- StringBuilder query;
- String factTableName = carbonDataLoadSchema.getCarbonTable().getFactTableName();
- if (factTableName.equals(tableName)) {
- continue;
- }
- String dimName = dim.getColName();
- query =
- new StringBuilder(dimName + '_' + tableName + CarbonCommonConstants.COLON_SPC_CHARACTER);
-
- String primaryKey = null;
- for (DimensionRelation dimensionRelation : carbonDataLoadSchema.getDimensionRelationList()) {
- for (String field : dimensionRelation.getColumns()) {
- if (dimName.equals(field)) {
- primaryKey = dimensionRelation.getRelation().getDimensionPrimaryKeyColumn();
- break;
- }
- }
- if (null != primaryKey) {
- break;
- }
- }
- query.append("SELECT ");
- query.append(quotes + primaryKey + quotes + ',');
- query.append(quotes + dimName + quotes);
- query.append(" FROM " + quotes + tableName + quotes);
- queryList.add(query.toString());
- }
- StringBuilder finalQuryString = new StringBuilder();
-
- for (int i = 0; i < queryList.size() - 1; i++) {
- finalQuryString.append(queryList.get(i));
- finalQuryString.append(CarbonCommonConstants.HASH_SPC_CHARACTER);
- }
- if (queryList.size() > 0) {
- finalQuryString.append(queryList.get(queryList.size() - 1));
- }
- return finalQuryString.toString();
- }
-
- /**
- * @param dimensions
- * @param measures
- * @param factTableName
- * @param isQuotesRequired
- * @param schemaInfo
- * @return
- */
- public static String getTableInputSQLQuery(List<CarbonDimension> dimensions,
- List<CarbonMeasure> measures, String factTableName, boolean isQuotesRequired,
- CarbonDataLoadSchema carbonDataLoadSchema) {
- StringBuilder query = new StringBuilder("SELECT ");
-
- getQueryForDimension(dimensions, query, factTableName, isQuotesRequired, carbonDataLoadSchema);
-
- if (!"select".equalsIgnoreCase(query.toString().trim())) {
- query.append(",");
- }
- Set<String> uniqueMsrCols = new HashSet<String>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
- for (int i = 0; i < measures.size(); i++) {
-
- uniqueMsrCols.add(measures.get(i).getColName());
- }
- String[] uniqueMeasure = uniqueMsrCols.toArray(new String[uniqueMsrCols.size()]);
- for (int j = 0; j < uniqueMeasure.length; j++) {
- query.append(System.getProperty("line.separator"));
- if (isQuotesRequired) {
- query.append(QUOTES + uniqueMeasure[j] + QUOTES);
- } else {
- query.append(uniqueMeasure[j]);
- }
-
- if (j != uniqueMeasure.length - 1) {
-
- query.append(",");
- }
- }
- query.append(System.getProperty("line.separator"));
-
- if (isQuotesRequired) {
- query.append(" FROM " + QUOTES + factTableName + QUOTES + ' ');
- } else {
- query.append(" FROM " + factTableName + ' ');
- }
-
- return query.toString();
- }
-
- private static void getQueryForDimension(List<CarbonDimension> dimensions, StringBuilder query,
- String factTableName, boolean isQuotesRequired, CarbonDataLoadSchema carbonDataLoadSchema) {
- int counter = 0;
- for (CarbonDimension cDim : dimensions) {
-
- String foreignKey = null;
- for (DimensionRelation dimensionRelation : carbonDataLoadSchema.getDimensionRelationList()) {
- for (String field : dimensionRelation.getColumns()) {
- if (cDim.getColName().equals(field)) {
- foreignKey = dimensionRelation.getRelation().getFactForeignKeyColumn();
- }
- }
- }
- if (foreignKey != null) {
- query.append(System.getProperty("line.separator"));
- if (counter != 0) {
- query.append(',');
- }
-
- if (isQuotesRequired) {
- query.append(QUOTES + foreignKey + QUOTES);
- } else {
- query.append(foreignKey);
- }
- continue;
- } else {
- query.append(System.getProperty("line.separator"));
- if (counter != 0) {
- query.append(',');
- }
-
- if (isQuotesRequired) {
- query.append(QUOTES + factTableName + QUOTES + '.' + QUOTES + cDim.getColName() + QUOTES);
- } else {
- query.append(factTableName + '.' + cDim.getColName());
- }
- }
- counter++;
- }
- }
-
- /**
- * Get dimension string from a array of TableDimension,which can be shared
- * TableDimension within schema or in a table.
- *
- * @param table
- * @param dimensions
- * @return
- */
- public static int getDimensionString(List<CarbonDimension> dimensions, StringBuilder dimString,
- int counter, CarbonDataLoadSchema carbonDataLoadSchema) {
- for (CarbonDimension cDimension : dimensions) {
- if (!cDimension.getEncoder().contains(Encoding.DICTIONARY)) {
- continue;
- }
-
- String tableName = extractDimensionTableName(cDimension.getColName(), carbonDataLoadSchema);
- dimString.append(
- tableName + '_' + cDimension.getColName() + CarbonCommonConstants.COLON_SPC_CHARACTER
- + counter + CarbonCommonConstants.COLON_SPC_CHARACTER + -1
- + CarbonCommonConstants.COLON_SPC_CHARACTER + 'Y'
- + CarbonCommonConstants.COMA_SPC_CHARACTER);
- counter++;
- }
- return counter;
- }
-
- /**
- * Return mapping of Column name to cardinality
- */
-
- public static Map<String, String> getCardinalities(List<CarbonDimension> dimensions,
- CarbonDataLoadSchema carbonDataLoadSchema) {
- Map<String, String> cardinalities = new LinkedHashMap<String, String>();
- for (CarbonDimension cDimension : dimensions) {
- String tableName = extractDimensionTableName(cDimension.getColName(), carbonDataLoadSchema);
- cardinalities.put(tableName + '_' + cDimension.getColName(), -1 + "");
- }
- return cardinalities;
- }
-
- /**
- * Get measure string from a array of Measure
- *
- * @param measures
- * @return
- */
- public static String getMeasureString(List<CarbonMeasure> measures, int counter) {
- StringBuilder measureString = new StringBuilder();
- int i = measures.size();
- for (CarbonMeasure measure : measures) {
-
- measureString
- .append(measure.getColName() + CarbonCommonConstants.COLON_SPC_CHARACTER + counter);
- counter++;
- if (i > 1) {
- measureString.append(CarbonCommonConstants.COMA_SPC_CHARACTER);
- }
- i--;
-
- }
- return measureString.toString();
- }
-
- /**
- * Get measure string from a array of Measure
- *
- * @param measures
- * @return
- */
- public static String[] getMeasures(List<CarbonMeasure> measures) {
- String[] measuresStringArray = new String[measures.size()];
-
- for (int i = 0; i < measuresStringArray.length; i++) {
- measuresStringArray[i] = measures.get(i).getColName();
- }
- return measuresStringArray;
- }
-
- //TODO SIMIAN
-
- /**
- * Get hierarchy string from dimensions
- *
- * @param dimensions
- * @return
- */
- public static String getHierarchyString(List<CarbonDimension> dimensions,
- CarbonDataLoadSchema carbonDataLoadSchema) {
- StringBuilder hierString = new StringBuilder();
- String hierStr = "";
-
- for (CarbonDimension cDimension : dimensions) {
- if (cDimension.getEncoder().contains(Encoding.DICTIONARY)) {
- continue;
- }
- String tableName = extractDimensionTableName(cDimension.getColName(), carbonDataLoadSchema);
- String cDimName = cDimension.getColName();
- hierStr = 0 + CarbonCommonConstants.AMPERSAND_SPC_CHARACTER;
- hierStr = cDimName + '_' + tableName + CarbonCommonConstants.COLON_SPC_CHARACTER + hierStr;
- hierString.append(hierStr);
- }
-
- hierStr = hierString.toString();
- if (hierStr.length() > 0 && hierStr.endsWith(CarbonCommonConstants.AMPERSAND_SPC_CHARACTER)) {
- hierStr = hierStr
- .substring(0, hierStr.length() - CarbonCommonConstants.AMPERSAND_SPC_CHARACTER.length());
- }
- return hierStr;
- }
-
- /**
- * this method will return table columns
- *
- * @param dimensions
- * @param carbonDataLoadSchema
- * @return
- */
- public static String[] getTableDimensions(List<CarbonDimension> dimensions,
- CarbonDataLoadSchema carbonDataLoadSchema) {
- List<String> list = new ArrayList<String>(CarbonCommonConstants.CONSTANT_SIZE_TEN);
- for (CarbonDimension cDimension : dimensions) {
- // Ignoring the dimensions which are high cardinality dimension
- if (!cDimension.getEncoder().contains(Encoding.DICTIONARY)) {
- continue;
- }
- list.add(extractDimensionTableName(cDimension.getColName(), carbonDataLoadSchema) + "_"
- + cDimension.getColName());
- }
- String[] fields = new String[list.size()];
- fields = list.toArray(fields);
- return fields;
- }
-
- /**
- * This method will extract dimension table name,
- * By default, fact table name will be returned.
- *
- * @param dimensionColName
- * @param carbonDataLoadSchema
- * @return
- */
- private static String extractDimensionTableName(String dimensionColName,
- CarbonDataLoadSchema carbonDataLoadSchema) {
- List<DimensionRelation> dimensionRelationList = carbonDataLoadSchema.getDimensionRelationList();
-
- for (DimensionRelation dimensionRelation : dimensionRelationList) {
- for (String field : dimensionRelation.getColumns()) {
- if (dimensionColName.equals(field)) {
- return dimensionRelation.getTableName();
- }
- }
- }
- return carbonDataLoadSchema.getCarbonTable().getFactTableName();
- }
-
- /**
- * It will return all column groups in below format
- * 0,1~2~3,4,5,6~7~8,9
- * groups are
- * ,-> all ordinal with different group id
- * ~-> all ordinal with same group id
- *
- * @param dimensions
- * @return
- */
- public static String getColumnGroups(List<CarbonDimension> dimensions) {
- StringBuilder columnGroups = new StringBuilder();
- for (int i = 0; i < dimensions.size(); i++) {
- CarbonDimension dimension = dimensions.get(i);
- //assuming complex dimensions will always be atlast
- if (null != dimension.getListOfChildDimensions() &&
- dimension.getListOfChildDimensions().size() > 0) {
- break;
- }
- if (!dimension.hasEncoding(Encoding.DICTIONARY)) {
- continue;
- }
- columnGroups.append(dimension.getOrdinal());
- if (i < dimensions.size() - 1) {
- int currGroupOrdinal = dimension.columnGroupId();
- int nextGroupOrdinal = dimensions.get(i + 1).columnGroupId();
- if (currGroupOrdinal == nextGroupOrdinal && currGroupOrdinal != -1) {
- columnGroups.append("~");
- } else {
- columnGroups.append(",");
- }
- }
-
- }
- return columnGroups.toString();
- }
-
- /**
- * getHeirAndCardinalityString
- *
- * @param dimensions
- * @param schema
- * @return String
- */
- public static String getHeirAndCardinalityString(List<CarbonDimension> dimensions,
- CarbonDataLoadSchema carbonDataLoadSchema) {
- StringBuilder builder = new StringBuilder();
- String heirName = null;
- for (CarbonDimension cDimension : dimensions) {
- heirName = extractDimensionTableName(cDimension.getColName(), carbonDataLoadSchema);
- String dimName = cDimension.getColName();
- builder.append(dimName + '_' + heirName + ".hierarchy");
- builder.append(CarbonCommonConstants.COLON_SPC_CHARACTER);
- builder.append(-1);
- builder.append(CarbonCommonConstants.AMPERSAND_SPC_CHARACTER);
- }
- return builder.toString();
- }
-
- /**
- * @param dimensions
- * @return
- */
- public static String getMetaHeirString(List<CarbonDimension> dimensions, CarbonTable schema) {
- StringBuilder propString = new StringBuilder();
- String tableName = schema.getFactTableName();
- for (CarbonDimension cDimension : dimensions) {
- propString.append(tableName + "_" + cDimension.getColName());
- propString.append(CarbonCommonConstants.HASH_SPC_CHARACTER);
-
- }
- // Delete the last special character
- String prop = propString.toString();
- if (prop.endsWith(CarbonCommonConstants.HASH_SPC_CHARACTER)) {
- prop = prop.substring(0, prop.length() - CarbonCommonConstants.HASH_SPC_CHARACTER.length());
- }
- return prop;
- }
-
- public static String getTableNameString(List<CarbonDimension> dimensions,
- CarbonDataLoadSchema carbonDataLoadSchema) {
- StringBuilder stringBuffer = new StringBuilder();
-
- for (CarbonDimension cDimension : dimensions) {
- String tableName = extractDimensionTableName(cDimension.getColName(), carbonDataLoadSchema);
-
- stringBuffer.append(cDimension.getColName() + '_' + cDimension.getColName());
- stringBuffer.append(CarbonCommonConstants.COLON_SPC_CHARACTER);
- stringBuffer.append(tableName);
- stringBuffer.append(CarbonCommonConstants.AMPERSAND_SPC_CHARACTER);
- }
- // Delete the last & character
- String string = stringBuffer.toString();
- if (string.endsWith(CarbonCommonConstants.AMPERSAND_SPC_CHARACTER)) {
- string = string
- .substring(0, string.length() - CarbonCommonConstants.AMPERSAND_SPC_CHARACTER.length());
- }
- return string;
- }
-
- /**
- * This method will concatenate all the column ids for a given list of dimensions
- *
- * @param dimensions
- * @return
- */
- public static String getColumnIdString(List<CarbonDimension> dimensions) {
- StringBuilder stringBuffer = new StringBuilder();
- for (CarbonDimension cDimension : dimensions) {
- if (!cDimension.hasEncoding(Encoding.DICTIONARY)) {
- continue;
- }
- stringBuffer.append(cDimension.getColumnId());
- stringBuffer.append(CarbonCommonConstants.AMPERSAND_SPC_CHARACTER);
- }
- // Delete the last & character
- String string = stringBuffer.toString();
- if (string.endsWith(CarbonCommonConstants.AMPERSAND_SPC_CHARACTER)) {
- string = string
- .substring(0, string.length() - CarbonCommonConstants.AMPERSAND_SPC_CHARACTER.length());
- }
- return string;
- }
-
- /**
- * @param dimensions
- * @param schema
- * @return
- */
- public static String getMdkeySizeForFact(List<CarbonDimension> dimensions) {
- int[] dims = new int[dimensions.size()];
- for (int i = 0; i < dims.length; i++) {
- dims[i] = -1;
- }
- return KeyGeneratorFactory.getKeyGenerator(dims).getKeySizeInBytes() + "";
- }
-
- /**
- * @param dimensions
- * @param schema
- * @return
- */
- public static String getHeirAndKeySizeMapForFact(List<CarbonDimension> dimensions,
- CarbonDataLoadSchema carbonDataLoadSchema) {
- StringBuilder stringBuffer = new StringBuilder();
- String heirName = null;
- int[] dims = null;
- int keySizeInBytes = 0;
- for (CarbonDimension cDimension : dimensions) {
- String dimName = cDimension.getColName();
- heirName = extractDimensionTableName(dimName, carbonDataLoadSchema);
- dims = new int[] { -1 };
- keySizeInBytes = KeyGeneratorFactory.getKeyGenerator(dims).getKeySizeInBytes();
- stringBuffer.append(dimName + '_' + heirName + CarbonCommonConstants.HIERARCHY_FILE_EXTENSION
- + CarbonCommonConstants.COLON_SPC_CHARACTER + keySizeInBytes
- + CarbonCommonConstants.AMPERSAND_SPC_CHARACTER);
- }
- return stringBuffer.toString();
- }
-
- /**
- * @param dimensions
- * @return
- */
- public static String getHierarchyStringWithColumnNames(List<CarbonDimension> dimensions,
- CarbonDataLoadSchema carbonDataLoadSchema) {
-
- StringBuilder hierString = new StringBuilder();
- String hierStr = "";
-
- for (CarbonDimension cDimension : dimensions) {
- if (cDimension.getEncoder().contains(Encoding.DICTIONARY)) {
- continue;
- }
- String tableName = extractDimensionTableName(cDimension.getColName(), carbonDataLoadSchema);
- String cDimName = cDimension.getColName();
- hierStr = cDimName + CarbonCommonConstants.AMPERSAND_SPC_CHARACTER;
- hierStr = cDimName + '_' + tableName + CarbonCommonConstants.COLON_SPC_CHARACTER + hierStr;
- hierString.append(hierStr);
- }
-
- hierStr = hierString.toString();
- if (hierStr.length() > 0 && hierStr.endsWith(CarbonCommonConstants.AMPERSAND_SPC_CHARACTER)) {
- hierStr = hierStr
- .substring(0, hierStr.length() - CarbonCommonConstants.AMPERSAND_SPC_CHARACTER.length());
- }
- return hierStr;
-
- }
-
- /**
- * Return foreign key array
- *
- * @param dimensions
- * @return
- */
- public static String[] getForeignKeyForTables(List<CarbonDimension> dimensions,
- CarbonDataLoadSchema carbonDataLoadSchema) {
- Set<String> foreignKey = new LinkedHashSet<String>();
- for (CarbonDimension cDimension : dimensions) {
-
- List<DimensionRelation> dimensionRelationList =
- carbonDataLoadSchema.getDimensionRelationList();
-
- for (DimensionRelation dimensionRelation : dimensionRelationList) {
- for (String field : dimensionRelation.getColumns()) {
- if (cDimension.getColName().equals(field)) {
- foreignKey.add(dimensionRelation.getRelation().getFactForeignKeyColumn());
- }
- }
- }
-
- }
- return foreignKey.toArray(new String[foreignKey.size()]);
-
- }
-
- /**
- * Return foreign key and respective hierarchy String.
- *
- * @param dimensions
- * @return
- */
- public static String getForeignKeyHierarchyString(List<CarbonDimension> dimensions,
- CarbonDataLoadSchema carbonDataLoadSchema, String factTable) {
- StringBuilder foreignKeyHierarchyString = new StringBuilder();
- String columns = "";
-
- for (CarbonDimension cDimension : dimensions) {
- String dimTableName =
- extractDimensionTableName(cDimension.getColName(), carbonDataLoadSchema);
- String dimName = cDimension.getColName();
-
- if (dimTableName.equals(factTable)) {
- continue;
- }
-
- String foreignKey = null;
- for (DimensionRelation dimensionRelation : carbonDataLoadSchema.getDimensionRelationList()) {
- for (String field : dimensionRelation.getColumns()) {
- if (dimName.equals(field)) {
- foreignKey = dimensionRelation.getRelation().getFactForeignKeyColumn();
- break;
- }
- }
-
- foreignKeyHierarchyString.append(foreignKey);
- foreignKeyHierarchyString.append(CarbonCommonConstants.COLON_SPC_CHARACTER);
- foreignKeyHierarchyString.append(dimName + '_' + dimTableName);
- foreignKeyHierarchyString.append(CarbonCommonConstants.AMPERSAND_SPC_CHARACTER);
- }
- }
- columns = foreignKeyHierarchyString.toString();
- if (columns.length() > 0 && columns.endsWith(CarbonCommonConstants.AMPERSAND_SPC_CHARACTER)) {
- columns = columns
- .substring(0, columns.length() - CarbonCommonConstants.AMPERSAND_SPC_CHARACTER.length());
- }
- return columns;
-
- }
-
- /**
- * Return foreign key and respective hierarchy String.
- *
- * @param dimensions
- * @param factTableName
- * @return
- */
- public static String getForeignKeyAndPrimaryKeyMapString(
- List<DimensionRelation> dimensionRelationList) {
- StringBuilder foreignKeyHierarchyString = new StringBuilder();
- String columns = "";
-
- for (DimensionRelation dimensionRelation : dimensionRelationList) {
- foreignKeyHierarchyString.append(dimensionRelation.getRelation().getFactForeignKeyColumn());
- foreignKeyHierarchyString.append(CarbonCommonConstants.COLON_SPC_CHARACTER);
- foreignKeyHierarchyString.append(
- dimensionRelation.getTableName() + '_' + dimensionRelation.getRelation()
- .getDimensionPrimaryKeyColumn());
- foreignKeyHierarchyString.append(CarbonCommonConstants.AMPERSAND_SPC_CHARACTER);
- }
- columns = foreignKeyHierarchyString.toString();
- if (columns.length() > 0 && columns.endsWith(CarbonCommonConstants.AMPERSAND_SPC_CHARACTER)) {
- columns = columns
- .substring(0, columns.length() - CarbonCommonConstants.AMPERSAND_SPC_CHARACTER.length());
- }
- return columns;
-
- }
-
- /**
- * Return foreign key array
- *
- * @param dimensions
- * @return
- */
- public static String getPrimaryKeyString(List<CarbonDimension> dimensions,
- CarbonDataLoadSchema carbonDataLoadSchema) {
- StringBuilder primaryKeyStringbuffer = new StringBuilder();
- for (CarbonDimension cDimension : dimensions) {
- String dimTableName =
- extractDimensionTableName(cDimension.getColName(), carbonDataLoadSchema);
- String dimName = cDimension.getColName();
-
- String primaryKey = null;
- if (dimTableName.equals(carbonDataLoadSchema.getCarbonTable().getFactTableName())) {
- dimTableName = dimName;
- } else {
- for (DimensionRelation dimensionRelation : carbonDataLoadSchema
- .getDimensionRelationList()) {
- for (String field : dimensionRelation.getColumns()) {
- if (field.equals(dimName)) {
- primaryKey = dimensionRelation.getRelation().getDimensionPrimaryKeyColumn();
- break;
- }
- }
- }
- }
-
- primaryKeyStringbuffer.append(dimTableName + '_' + primaryKey);
- primaryKeyStringbuffer.append(CarbonCommonConstants.AMPERSAND_SPC_CHARACTER);
-
- }
-
- String primaryKeyString = primaryKeyStringbuffer.toString();
-
- if (primaryKeyString.length() > 0 && primaryKeyString
- .endsWith(CarbonCommonConstants.AMPERSAND_SPC_CHARACTER)) {
- primaryKeyString = primaryKeyString.substring(0,
- primaryKeyString.length() - CarbonCommonConstants.AMPERSAND_SPC_CHARACTER.length());
- }
-
- return primaryKeyString;
- }
-
- /**
- * Get Measure Name String
- *
- * @param table
- * @return
- */
- public static String getMeasuresNamesString(List<CarbonMeasure> measures) {
- StringBuilder measureNames = new StringBuilder();
-
- for (int i = 0; i < measures.size(); i++) {
- measureNames.append(measures.get(i).getColName());
- measureNames.append(CarbonCommonConstants.AMPERSAND_SPC_CHARACTER);
- }
-
- String measureNameString = measureNames.toString();
-
- if (measureNameString.length() > 0 && measureNameString
- .endsWith(CarbonCommonConstants.AMPERSAND_SPC_CHARACTER)) {
- measureNameString = measureNameString.substring(0,
- measureNameString.length() - CarbonCommonConstants.AMPERSAND_SPC_CHARACTER.length());
- }
-
- return measureNameString;
- }
-
- /**
- * Get Measure Name String
- *
- * @param table
- * @return
- */
- public static String getMeasuresUniqueColumnNamesString(List<CarbonMeasure> measures) {
- StringBuilder measureNames = new StringBuilder();
- Set<String> set = new HashSet<String>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
- for (int i = 0; i < measures.size(); i++) {
- if (!set.contains(measures.get(i).getColName())) {
- set.add(measures.get(i).getColName());
- measureNames.append(measures.get(i).getColName());
- measureNames.append(CarbonCommonConstants.AMPERSAND_SPC_CHARACTER);
- }
- }
- String measureNameString = measureNames.toString();
- if (measureNameString.length() > 0 && measureNameString
- .endsWith(CarbonCommonConstants.AMPERSAND_SPC_CHARACTER)) {
- measureNameString = measureNameString.substring(0,
- measureNameString.length() - CarbonCommonConstants.AMPERSAND_SPC_CHARACTER.length());
- }
- return measureNameString;
- }
-
- /**
- * Get Measure Aggregator array
- *
- * @param table
- * @return
- */
- public static String[] getMeasuresAggragatorArray(List<CarbonMeasure> measures) {
- String[] msrAggregators = new String[measures.size()];
-
- for (int i = 0; i < msrAggregators.length; i++) {
- msrAggregators[i] = "sum";
- }
-
- return msrAggregators;
- }
-
- /**
- * @param schemaInfo
- * @param table
- * @return
- */
- public static String getActualDimensions(List<CarbonDimension> dimensions) {
- StringBuilder actualDim = new StringBuilder();
- for (CarbonDimension cDimension : dimensions) {
- if (!cDimension.getEncoder().contains(Encoding.DICTIONARY)) {
- continue;
- }
- actualDim.append(cDimension.getColName());
- actualDim.append(CarbonCommonConstants.AMPERSAND_SPC_CHARACTER);
- }
-
- String actualDimString = actualDim.toString();
-
- if (actualDimString.length() > 0 && actualDimString
- .endsWith(CarbonCommonConstants.AMPERSAND_SPC_CHARACTER)) {
- actualDimString = actualDimString.substring(0,
- actualDimString.length() - CarbonCommonConstants.AMPERSAND_SPC_CHARACTER.length());
- }
-
- return actualDimString;
- }
-
- /**Method will prepare column name and its data type string inorder
- * to pass to the ETL steps.
- * @param schemaInfo
- * @param table
- * @return
- */
- public static String getDimensionsDataTypes(List<CarbonDimension> dimensions) {
- StringBuilder dimDataTypeBuilder = new StringBuilder();
- for (CarbonDimension cDimension : dimensions) {
- dimDataTypeBuilder.append(cDimension.getColName());
- dimDataTypeBuilder.append(CarbonCommonConstants.COMA_SPC_CHARACTER);
- dimDataTypeBuilder.append(cDimension.getDataType().toString());
- dimDataTypeBuilder.append(CarbonCommonConstants.AMPERSAND_SPC_CHARACTER);
- }
- String dimDataType = dimDataTypeBuilder.toString();
- return dimDataType;
- }
-
- public static String getMeasuresDataType(List<CarbonMeasure> measures) {
- StringBuilder measureDataTypeString = new StringBuilder();
-
- for (CarbonMeasure measure : measures) {
- measureDataTypeString.append(measure.getDataType())
- .append(CarbonCommonConstants.AMPERSAND_SPC_CHARACTER);
- }
-
- String measureTypeString = measureDataTypeString.toString();
-
- if (measureTypeString.length() > 0 && measureTypeString
- .endsWith(CarbonCommonConstants.AMPERSAND_SPC_CHARACTER)) {
- measureTypeString = measureTypeString.substring(0,
- measureTypeString.length() - CarbonCommonConstants.AMPERSAND_SPC_CHARACTER.length());
- }
-
- return measureTypeString;
-
- }
-
- /**
- * Below method will be used to get the level and its data type string
- *
- * @param dimensions
- * @param schema
- * @param table
- * @return String
- */
- public static String getLevelAndDataTypeMapString(List<CarbonDimension> dimensions,
- CarbonDataLoadSchema carbonDataLoadSchema) {
- StringBuilder dimString = new StringBuilder();
- for (CarbonDimension cDimension : dimensions) {
- String tableName = extractDimensionTableName(cDimension.getColName(), carbonDataLoadSchema);
- String levelName = tableName + '_' + cDimension.getColName();
- dimString.append(levelName + CarbonCommonConstants.LEVEL_FILE_EXTENSION
- + CarbonCommonConstants.COLON_SPC_CHARACTER + cDimension.getDataType()
- + CarbonCommonConstants.HASH_SPC_CHARACTER);
- }
- return dimString.toString();
- }
-
- /**
- * Below method will be used to get the complex dimension string
- *
- * @param dimensions
- * @param schema
- * @param table
- * @return String
- */
- public static String getComplexTypeString(List<CarbonDimension> dimensions) {
- StringBuilder dimString = new StringBuilder();
- for (int i = 0; i < dimensions.size(); i++) {
- CarbonDimension dimension = dimensions.get(i);
- if (dimension.getDataType().equals(DataType.ARRAY) || dimension.getDataType()
- .equals(DataType.STRUCT)) {
- addAllComplexTypeChildren(dimension, dimString, "");
- dimString.append(CarbonCommonConstants.SEMICOLON_SPC_CHARACTER);
- }
- }
- return dimString.toString();
- }
-
- /**
- * This method will return all the child dimensions under complex dimension
- *
- * @param dimension
- * @param dimString
- * @param parent
- */
- private static void addAllComplexTypeChildren(CarbonDimension dimension, StringBuilder dimString,
- String parent) {
- dimString.append(
- dimension.getColName() + CarbonCommonConstants.COLON_SPC_CHARACTER + dimension.getDataType()
- + CarbonCommonConstants.COLON_SPC_CHARACTER + parent
- + CarbonCommonConstants.COLON_SPC_CHARACTER + dimension.getColumnId()
- + CarbonCommonConstants.HASH_SPC_CHARACTER);
- for (int i = 0; i < dimension.getNumberOfChild(); i++) {
- CarbonDimension childDim = dimension.getListOfChildDimensions().get(i);
- if (childDim.getNumberOfChild() > 0) {
- addAllComplexTypeChildren(childDim, dimString, dimension.getColName());
- } else {
- dimString.append(
- childDim.getColName() + CarbonCommonConstants.COLON_SPC_CHARACTER + childDim
- .getDataType() + CarbonCommonConstants.COLON_SPC_CHARACTER + dimension.getColName()
- + CarbonCommonConstants.COLON_SPC_CHARACTER + childDim.getColumnId()
- + CarbonCommonConstants.COLON_SPC_CHARACTER + childDim.getOrdinal()
- + CarbonCommonConstants.HASH_SPC_CHARACTER);
- }
- }
- }
-
- /**
- * the method returns the ColumnSchemaDetailsWrapper
- *
- * @param dimensions
- * @return
- */
- public static ColumnSchemaDetailsWrapper getColumnSchemaDetails(
- List<CarbonDimension> dimensions) {
- ColumnSchemaDetailsWrapper columnSchemaDetailsWrapper = new ColumnSchemaDetailsWrapper();
- Map<String, ColumnSchemaDetails> columnSchemaDetailsMap =
- new HashMap<>(CarbonCommonConstants.DEFAULT_COLLECTION_SIZE);
- fillColumnSchemaDetailsWithComplex(dimensions, columnSchemaDetailsMap);
- columnSchemaDetailsWrapper.setColumnSchemaDetailsMap(columnSchemaDetailsMap);
- return columnSchemaDetailsWrapper;
- }
-
- private static void fillColumnSchemaDetailsWithComplex(
- List<CarbonDimension> dimensions,
- Map<String, ColumnSchemaDetails> columnSchemaDetailsMap) {
- for (CarbonDimension cDimension : dimensions) {
- ColumnSchemaDetails details =
- new ColumnSchemaDetails(cDimension.getColName(), cDimension.getDataType(),
- CarbonUtil.hasEncoding(cDimension.getEncoder(), Encoding.DIRECT_DICTIONARY));
- columnSchemaDetailsMap.put(cDimension.getColumnSchema().getColumnUniqueId(), details);
- if (cDimension.isComplex()) {
- fillColumnSchemaDetailsWithComplex(cDimension.getListOfChildDimensions(),
- columnSchemaDetailsMap);
- }
- }
- }
-
- /**
- * Get dimension string from a array of TableDimension,which can be shared
- * TableDimension within schema or in a table.
- *
- * @param table
- * @param dimensions
- * @return
- */
- public static int getNoDictionaryDimensionString(List<CarbonDimension> dimensions,
- StringBuilder dimString, int counter, CarbonDataLoadSchema carbonDataLoadSchema) {
- for (CarbonDimension cDimension : dimensions) {
- if (cDimension.getEncoder().contains(Encoding.DICTIONARY)) {
- continue;
- }
-
- String tableName = extractDimensionTableName(cDimension.getColName(), carbonDataLoadSchema);
- dimString.append(
- tableName + '_' + cDimension.getColName() + CarbonCommonConstants.COLON_SPC_CHARACTER
- + counter + CarbonCommonConstants.COLON_SPC_CHARACTER + -1
- + CarbonCommonConstants.COLON_SPC_CHARACTER + 'Y'
- + CarbonCommonConstants.COMA_SPC_CHARACTER);
- counter++;
- }
- return counter;
- }
-
- public static String getColumnPropertiesString(List<CarbonDimension> dimensions) {
- StringBuilder colPropertiesString = new StringBuilder();
- for (int dim = 0; dim < dimensions.size(); dim++) {
- CarbonDimension dimension = dimensions.get(dim);
- if (dimension.isComplex()) {
- List<CarbonDimension> childs = dimension.getListOfChildDimensions();
- for (CarbonDimension child : childs) {
- buildDimensionColumnPropertyString(child, colPropertiesString, dim);
- }
- } else {
- buildDimensionColumnPropertyString(dimension, colPropertiesString, dim);
- }
-
- }
-
- return colPropertiesString.toString();
- }
-
- protected static void buildDimensionColumnPropertyString(CarbonDimension dimension,
- StringBuilder colPropertiesString, int dim) {
- Map<String, String> columnProperties = dimension.getColumnProperties();
- if (null != columnProperties && columnProperties.size() > 0) {
- if (colPropertiesString.length() > 0) {
- colPropertiesString.append(CarbonCommonConstants.HASH_SPC_CHARACTER);
- }
- colPropertiesString.append(dimension.getColName())
- .append(CarbonCommonConstants.COLON_SPC_CHARACTER);
- int size = columnProperties.entrySet().size();
- int index = 0;
- Iterator<Entry<String, String>> itr = columnProperties.entrySet().iterator();
- while (itr.hasNext()) {
- Entry<String, String> entry = itr.next();
- colPropertiesString.append(entry.getKey())
- .append(CarbonCommonConstants.HYPHEN_SPC_CHARACTER);
- colPropertiesString.append(entry.getValue());
- index++;
- if (index < size) {
- colPropertiesString.append(CarbonCommonConstants.COMA_SPC_CHARACTER);
- }
- }
- }
- }
-}
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/e6b60907/processing/src/main/java/org/pentaho/di/trans/steps/hadoopfileinput/HadoopFileInputMeta.java
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/pentaho/di/trans/steps/hadoopfileinput/HadoopFileInputMeta.java b/processing/src/main/java/org/pentaho/di/trans/steps/hadoopfileinput/HadoopFileInputMeta.java
deleted file mode 100644
index 15b0ac8..0000000
--- a/processing/src/main/java/org/pentaho/di/trans/steps/hadoopfileinput/HadoopFileInputMeta.java
+++ /dev/null
@@ -1,27 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.pentaho.di.trans.steps.hadoopfileinput;
-
-import org.pentaho.di.core.annotations.Step;
-import org.pentaho.di.trans.steps.textfileinput.TextFileInputMeta;
-
-@Step(id = "HadoopFileInputPlugin", image = "HDI.png", name = "HadoopFileInputPlugin",
- description = "Process files from an HDFS location", categoryDescription = "Hadoop")
-public class HadoopFileInputMeta extends TextFileInputMeta {
-
-}
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/e6b60907/processing/src/main/java/org/pentaho/di/trans/steps/hadoopfileinput/messages/messages_en_US.properties
----------------------------------------------------------------------
diff --git a/processing/src/main/java/org/pentaho/di/trans/steps/hadoopfileinput/messages/messages_en_US.properties b/processing/src/main/java/org/pentaho/di/trans/steps/hadoopfileinput/messages/messages_en_US.properties
deleted file mode 100644
index 4ed60dd..0000000
--- a/processing/src/main/java/org/pentaho/di/trans/steps/hadoopfileinput/messages/messages_en_US.properties
+++ /dev/null
@@ -1,18 +0,0 @@
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements. See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership. The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-#
-HadoopFileInputDialog.DialogTitle=Hadoop File Input
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/e6b60907/processing/src/test/java/org/apache/carbondata/carbon/datastore/BlockIndexStoreTest.java
----------------------------------------------------------------------
diff --git a/processing/src/test/java/org/apache/carbondata/carbon/datastore/BlockIndexStoreTest.java b/processing/src/test/java/org/apache/carbondata/carbon/datastore/BlockIndexStoreTest.java
index 714a32f..156c57e 100644
--- a/processing/src/test/java/org/apache/carbondata/carbon/datastore/BlockIndexStoreTest.java
+++ b/processing/src/test/java/org/apache/carbondata/carbon/datastore/BlockIndexStoreTest.java
@@ -40,7 +40,7 @@ import org.apache.carbondata.core.datastore.block.TableBlockInfo;
import org.apache.carbondata.core.datastore.block.TableBlockUniqueIdentifier;
import org.apache.carbondata.core.constants.CarbonCommonConstants;
import org.apache.carbondata.core.util.CarbonProperties;
-import org.apache.carbondata.test.util.StoreCreator;
+import org.apache.carbondata.processing.StoreCreator;
import junit.framework.TestCase;
import org.junit.AfterClass;
http://git-wip-us.apache.org/repos/asf/incubator-carbondata/blob/e6b60907/processing/src/test/java/org/apache/carbondata/lcm/locks/LocalFileLockTest.java
----------------------------------------------------------------------
diff --git a/processing/src/test/java/org/apache/carbondata/lcm/locks/LocalFileLockTest.java b/processing/src/test/java/org/apache/carbondata/lcm/locks/LocalFileLockTest.java
index 4bb7d16..68d822b 100644
--- a/processing/src/test/java/org/apache/carbondata/lcm/locks/LocalFileLockTest.java
+++ b/processing/src/test/java/org/apache/carbondata/lcm/locks/LocalFileLockTest.java
@@ -20,9 +20,9 @@ import org.apache.carbondata.core.metadata.CarbonTableIdentifier;
import org.apache.carbondata.core.locks.LocalFileLock;
import org.apache.carbondata.core.locks.LockUsage;
import org.junit.After;
+import org.junit.Assert;
import org.junit.Before;
import org.junit.Test;
-import org.pentaho.di.core.util.Assert;
/**
* Test class to test the functionality of the local file locking.