You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2016/09/12 11:24:58 UTC
[22/25] kylin git commit: minor, better log on dictionary creation failure
minor, better log on dictionary creation failure
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/6db4b172
Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/6db4b172
Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/6db4b172
Branch: refs/heads/1.5.x-CDH5.7
Commit: 6db4b1723c47762280128ec7c08a160fdc9a69d7
Parents: aef7869
Author: Li Yang <li...@apache.org>
Authored: Mon Sep 12 15:28:58 2016 +0800
Committer: Li Yang <li...@apache.org>
Committed: Mon Sep 12 15:28:58 2016 +0800
----------------------------------------------------------------------
.../apache/kylin/common/KylinConfigBase.java | 2 +-
.../apache/kylin/dict/DictionaryGenerator.java | 378 +++++++++----------
.../apache/kylin/dict/DictionaryManager.java | 2 +
3 files changed, 192 insertions(+), 190 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/kylin/blob/6db4b172/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
----------------------------------------------------------------------
diff --git a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
index de9051c..79ee084 100644
--- a/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
+++ b/core-common/src/main/java/org/apache/kylin/common/KylinConfigBase.java
@@ -449,7 +449,7 @@ abstract public class KylinConfigBase implements Serializable {
}
public int getDictionaryMaxCardinality() {
- return Integer.parseInt(getOptional("kylin.dictionary.max.cardinality", "5000000"));
+ return Integer.parseInt(getOptional("kylin.dictionary.max.cardinality", "30000000"));
}
public int getTableSnapshotMaxMB() {
http://git-wip-us.apache.org/repos/asf/kylin/blob/6db4b172/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java
index 5bd3357..ba848c6 100644
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryGenerator.java
@@ -1,189 +1,189 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
-*/
-
-package org.apache.kylin.dict;
-
-import java.io.IOException;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.ArrayList;
-import java.util.List;
-
-import org.apache.commons.lang.StringUtils;
-import org.apache.kylin.common.KylinConfig;
-import org.apache.kylin.common.util.Bytes;
-import org.apache.kylin.common.util.Dictionary;
-import org.apache.kylin.metadata.datatype.DataType;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.google.common.base.Preconditions;
-
-/**
- * @author yangli9
- */
-@SuppressWarnings({ "rawtypes", "unchecked" })
-public class DictionaryGenerator {
-
- private static final int DICT_MAX_CARDINALITY = getDictionaryMaxCardinality();
-
- private static final Logger logger = LoggerFactory.getLogger(DictionaryGenerator.class);
-
- private static final String[] DATE_PATTERNS = new String[] { "yyyy-MM-dd", "yyyyMMdd" };
-
- private static int getDictionaryMaxCardinality() {
- try {
- return KylinConfig.getInstanceFromEnv().getDictionaryMaxCardinality();
- } catch (Throwable e) {
- return 5000000; // some test case does not have KylinConfig setup properly
- }
- }
-
- public static Dictionary<String> buildDictionary(DataType dataType, IDictionaryValueEnumerator valueEnumerator) throws IOException {
- Preconditions.checkNotNull(dataType, "dataType cannot be null");
-
- // build dict, case by data type
- IDictionaryBuilder builder;
- if (dataType.isDateTimeFamily()) {
- if (dataType.isDate())
- builder = new DateDictBuilder();
- else
- builder = new TimeDictBuilder();
- } else if (dataType.isNumberFamily()) {
- builder = new NumberDictBuilder();
- } else {
- builder = new StringDictBuilder();
- }
-
- return buildDictionary(builder, null, valueEnumerator);
- }
-
- public static Dictionary<String> buildDictionary(IDictionaryBuilder builder, DictionaryInfo dictInfo, IDictionaryValueEnumerator valueEnumerator) throws IOException {
- int baseId = 0; // always 0 for now
- int nSamples = 5;
- ArrayList<String> samples = new ArrayList<String>(nSamples);
-
- Dictionary<String> dict = builder.build(dictInfo, valueEnumerator, baseId, nSamples, samples);
-
- // log a few samples
- StringBuilder buf = new StringBuilder();
- for (String s : samples) {
- if (buf.length() > 0) {
- buf.append(", ");
- }
- buf.append(s.toString()).append("=>").append(dict.getIdFromValue(s));
- }
- logger.debug("Dictionary value samples: " + buf.toString());
- logger.debug("Dictionary cardinality: " + dict.getSize());
- logger.debug("Dictionary builder class: " + builder.getClass().getName());
- logger.debug("Dictionary class: " + dict.getClass().getName());
- if (dict instanceof TrieDictionary && dict.getSize() > DICT_MAX_CARDINALITY) {
- throw new IllegalArgumentException("Too high cardinality is not suitable for dictionary -- cardinality: " + dict.getSize());
- }
- return dict;
- }
-
- public static Dictionary mergeDictionaries(DataType dataType, List<DictionaryInfo> sourceDicts) throws IOException {
- return buildDictionary(dataType, new MultipleDictionaryValueEnumerator(sourceDicts));
- }
-
- private static class DateDictBuilder implements IDictionaryBuilder {
- @Override
- public Dictionary<String> build(DictionaryInfo dictInfo, IDictionaryValueEnumerator valueEnumerator, int baseId, int nSamples, ArrayList<String> returnSamples) throws IOException {
- final int BAD_THRESHOLD = 0;
- String matchPattern = null;
- byte[] value;
-
- for (String ptn : DATE_PATTERNS) {
- matchPattern = ptn; // be optimistic
- int badCount = 0;
- SimpleDateFormat sdf = new SimpleDateFormat(ptn);
- while (valueEnumerator.moveNext()) {
- value = valueEnumerator.current();
- if (value == null || value.length == 0)
- continue;
-
- String str = Bytes.toString(value);
- try {
- sdf.parse(str);
- if (returnSamples.size() < nSamples && returnSamples.contains(str) == false)
- returnSamples.add(str);
- } catch (ParseException e) {
- logger.info("Unrecognized date value: " + str);
- badCount++;
- if (badCount > BAD_THRESHOLD) {
- matchPattern = null;
- break;
- }
- }
- }
- if (matchPattern != null) {
- return new DateStrDictionary(matchPattern, baseId);
- }
- }
-
- throw new IllegalStateException("Unrecognized datetime value");
- }
- }
-
- private static class TimeDictBuilder implements IDictionaryBuilder {
- @Override
- public Dictionary<String> build(DictionaryInfo dictInfo, IDictionaryValueEnumerator valueEnumerator, int baseId, int nSamples, ArrayList<String> returnSamples) throws IOException {
- return new TimeStrDictionary(); // base ID is always 0
- }
- }
-
- private static class StringDictBuilder implements IDictionaryBuilder {
- @Override
- public Dictionary<String> build(DictionaryInfo dictInfo, IDictionaryValueEnumerator valueEnumerator, int baseId, int nSamples, ArrayList<String> returnSamples) throws IOException {
- TrieDictionaryBuilder builder = new TrieDictionaryBuilder(new StringBytesConverter());
- byte[] value;
- while (valueEnumerator.moveNext()) {
- value = valueEnumerator.current();
- if (value == null)
- continue;
- String v = Bytes.toString(value);
- builder.addValue(v);
- if (returnSamples.size() < nSamples && returnSamples.contains(v) == false)
- returnSamples.add(v);
- }
- return builder.build(baseId);
- }
- }
-
- private static class NumberDictBuilder implements IDictionaryBuilder {
- @Override
- public Dictionary<String> build(DictionaryInfo dictInfo, IDictionaryValueEnumerator valueEnumerator, int baseId, int nSamples, ArrayList<String> returnSamples) throws IOException {
- NumberDictionaryBuilder builder = new NumberDictionaryBuilder(new StringBytesConverter());
- byte[] value;
- while (valueEnumerator.moveNext()) {
- value = valueEnumerator.current();
- if (value == null)
- continue;
- String v = Bytes.toString(value);
- if (StringUtils.isBlank(v)) // empty string is null for numbers
- continue;
-
- builder.addValue(v);
- if (returnSamples.size() < nSamples && returnSamples.contains(v) == false)
- returnSamples.add(v);
- }
- return builder.build(baseId);
- }
- }
-}
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.dict;
+
+import java.io.IOException;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.commons.lang.StringUtils;
+import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.common.util.Bytes;
+import org.apache.kylin.common.util.Dictionary;
+import org.apache.kylin.metadata.datatype.DataType;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.google.common.base.Preconditions;
+
+/**
+ * @author yangli9
+ */
+@SuppressWarnings({ "rawtypes", "unchecked" })
+public class DictionaryGenerator {
+
+ private static final int DICT_MAX_CARDINALITY = getDictionaryMaxCardinality();
+
+ private static final Logger logger = LoggerFactory.getLogger(DictionaryGenerator.class);
+
+ private static final String[] DATE_PATTERNS = new String[] { "yyyy-MM-dd", "yyyyMMdd" };
+
+ private static int getDictionaryMaxCardinality() {
+ try {
+ return KylinConfig.getInstanceFromEnv().getDictionaryMaxCardinality();
+ } catch (Throwable e) {
+ return 30000000; // some test case does not have KylinConfig setup properly
+ }
+ }
+
+ public static Dictionary<String> buildDictionary(DataType dataType, IDictionaryValueEnumerator valueEnumerator) throws IOException {
+ Preconditions.checkNotNull(dataType, "dataType cannot be null");
+
+ // build dict, case by data type
+ IDictionaryBuilder builder;
+ if (dataType.isDateTimeFamily()) {
+ if (dataType.isDate())
+ builder = new DateDictBuilder();
+ else
+ builder = new TimeDictBuilder();
+ } else if (dataType.isNumberFamily()) {
+ builder = new NumberDictBuilder();
+ } else {
+ builder = new StringDictBuilder();
+ }
+
+ return buildDictionary(builder, null, valueEnumerator);
+ }
+
+ public static Dictionary<String> buildDictionary(IDictionaryBuilder builder, DictionaryInfo dictInfo, IDictionaryValueEnumerator valueEnumerator) throws IOException {
+ int baseId = 0; // always 0 for now
+ int nSamples = 5;
+ ArrayList<String> samples = new ArrayList<String>(nSamples);
+
+ Dictionary<String> dict = builder.build(dictInfo, valueEnumerator, baseId, nSamples, samples);
+
+ // log a few samples
+ StringBuilder buf = new StringBuilder();
+ for (String s : samples) {
+ if (buf.length() > 0) {
+ buf.append(", ");
+ }
+ buf.append(s.toString()).append("=>").append(dict.getIdFromValue(s));
+ }
+ logger.debug("Dictionary value samples: " + buf.toString());
+ logger.debug("Dictionary cardinality: " + dict.getSize());
+ logger.debug("Dictionary builder class: " + builder.getClass().getName());
+ logger.debug("Dictionary class: " + dict.getClass().getName());
+ if (dict instanceof TrieDictionary && dict.getSize() > DICT_MAX_CARDINALITY) {
+ throw new IllegalArgumentException("Too high cardinality is not suitable for dictionary -- cardinality: " + dict.getSize());
+ }
+ return dict;
+ }
+
+ public static Dictionary mergeDictionaries(DataType dataType, List<DictionaryInfo> sourceDicts) throws IOException {
+ return buildDictionary(dataType, new MultipleDictionaryValueEnumerator(sourceDicts));
+ }
+
+ private static class DateDictBuilder implements IDictionaryBuilder {
+ @Override
+ public Dictionary<String> build(DictionaryInfo dictInfo, IDictionaryValueEnumerator valueEnumerator, int baseId, int nSamples, ArrayList<String> returnSamples) throws IOException {
+ final int BAD_THRESHOLD = 0;
+ String matchPattern = null;
+ byte[] value;
+
+ for (String ptn : DATE_PATTERNS) {
+ matchPattern = ptn; // be optimistic
+ int badCount = 0;
+ SimpleDateFormat sdf = new SimpleDateFormat(ptn);
+ while (valueEnumerator.moveNext()) {
+ value = valueEnumerator.current();
+ if (value == null || value.length == 0)
+ continue;
+
+ String str = Bytes.toString(value);
+ try {
+ sdf.parse(str);
+ if (returnSamples.size() < nSamples && returnSamples.contains(str) == false)
+ returnSamples.add(str);
+ } catch (ParseException e) {
+ logger.info("Unrecognized date value: " + str);
+ badCount++;
+ if (badCount > BAD_THRESHOLD) {
+ matchPattern = null;
+ break;
+ }
+ }
+ }
+ if (matchPattern != null) {
+ return new DateStrDictionary(matchPattern, baseId);
+ }
+ }
+
+ throw new IllegalStateException("Unrecognized datetime value");
+ }
+ }
+
+ private static class TimeDictBuilder implements IDictionaryBuilder {
+ @Override
+ public Dictionary<String> build(DictionaryInfo dictInfo, IDictionaryValueEnumerator valueEnumerator, int baseId, int nSamples, ArrayList<String> returnSamples) throws IOException {
+ return new TimeStrDictionary(); // base ID is always 0
+ }
+ }
+
+ private static class StringDictBuilder implements IDictionaryBuilder {
+ @Override
+ public Dictionary<String> build(DictionaryInfo dictInfo, IDictionaryValueEnumerator valueEnumerator, int baseId, int nSamples, ArrayList<String> returnSamples) throws IOException {
+ TrieDictionaryBuilder builder = new TrieDictionaryBuilder(new StringBytesConverter());
+ byte[] value;
+ while (valueEnumerator.moveNext()) {
+ value = valueEnumerator.current();
+ if (value == null)
+ continue;
+ String v = Bytes.toString(value);
+ builder.addValue(v);
+ if (returnSamples.size() < nSamples && returnSamples.contains(v) == false)
+ returnSamples.add(v);
+ }
+ return builder.build(baseId);
+ }
+ }
+
+ private static class NumberDictBuilder implements IDictionaryBuilder {
+ @Override
+ public Dictionary<String> build(DictionaryInfo dictInfo, IDictionaryValueEnumerator valueEnumerator, int baseId, int nSamples, ArrayList<String> returnSamples) throws IOException {
+ NumberDictionaryBuilder builder = new NumberDictionaryBuilder(new StringBytesConverter());
+ byte[] value;
+ while (valueEnumerator.moveNext()) {
+ value = valueEnumerator.current();
+ if (value == null)
+ continue;
+ String v = Bytes.toString(value);
+ if (StringUtils.isBlank(v)) // empty string is null for numbers
+ continue;
+
+ builder.addValue(v);
+ if (returnSamples.size() < nSamples && returnSamples.contains(v) == false)
+ returnSamples.add(v);
+ }
+ return builder.build(baseId);
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/kylin/blob/6db4b172/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java
index ff088b9..c8a7a54 100644
--- a/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java
+++ b/core-dictionary/src/main/java/org/apache/kylin/dict/DictionaryManager.java
@@ -321,6 +321,8 @@ public class DictionaryManager {
dictionary = DictionaryGenerator.buildDictionary(DataType.getType(dictInfo.getDataType()), columnValueEnumerator);
else
dictionary = DictionaryGenerator.buildDictionary((IDictionaryBuilder) ClassUtil.newInstance(builderClass), dictInfo, columnValueEnumerator);
+ } catch (Exception ex) {
+ throw new RuntimeException("Failed to create dictionary on " + col, ex);
} finally {
if (columnValueEnumerator != null)
columnValueEnumerator.close();