You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2015/07/22 06:13:14 UTC
[24/47] incubator-kylin git commit: KYLIN-875 rename modules:
core-common, core-cube, core-dictionary, core-cube
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/7e8896ac/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryTest.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryTest.java b/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryTest.java
new file mode 100644
index 0000000..f6031e8
--- /dev/null
+++ b/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryTest.java
@@ -0,0 +1,353 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.dict;
+
+import org.junit.Test;
+
+import java.io.*;
+import java.util.*;
+
+import static org.junit.Assert.*;
+
+public class TrieDictionaryTest {
+
+    /**
+     * Manual entry point: loads the category-names sample file and feeds it to
+     * the lookup benchmark. Not run as part of the JUnit suite.
+     */
+    public static void main(String[] args) throws Exception {
+        // Alternative input kept for reference:
+        // InputStream is =
+        // Util.getPackageResourceAsStream(TrieDictionaryTest.class,
+        // "eng_com.dic");
+        InputStream input = new FileInputStream("src/test/resources/dict/dw_category_grouping_names.dat");
+        ArrayList<String> words = loadStrings(input);
+        benchmarkStringDictionary(words);
+    }
+
+    /**
+     * Builds a dictionary (base id 0) whose values include two very long
+     * strings, round-trips it through {@link #testSerialize}, and checks that
+     * ids grow strictly with the sorted value order and map back to the same
+     * value.
+     */
+    @Test
+    public void partOverflowTest() {
+        ArrayList<String> values = new ArrayList<String>();
+        // str.add("");
+        values.add("part");
+        values.add("par");
+        values.add("partition");
+        values.add("party");
+        values.add("parties");
+        values.add("paint");
+        String longStr = "paintjkjdfklajkdljfkdsajklfjklsadjkjekjrklewjrklewjklrjklewjkljkljkljkljweklrjewkljrklewjrlkjewkljrkljkljkjlkjjkljkljkljkljlkjlkjlkjljdfadfads" + "dddddddddddddddddddddddddddddddddddddddddddddddddkfjadslkfjdsakljflksadjklfjklsjfkljwelkrjewkljrklewjklrjelkwjrklewjrlkjwkljerklkljlkjrlkwejrk" + "dddddddddddddddddddddddddddddddddddddddddddddddddkfjadslkfjdsakljflksadjklfjklsjfkljwelkrjewkljrklewjklrjelkwjrklewjrlkjwkljerklkljlkjrlkwejrk" + "dddddddddddddddddddddddddddddddddddddddddddddddddkfjadslkfjdsakljflksadjklfjklsjfkljwelkrjewkljrklewjklrjelkwjrklewjrlkjwkljerklkljlkjrlkwejrk" + "dddddddddddddddddddddddddddddddddddddddddddddddddkfjadslkfjdsakljflksadjklfjklsjfkljwelkrjewkljrklewjklrjelkwjrklewjrlkjwkljerklkljlkjrlkwejrk" + "dddddddddddddddddddddddddddddddddddddddddddddddddkfjadslkfjdsakljflksadjklfjklsjfkljwelkrjewkljrklewjklrjelkwjrklewjrlkjwkljerklkljlkjrlkwejrk"
+                + "dddddddddddddddddddddddddddddddddddddddddddddddddkfjadslkfjdsakljflksadjklfjklsjfkljwelkrjewkljrklewjklrjelkwjrklewjrlkjwkljerklkljlkjrlkwejrk" + "dddddddddddddddddddddddddddddddddddddddddddddddddkfjadslkfjdsakljflksadjklfjklsjfkljwelkrjewkljrklewjklrjelkwjrklewjrlkjwkljerklkljlkjrlkwejrk";
+        System.out.println("The length of the long string is " + longStr.length());
+        values.add(longStr);
+
+        values.add("zzzzzz" + longStr);// another long string
+
+        TrieDictionary<String> dict = newDictBuilder(values).build(0);
+
+        // sorted, de-duplicated view of the input values
+        TreeSet<String> sorted = new TreeSet<String>(values);
+
+        // test serialize
+        dict = testSerialize(dict);
+
+        // test basic id<==>value
+        int rank = 0;
+        int previousId = -1;
+        for (String value : sorted) {
+            // in case of overflow parts, there exist interpolation nodes
+            // they exist to make sure that any node's part is shorter than 255
+            int actualId = dict.getIdFromValue(value);
+            assertTrue(actualId >= rank);
+            assertTrue(actualId > previousId);
+            previousId = actualId;
+
+            assertEquals(value, dict.getValueFromId(actualId));
+            rank++;
+        }
+    }
+
+    /**
+     * Runs the shared dictionary checks on a value set that contains the empty
+     * string; no "not found" probes are supplied.
+     */
+    @Test
+    public void emptyValueTest() {
+        ArrayList<String> values = new ArrayList<String>(Arrays.asList( //
+                "", "part", "par", "partition", "party", "parties", "paint"));
+        testStringDictionary(values, null);
+    }
+
+    /**
+     * Runs the shared dictionary checks on a small word set (with one
+     * deliberate duplicate) and a list of values that must not be found.
+     */
+    @Test
+    public void simpleTrieTest() {
+        // "part" appears twice on purpose: meant to be dup
+        ArrayList<String> values = new ArrayList<String>(Arrays.asList( //
+                "part", "part", "par", "partition", "party", "parties", "paint"));
+
+        ArrayList<String> missing = new ArrayList<String>(Arrays.asList( //
+                "", "p", "pa", "pb", "parti", "partz", "partyz"));
+
+        testStringDictionary(values, missing);
+    }
+
+    /**
+     * Builds one dictionary from a word list and a second one from the same
+     * list plus "py", each with a random base id, and checks
+     * {@code containedBy} in both directions.
+     */
+    @Test
+    public void dictionaryContainTest() {
+        // "part" appears twice on purpose: meant to be dup
+        ArrayList<String> values = new ArrayList<String>(Arrays.asList( //
+                "part", "part", "par", "partition", "party", "parties", "paint"));
+
+        TrieDictionaryBuilder<String> smallBuilder = newDictBuilder(values);
+        TrieDictionary<String> small = smallBuilder.build(new Random().nextInt(100));
+
+        values.add("py");
+        TrieDictionaryBuilder<String> largeBuilder = newDictBuilder(values);
+        TrieDictionary<String> large = largeBuilder.build(new Random().nextInt(100));
+
+        assertTrue(small.containedBy(large));
+        assertFalse(large.containedBy(small));
+    }
+
+
+    /** Runs the shared dictionary checks on the English word list resource. */
+    @Test
+    public void englishWordsTest() throws Exception {
+        InputStream wordFile = new FileInputStream("src/test/resources/dict/english-words.80 (scowl-2015.05.18).txt");
+        ArrayList<String> words = loadStrings(wordFile);
+        testStringDictionary(words, null);
+    }
+
+    /** Runs the shared dictionary checks on the category-names resource. */
+    @Test
+    public void categoryNamesTest() throws Exception {
+        InputStream namesFile = new FileInputStream("src/test/resources/dict/dw_category_grouping_names.dat");
+        ArrayList<String> names = loadStrings(namesFile);
+        testStringDictionary(names, null);
+    }
+
+    /**
+     * Builds a dictionary (base id 0) from {@code str}, prepares equivalent
+     * HashMap / array lookup structures over the sorted distinct values, and
+     * runs {@link #benchmark} twice: once as warm-up, once as the measured run.
+     */
+    private static void benchmarkStringDictionary(ArrayList<String> str) throws UnsupportedEncodingException {
+        TrieDictionaryBuilder<String> builder = newDictBuilder(str);
+        builder.stats().print();
+        TrieDictionary<String> dict = builder.build(0);
+
+        TreeSet<String> sorted = new TreeSet<String>(str);
+
+        // prepare id==>value array and value==>id map
+        int count = sorted.size();
+        HashMap<String, Integer> valueToId = new HashMap<String, Integer>();
+        String[] idToValue = new String[count];
+        byte[][] idToBytes = new byte[count][];
+        int id = 0;
+        for (String value : sorted) {
+            valueToId.put(value, id);
+            idToValue[id] = value;
+            idToBytes[id] = value.getBytes("UTF-8");
+            id++;
+        }
+
+        // System.out.println("Dict size in bytes: " +
+        // MemoryUtil.deepMemoryUsageOf(dict));
+        // System.out.println("Map size in bytes: " +
+        // MemoryUtil.deepMemoryUsageOf(map));
+        // System.out.println("Array size in bytes: " +
+        // MemoryUtil.deepMemoryUsageOf(strArray));
+
+        // warm-up, said that code only got JIT after run 1k-10k times,
+        // following jvm options may help
+        // -XX:CompileThreshold=1500
+        // -XX:+PrintCompilation
+        benchmark("Warm up", dict, sorted, valueToId, idToValue, idToBytes);
+        benchmark("Benchmark", dict, sorted, valueToId, idToValue, idToBytes);
+    }
+
+ /**
+ * Times four lookup loops over the same data and prints each elapsed time in
+ * milliseconds, prefixed by {@code msg}: value==>id via HashMap, value==>id
+ * via the dictionary, id==>value via the String array, and id==>value via
+ * the dictionary.
+ *
+ * @param msg label printed before each measurement
+ * @param dict dictionary under test
+ * @param set distinct values; only its size is used here
+ * @param map value==>id map used as the baseline for value lookups
+ * @param strArray values indexed by id, baseline for id lookups
+ * @param array the values as byte arrays, passed to the dictionary lookups
+ * @return the OR-accumulated results of all lookups; returned only so the
+ * calls under test have an observable effect
+ */
+ private static int benchmark(String msg, TrieDictionary<String> dict, TreeSet<String> set, HashMap<String, Integer> map, String[] strArray, byte[][] array) {
+ int n = set.size();
+ // NOTE(review): integer division throws ArithmeticException when the set is empty
+ int times = 10 * 1000 * 1000 / n; // run 10 million lookups
+ int keep = 0; // make sure JIT don't OPT OUT function calls under test
+ // scratch buffer reused by every getValueBytesFromId call below
+ byte[] valueBytes = new byte[dict.getSizeOfValue()];
+ long start;
+
+ // benchmark value==>id, via HashMap
+ System.out.println(msg + " HashMap lookup value==>id");
+ start = System.currentTimeMillis();
+ for (int i = 0; i < times; i++) {
+ for (int j = 0; j < n; j++) {
+ keep |= map.get(strArray[j]);
+ }
+ }
+ long timeValueToIdByMap = System.currentTimeMillis() - start;
+ System.out.println(timeValueToIdByMap);
+
+ // benchmark value==>id, via Dict
+ System.out.println(msg + " Dictionary lookup value==>id");
+ start = System.currentTimeMillis();
+ for (int i = 0; i < times; i++) {
+ for (int j = 0; j < n; j++) {
+ keep |= dict.getIdFromValueBytes(array[j], 0, array[j].length);
+ }
+ }
+ long timeValueToIdByDict = System.currentTimeMillis() - start;
+ System.out.println(timeValueToIdByDict);
+
+ // benchmark id==>value, via Array
+ System.out.println(msg + " Array lookup id==>value");
+ start = System.currentTimeMillis();
+ for (int i = 0; i < times; i++) {
+ for (int j = 0; j < n; j++) {
+ keep |= strArray[j].length();
+ }
+ }
+ long timeIdToValueByArray = System.currentTimeMillis() - start;
+ System.out.println(timeIdToValueByArray);
+
+ // benchmark id==>value, via Dict
+ // ids 0..n-1 are passed directly; the caller in this file builds the dictionary with base id 0
+ System.out.println(msg + " Dictionary lookup id==>value");
+ start = System.currentTimeMillis();
+ for (int i = 0; i < times; i++) {
+ for (int j = 0; j < n; j++) {
+ keep |= dict.getValueBytesFromId(j, valueBytes, 0);
+ }
+ }
+ long timeIdToValueByDict = System.currentTimeMillis() - start;
+ System.out.println(timeIdToValueByDict);
+
+ return keep;
+ }
+
+    /**
+     * Shared check used by the dictionary tests: builds a dictionary from
+     * {@code str} with a random base id in [0, 100), round-trips it through
+     * {@link #testSerialize}, and verifies that the sorted distinct values map
+     * to consecutive ids starting at the base id, and back.
+     *
+     * @param str values to put in the dictionary; duplicates are allowed
+     * @param notFound values for which {@code getIdFromValue} must throw
+     *            {@link IllegalArgumentException}; may be {@code null} to skip
+     */
+    private static void testStringDictionary(ArrayList<String> str, ArrayList<String> notFound) {
+        TrieDictionaryBuilder<String> b = newDictBuilder(str);
+        int baseId = new Random().nextInt(100);
+        TrieDictionary<String> dict = b.build(baseId);
+
+        // sorted, de-duplicated values: the iteration order defines the expected ids
+        TreeSet<String> set = new TreeSet<String>(str);
+
+        // test serialize
+        dict = testSerialize(dict);
+
+        // test basic id<==>value
+        int id = baseId;
+        for (String value : set) {
+            // System.out.println("checking " + id + " <==> " + value);
+
+            assertEquals(id, dict.getIdFromValue(value));
+            assertEquals(value, dict.getValueFromId(id));
+            id++;
+        }
+        if (notFound != null) {
+            for (String s : notFound) {
+                try {
+                    dict.getIdFromValue(s);
+                    fail("For not found value '" + s + "', IllegalArgumentException is expected");
+                } catch (IllegalArgumentException e) {
+                    // good
+                }
+            }
+        }
+
+        // test null value
+        int nullId = dict.getIdFromValue(null);
+        assertNull(dict.getValueFromId(nullId));
+        int nullId2 = dict.getIdFromValueBytes(null, 0, 0);
+        // expected value goes first so a failure message reports the right sides
+        assertEquals(-1, dict.getValueBytesFromId(nullId2, null, 0));
+        assertEquals(nullId, nullId2);
+    }
+
+    /**
+     * Writes {@code dict} to an in-memory byte buffer and reads it back into a
+     * fresh {@code TrieDictionary}.
+     *
+     * @return the dictionary reconstructed from the serialized bytes
+     * @throws RuntimeException wrapping any {@link IOException} raised while
+     *             writing or reading
+     */
+    private static TrieDictionary<String> testSerialize(TrieDictionary<String> dict) {
+        try {
+            // serialize
+            ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+            DataOutputStream out = new DataOutputStream(buffer);
+            dict.write(out);
+            out.close();
+
+            // deserialize from the bytes just written
+            byte[] serialized = buffer.toByteArray();
+            DataInputStream in = new DataInputStream(new ByteArrayInputStream(serialized));
+            TrieDictionary<String> copy = new TrieDictionary<String>();
+            copy.readFields(in);
+            in.close();
+            return copy;
+        } catch (IOException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    /**
+     * Creates a string dictionary builder and adds every element of
+     * {@code str} to it, in list order.
+     */
+    private static TrieDictionaryBuilder<String> newDictBuilder(ArrayList<String> str) {
+        TrieDictionaryBuilder<String> builder = new TrieDictionaryBuilder<String>(new StringBytesConverter());
+        for (String value : str) {
+            builder.addValue(value);
+        }
+        return builder;
+    }
+
+    /**
+     * Reads {@code is} as UTF-8 text and returns its lines, trimmed, skipping
+     * lines that are empty after trimming. The reader and the stream are
+     * closed before returning, whether or not reading succeeded.
+     */
+    private static ArrayList<String> loadStrings(InputStream is) throws Exception {
+        ArrayList<String> words = new ArrayList<String>();
+        BufferedReader reader = new BufferedReader(new InputStreamReader(is, "UTF-8"));
+        try {
+            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
+                String word = line.trim();
+                if (!word.isEmpty()) {
+                    words.add(word);
+                }
+            }
+        } finally {
+            reader.close();
+            is.close();
+        }
+        return words;
+    }
+
+    /**
+     * Builds a dictionary from two values sharing a 200-character prefix and
+     * dumps it to standard output. The test makes no assertions; it passes as
+     * long as building and dumping do not throw.
+     */
+    @Test
+    public void testSuperLongStringValue() {
+        String longPrefix = "0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789" + "0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789";
+        String v1 = longPrefix + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz";
+        String v2 = longPrefix + "xyz";
+
+        TrieDictionaryBuilder<String> builder = new TrieDictionaryBuilder<String>(new StringBytesConverter());
+        builder.addValue(v1);
+        builder.addValue(v2);
+
+        TrieDictionary<String> dict = builder.build(0);
+        dict.dump(System.out);
+    }
+
+ /**
+ * Intentionally empty placeholder: it performs no checks itself. Per the
+ * note in the body, the rounding checks live in NumberDictionaryTest.
+ */
+ @Test
+ public void testRounding() {
+ // see NumberDictionaryTest.testRounding();
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/7e8896ac/core-dictionary/src/test/java/org/apache/kylin/dict/lookup/LookupTableTest.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/test/java/org/apache/kylin/dict/lookup/LookupTableTest.java b/core-dictionary/src/test/java/org/apache/kylin/dict/lookup/LookupTableTest.java
new file mode 100644
index 0000000..bb4a717
--- /dev/null
+++ b/core-dictionary/src/test/java/org/apache/kylin/dict/lookup/LookupTableTest.java
@@ -0,0 +1,125 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.dict.lookup;
+
+import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.common.util.LocalFileMetadataTestCase;
+import org.apache.kylin.metadata.MetadataManager;
+import org.apache.kylin.metadata.model.TableDesc;
+import org.apache.kylin.common.util.Pair;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+/**
+ */
+public class LookupTableTest extends LocalFileMetadataTestCase {
+
+ private KylinConfig config = null;
+
+ private LookupTable<String> lookupTable;
+
+    /**
+     * Per-test fixture: creates the test metadata, then resolves the Kylin
+     * config from the environment and builds the lookup table under test.
+     * The order matters: {@code initLookupTable()} reads {@code config}.
+     */
+    @Before
+    public void setup() throws Exception {
+        createTestMetadata();
+        this.config = KylinConfig.getInstanceFromEnv();
+        this.lookupTable = initLookupTable();
+    }
+
+ /**
+ * Runs after each test and calls the inherited cleanupTestMetadata().
+ * NOTE(review): presumably this undoes createTestMetadata() from setup() —
+ * confirm in LocalFileMetadataTestCase.
+ */
+ @After
+ public void tearDown() {
+ cleanupTestMetadata();
+ }
+
+    /**
+     * Scans the lookup table for two CAL_DT values and expects a non-empty
+     * result in which every YEAR_BEG_DT is "2012-01-01".
+     */
+    @Test
+    public void testScan() throws Exception {
+        List<String> calDates = new ArrayList<String>();
+        calDates.add("2012-01-24");
+        calDates.add("2012-12-30");
+
+        List<String> yearStarts = lookupTable.scan("CAL_DT", calDates, "YEAR_BEG_DT");
+
+        Assert.assertTrue(yearStarts.size() > 0);
+        for (String yearStart : yearStarts) {
+            System.out.println(yearStart);
+
+            Assert.assertEquals("2012-01-01", yearStart);
+        }
+    }
+
+    /**
+     * Maps the CAL_DT range ["2012-01-24", "2012-12-30"] to QTR_BEG_DT and
+     * expects the resulting range to run from "2012-01-01" to "2012-10-01".
+     */
+    @Test
+    public void testMapRange() throws Exception {
+        // mapRange takes the two bounds directly, so no value list is needed here
+        Pair<String, String> results = lookupTable.mapRange("CAL_DT", "2012-01-24", "2012-12-30", "QTR_BEG_DT");
+
+        Assert.assertNotNull(results);
+        System.out.println("The first qtr_beg_dt is " + results.getFirst());
+        System.out.println("The last qtr_beg_dt is " + results.getSecond());
+
+        Assert.assertEquals("2012-01-01", results.getFirst());
+        Assert.assertEquals("2012-10-01", results.getSecond());
+    }
+
+    /**
+     * Maps two CAL_DT values to YEAR_BEG_DT and expects exactly one distinct
+     * result, "2012-01-01".
+     */
+    @Test
+    public void testMapValues() throws Exception {
+        Set<String> calDates = new HashSet<String>();
+        calDates.add("2012-01-24");
+        calDates.add("2012-12-30");
+
+        Set<String> yearStarts = lookupTable.mapValues("CAL_DT", calDates, "YEAR_BEG_DT");
+
+        Assert.assertTrue(yearStarts.size() == 1);
+        for (String yearStart : yearStarts) {
+            System.out.println(yearStart);
+
+            Assert.assertEquals("2012-01-01", yearStart);
+        }
+    }
+
+    /**
+     * Builds the lookup table used by the tests: a {@code LookupStringTable}
+     * over the EDW.TEST_CAL_DT table description and a fixed snapshot resource,
+     * keyed by CAL_DT. The table is also printed to standard output.
+     */
+    public LookupTable<String> initLookupTable() throws Exception {
+        final String tableName = "EDW.TEST_CAL_DT";
+        final String[] pkCols = { "CAL_DT" };
+        final String snapshotResPath = "/table_snapshot/TEST_CAL_DT.csv/4af48c94-86de-4e22-a4fd-c49b06cbaa4f.snapshot";
+
+        MetadataManager metaMgr = MetadataManager.getInstance(config);
+        SnapshotTable snapshot = getSnapshotManager().getSnapshotTable(snapshotResPath);
+        TableDesc tableDesc = metaMgr.getTableDesc(tableName);
+
+        LookupTable<String> table = new LookupStringTable(tableDesc, pkCols, snapshot);
+        System.out.println(table);
+        return table;
+    }
+
+ /** Returns the SnapshotManager instance obtained for this test's {@code config}. */
+ private SnapshotManager getSnapshotManager() {
+ return SnapshotManager.getInstance(config);
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/7e8896ac/core-dictionary/src/test/resources/dict/DW_SITES
----------------------------------------------------------------------
diff --git a/core-dictionary/src/test/resources/dict/DW_SITES b/core-dictionary/src/test/resources/dict/DW_SITES
new file mode 100644
index 0000000..6f6af46
Binary files /dev/null and b/core-dictionary/src/test/resources/dict/DW_SITES differ