You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by li...@apache.org on 2015/07/22 06:13:14 UTC

[24/47] incubator-kylin git commit: KYLIN-875 rename modules: core-common, core-cube, core-dictionary, core-cube

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/7e8896ac/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryTest.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryTest.java b/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryTest.java
new file mode 100644
index 0000000..f6031e8
--- /dev/null
+++ b/core-dictionary/src/test/java/org/apache/kylin/dict/TrieDictionaryTest.java
@@ -0,0 +1,353 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.dict;
+
+import org.junit.Test;
+
+import java.io.*;
+import java.util.*;
+
+import static org.junit.Assert.*;
+
+public class TrieDictionaryTest {
+
+    /**
+     * Manual benchmark entry point (not run by JUnit): loads the strings from
+     * dw_category_grouping_names.dat and hands them to benchmarkStringDictionary.
+     */
+    public static void main(String[] args) throws Exception {
+        InputStream fixture = new FileInputStream("src/test/resources/dict/dw_category_grouping_names.dat");
+        benchmarkStringDictionary(loadStrings(fixture));
+    }
+
+    /**
+     * Builds a dictionary that holds two very long values next to a few short
+     * ones, round-trips it through serialization, and checks that ids grow with
+     * the sorted order of the values and map back to the same values.
+     */
+    @Test
+    public void partOverflowTest() {
+        ArrayList<String> str = new ArrayList<String>();
+        str.add("part");
+        str.add("par");
+        str.add("partition");
+        str.add("party");
+        str.add("parties");
+        str.add("paint");
+
+        String longStr = "paintjkjdfklajkdljfkdsajklfjklsadjkjekjrklewjrklewjklrjklewjkljkljkljkljweklrjewkljrklewjrlkjewkljrkljkljkjlkjjkljkljkljkljlkjlkjlkjljdfadfads" + "dddddddddddddddddddddddddddddddddddddddddddddddddkfjadslkfjdsakljflksadjklfjklsjfkljwelkrjewkljrklewjklrjelkwjrklewjrlkjwkljerklkljlkjrlkwejrk" + "dddddddddddddddddddddddddddddddddddddddddddddddddkfjadslkfjdsakljflksadjklfjklsjfkljwelkrjewkljrklewjklrjelkwjrklewjrlkjwkljerklkljlkjrlkwejrk" + "dddddddddddddddddddddddddddddddddddddddddddddddddkfjadslkfjdsakljflksadjklfjklsjfkljwelkrjewkljrklewjklrjelkwjrklewjrlkjwkljerklkljlkjrlkwejrk" + "dddddddddddddddddddddddddddddddddddddddddddddddddkfjadslkfjdsakljflksadjklfjklsjfkljwelkrjewkljrklewjklrjelkwjrklewjrlkjwkljerklkljlkjrlkwejrk" + "dddddddddddddddddddddddddddddddddddddddddddddddddkfjadslkfjdsakljflksadjklfjklsjfkljwelkrjewkljrklewjklrjelkwjrklewjrlkjwkljerklkljlkjrlkwejrk"
+                + "dddddddddddddddddddddddddddddddddddddddddddddddddkfjadslkfjdsakljflksadjklfjklsjfkljwelkrjewkljrklewjklrjelkwjrklewjrlkjwkljerklkljlkjrlkwejrk" + "dddddddddddddddddddddddddddddddddddddddddddddddddkfjadslkfjdsakljflksadjklfjklsjfkljwelkrjewkljrklewjklrjelkwjrklewjrlkjwkljerklkljlkjrlkwejrk";
+        System.out.println("The length of the long string is " + longStr.length());
+        str.add(longStr);
+
+        str.add("zzzzzz" + longStr);// another long string
+
+        // build, then test serialize
+        TrieDictionaryBuilder<String> builder = newDictBuilder(str);
+        TrieDictionary<String> built = builder.build(0);
+        TrieDictionary<String> dict = testSerialize(built);
+
+        // test basic id<==>value, walking the distinct values in sorted order
+        TreeSet<String> sortedValues = new TreeSet<String>(str);
+        int rank = 0;
+        int previousId = -1;
+        for (String value : sortedValues) {
+            // in case of overflow parts, there exist interpolation nodes
+            // they exist to make sure that any node's part is shorter than 255;
+            // an id may therefore exceed the rank of its value, but ids must
+            // still be strictly increasing
+            int actualId = dict.getIdFromValue(value);
+            assertTrue(actualId >= rank);
+            assertTrue(actualId > previousId);
+            previousId = actualId;
+
+            assertEquals(value, dict.getValueFromId(actualId));
+            rank++;
+        }
+    }
+
+    /**
+     * Runs the shared dictionary checks on a value set that includes the
+     * empty string; no not-found probes are supplied.
+     */
+    @Test
+    public void emptyValueTest() {
+        // the empty string is part of the values and must get an id like any other
+        List<String> values = Arrays.asList("", "part", "par", "partition", "party", "parties", "paint");
+        ArrayList<String> str = new ArrayList<String>(values);
+
+        testStringDictionary(str, null);
+    }
+
+    @Test
+    public void simpleTrieTest() {
+        ArrayList<String> str = new ArrayList<String>();
+        str.add("part");
+        str.add("part"); // meant to be dup
+        str.add("par");
+        str.add("partition");
+        str.add("party");
+        str.add("parties");
+        str.add("paint");
+
+        ArrayList<String> notFound = new ArrayList<String>();
+        notFound.add("");
+        notFound.add("p");
+        notFound.add("pa");
+        notFound.add("pb");
+        notFound.add("parti");
+        notFound.add("partz");
+        notFound.add("partyz");
+
+        testStringDictionary(str, notFound);
+    }
+
+    /**
+     * Builds one dictionary from a word list and a second one from the same
+     * list plus "py", then checks containedBy in both directions.
+     */
+    @Test
+    public void dictionaryContainTest() {
+        ArrayList<String> str = new ArrayList<String>();
+        str.add("part");
+        str.add("part"); // meant to be dup
+        str.add("par");
+        str.add("partition");
+        str.add("party");
+        str.add("parties");
+        str.add("paint");
+
+        Random random = new Random();
+        TrieDictionary<String> dict = newDictBuilder(str).build(random.nextInt(100));
+
+        str.add("py");
+        TrieDictionary<String> dict2 = newDictBuilder(str).build(random.nextInt(100));
+
+        assertTrue(dict.containedBy(dict2));
+        assertFalse(dict2.containedBy(dict));
+    }
+
+
+    /** Runs the shared dictionary checks on the words loaded from the English word-list fixture. */
+    @Test
+    public void englishWordsTest() throws Exception {
+        InputStream fixture = new FileInputStream("src/test/resources/dict/english-words.80 (scowl-2015.05.18).txt");
+        testStringDictionary(loadStrings(fixture), null);
+    }
+
+    /** Runs the shared dictionary checks on the strings loaded from dw_category_grouping_names.dat. */
+    @Test
+    public void categoryNamesTest() throws Exception {
+        InputStream fixture = new FileInputStream("src/test/resources/dict/dw_category_grouping_names.dat");
+        testStringDictionary(loadStrings(fixture), null);
+    }
+
+    /**
+     * Builds a dictionary from {@code str} and HashMap/array baselines from its
+     * sorted distinct values, then runs the lookup benchmark twice (warm-up first).
+     */
+    private static void benchmarkStringDictionary(ArrayList<String> str) throws UnsupportedEncodingException {
+        TrieDictionaryBuilder<String> builder = newDictBuilder(str);
+        builder.stats().print();
+        TrieDictionary<String> dict = builder.build(0);
+
+        // prepare id==>value array and value==>id map
+        TreeSet<String> set = new TreeSet<String>(str);
+        HashMap<String, Integer> map = new HashMap<String, Integer>();
+        String[] strArray = new String[set.size()];
+        byte[][] array = new byte[set.size()][];
+        int nextId = 0;
+        for (String value : set) {
+            map.put(value, nextId);
+            strArray[nextId] = value;
+            array[nextId] = value.getBytes("UTF-8");
+            nextId++;
+        }
+
+        // System.out.println("Dict size in bytes:  " +
+        // MemoryUtil.deepMemoryUsageOf(dict));
+        // System.out.println("Map size in bytes:   " +
+        // MemoryUtil.deepMemoryUsageOf(map));
+        // System.out.println("Array size in bytes: " +
+        // MemoryUtil.deepMemoryUsageOf(strArray));
+
+        // warm-up, said that code only got JIT after run 1k-10k times,
+        // following jvm options may help
+        // -XX:CompileThreshold=1500
+        // -XX:+PrintCompilation
+        benchmark("Warm up", dict, set, map, strArray, array);
+        benchmark("Benchmark", dict, set, map, strArray, array);
+    }
+
+    /** Times value==>id and id==>value lookups via HashMap/array baselines and via the dictionary, printing each elapsed time in milliseconds. */
+    private static int benchmark(String msg, TrieDictionary<String> dict, TreeSet<String> set, HashMap<String, Integer> map, String[] strArray, byte[][] array) {
+        int n = set.size();
+        int times = 10 * 1000 * 1000 / n; // about 10 million lookups per section; NOTE(review): n == 0 would divide by zero
+        int keep = 0; // OR-ed with every lookup result and returned, so the JIT cannot drop the calls under test
+        byte[] valueBytes = new byte[dict.getSizeOfValue()];
+        long start;
+        // benchmark value==>id, via HashMap
+        System.out.println(msg + " HashMap lookup value==>id");
+        start = System.currentTimeMillis();
+        for (int i = 0; i < times; i++) {
+            for (int j = 0; j < n; j++) {
+                keep |= map.get(strArray[j]);
+            }
+        }
+        long timeValueToIdByMap = System.currentTimeMillis() - start;
+        System.out.println(timeValueToIdByMap);
+
+        // benchmark value==>id, via Dict (looked up from the pre-encoded byte arrays)
+        System.out.println(msg + " Dictionary lookup value==>id");
+        start = System.currentTimeMillis();
+        for (int i = 0; i < times; i++) {
+            for (int j = 0; j < n; j++) {
+                keep |= dict.getIdFromValueBytes(array[j], 0, array[j].length);
+            }
+        }
+        long timeValueToIdByDict = System.currentTimeMillis() - start;
+        System.out.println(timeValueToIdByDict);
+
+        // benchmark id==>value, via Array (the string length is read only to consume the element)
+        System.out.println(msg + " Array lookup id==>value");
+        start = System.currentTimeMillis();
+        for (int i = 0; i < times; i++) {
+            for (int j = 0; j < n; j++) {
+                keep |= strArray[j].length();
+            }
+        }
+        long timeIdToValueByArray = System.currentTimeMillis() - start;
+        System.out.println(timeIdToValueByArray);
+
+        // benchmark id==>value, via Dict; assumes the ids are 0..n-1, i.e. the dictionary was built with base id 0
+        System.out.println(msg + " Dictionary lookup id==>value");
+        start = System.currentTimeMillis();
+        for (int i = 0; i < times; i++) {
+            for (int j = 0; j < n; j++) {
+                keep |= dict.getValueBytesFromId(j, valueBytes, 0);
+            }
+        }
+        long timeIdToValueByDict = System.currentTimeMillis() - start;
+        System.out.println(timeIdToValueByDict);
+
+        return keep;
+    }
+
+    /**
+     * Builds a dictionary from {@code str} with a random base id, round-trips it
+     * through serialization and verifies the id<==>value mapping, the rejection
+     * of every {@code notFound} value (null skips that part) and the null value.
+     */
+    private static void testStringDictionary(ArrayList<String> str, ArrayList<String> notFound) {
+        TrieDictionaryBuilder<String> b = newDictBuilder(str);
+        int baseId = new Random().nextInt(100);
+        TrieDictionary<String> dict = b.build(baseId);
+
+        // test serialize
+        dict = testSerialize(dict);
+
+        // test basic id<==>value: ids follow the sorted order of the distinct values
+        TreeSet<String> set = new TreeSet<String>(str);
+        int id = baseId;
+        for (String value : set) {
+            assertEquals(id, dict.getIdFromValue(value));
+            assertEquals(value, dict.getValueFromId(id));
+            id++;
+        }
+
+        if (notFound != null) {
+            for (String s : notFound) {
+                try {
+                    dict.getIdFromValue(s);
+                    fail("For not found value '" + s + "', IllegalArgumentException is expected");
+                } catch (IllegalArgumentException e) {
+                    // good
+                }
+            }
+        }
+
+        // test null value
+        int nullId = dict.getIdFromValue(null);
+        assertNull(dict.getValueFromId(nullId));
+        int nullId2 = dict.getIdFromValueBytes(null, 0, 0);
+        // expected value goes first so that a failure message reads correctly
+        assertEquals(-1, dict.getValueBytesFromId(nullId2, null, 0));
+        assertEquals(nullId, nullId2);
+    }
+
+    /** Round-trips {@code dict} through an in-memory write/readFields; an IOException is rethrown unchecked. */
+    private static TrieDictionary<String> testSerialize(TrieDictionary<String> dict) {
+        try {
+            ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+            DataOutputStream out = new DataOutputStream(buffer);
+            dict.write(out);
+            out.close();
+            DataInputStream in = new DataInputStream(new ByteArrayInputStream(buffer.toByteArray()));
+            TrieDictionary<String> copy = new TrieDictionary<String>();
+            copy.readFields(in);
+            in.close();
+            return copy;
+        } catch (IOException e) {
+            throw new RuntimeException(e);
+        }
+    }
+
+    private static TrieDictionaryBuilder<String> newDictBuilder(ArrayList<String> str) {
+        TrieDictionaryBuilder<String> b = new TrieDictionaryBuilder<String>(new StringBytesConverter());
+        for (String s : str)
+            b.addValue(s);
+        return b;
+    }
+
+    /** Reads {@code is} as UTF-8 lines, trims each line, keeps the non-empty ones, and closes the stream. */
+    private static ArrayList<String> loadStrings(InputStream is) throws Exception {
+        ArrayList<String> words = new ArrayList<String>();
+        BufferedReader reader = new BufferedReader(new InputStreamReader(is, "UTF-8"));
+        try {
+            for (String line = reader.readLine(); line != null; line = reader.readLine()) {
+                String word = line.trim();
+                if (!word.isEmpty())
+                    words.add(word);
+            }
+        } finally {
+            reader.close();
+            is.close();
+        }
+        return words;
+    }
+
+    /** Builds a dictionary from two values that share a long common prefix and dumps it to stdout. */
+    @Test
+    public void testSuperLongStringValue() {
+        String longPrefix = "0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789" + "0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789";
+        String v1 = longPrefix + "abcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyzabcdefghijklmnopqrstuvwxyz";
+        String v2 = longPrefix + "xyz";
+
+        TrieDictionaryBuilder<String> builder = new TrieDictionaryBuilder<String>(new StringBytesConverter());
+        builder.addValue(v1);
+        builder.addValue(v2);
+        TrieDictionary<String> dict = builder.build(0);
+        dict.dump(System.out);
+    }
+
+    @Test
+    public void testRounding() {
+        // see NumberDictionaryTest.testRounding();
+    }
+}

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/7e8896ac/core-dictionary/src/test/java/org/apache/kylin/dict/lookup/LookupTableTest.java
----------------------------------------------------------------------
diff --git a/core-dictionary/src/test/java/org/apache/kylin/dict/lookup/LookupTableTest.java b/core-dictionary/src/test/java/org/apache/kylin/dict/lookup/LookupTableTest.java
new file mode 100644
index 0000000..bb4a717
--- /dev/null
+++ b/core-dictionary/src/test/java/org/apache/kylin/dict/lookup/LookupTableTest.java
@@ -0,0 +1,125 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+*/
+
+package org.apache.kylin.dict.lookup;
+
+import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.common.util.LocalFileMetadataTestCase;
+import org.apache.kylin.metadata.MetadataManager;
+import org.apache.kylin.metadata.model.TableDesc;
+import org.apache.kylin.common.util.Pair;
+import org.junit.After;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * Tests LookupTable scan, mapRange and mapValues against the EDW.TEST_CAL_DT test snapshot.
+ */
+public class LookupTableTest extends LocalFileMetadataTestCase {
+
+    private KylinConfig config = null;
+
+    private LookupTable<String> lookupTable;
+
+    /** Calls createTestMetadata(), then loads the config and builds the lookup table under test. */
+    @Before
+    public void setup() throws Exception {
+        createTestMetadata();
+        config = KylinConfig.getInstanceFromEnv();
+        lookupTable = initLookupTable();
+    }
+
+    /** Calls cleanupTestMetadata() after each test. */
+    @After
+    public void tearDown() {
+        cleanupTestMetadata();
+    }
+
+    /** Every YEAR_BEG_DT found for two CAL_DT values of 2012 must be 2012-01-01. */
+    @Test
+    public void testScan() throws Exception {
+        List<String> values = new ArrayList<String>();
+        values.add("2012-01-24");
+        values.add("2012-12-30");
+        List<String> results = lookupTable.scan("CAL_DT", values, "YEAR_BEG_DT");
+
+        Assert.assertFalse(results.isEmpty());
+        for (String i : results) {
+            System.out.println(i);
+            Assert.assertEquals("2012-01-01", i);
+        }
+    }
+
+    /** The CAL_DT range 2012-01-24..2012-12-30 must map to QTR_BEG_DT 2012-01-01..2012-10-01. */
+    @Test
+    public void testMapRange() throws Exception {
+        Pair<String, String> results = lookupTable.mapRange("CAL_DT", "2012-01-24", "2012-12-30", "QTR_BEG_DT");
+
+        Assert.assertNotNull(results);
+        System.out.println("The first qtr_beg_dt is " + results.getFirst());
+        System.out.println("The last qtr_beg_dt is " + results.getSecond());
+
+        Assert.assertEquals("2012-01-01", results.getFirst());
+        Assert.assertEquals("2012-10-01", results.getSecond());
+    }
+
+    /** Two CAL_DT values of 2012 must map to exactly one YEAR_BEG_DT, 2012-01-01. */
+    @Test
+    public void testMapValues() throws Exception {
+        Set<String> values = new HashSet<String>();
+        values.add("2012-01-24");
+        values.add("2012-12-30");
+        Set<String> results = lookupTable.mapValues("CAL_DT", values, "YEAR_BEG_DT");
+
+        Assert.assertEquals(1, results.size());
+        for (String i : results) {
+            System.out.println(i);
+            Assert.assertEquals("2012-01-01", i);
+        }
+    }
+
+    /**
+     * Builds a LookupStringTable keyed on CAL_DT from the EDW.TEST_CAL_DT table
+     * description and its stored snapshot, prints it, and returns it.
+     */
+    public LookupTable<String> initLookupTable() throws Exception {
+        MetadataManager metaMgr = MetadataManager.getInstance(config);
+
+        String tableName = "EDW.TEST_CAL_DT";
+        String[] pkCols = new String[]{"CAL_DT"};
+        String snapshotResPath = "/table_snapshot/TEST_CAL_DT.csv/4af48c94-86de-4e22-a4fd-c49b06cbaa4f.snapshot";
+        SnapshotTable snapshot = getSnapshotManager().getSnapshotTable(snapshotResPath);
+        TableDesc tableDesc = metaMgr.getTableDesc(tableName);
+        LookupTable<String> lt = new LookupStringTable(tableDesc, pkCols, snapshot);
+
+        System.out.println(lt);
+        return lt;
+    }
+
+    /** Returns the SnapshotManager instance for the current config. */
+    private SnapshotManager getSnapshotManager() {
+        return SnapshotManager.getInstance(config);
+    }
+
+}

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/7e8896ac/core-dictionary/src/test/resources/dict/DW_SITES
----------------------------------------------------------------------
diff --git a/core-dictionary/src/test/resources/dict/DW_SITES b/core-dictionary/src/test/resources/dict/DW_SITES
new file mode 100644
index 0000000..6f6af46
Binary files /dev/null and b/core-dictionary/src/test/resources/dict/DW_SITES differ