You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by ma...@apache.org on 2015/06/24 15:23:30 UTC

[3/4] incubator-kylin git commit: hot fix: filter evaluation performance issue caused by TableDesc

hot fix: filter evaluation performance issue caused by TableDesc


Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/e0733c96
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/e0733c96
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/e0733c96

Branch: refs/heads/0.7-staging
Commit: e0733c96db5ed908ecb2fead8067beb42d0fd445
Parents: e8132b7
Author: honma <ho...@ebay.com>
Authored: Wed Jun 24 21:20:25 2015 +0800
Committer: honma <ho...@ebay.com>
Committed: Wed Jun 24 21:20:25 2015 +0800

----------------------------------------------------------------------
 .../apache/kylin/metadata/model/TableDesc.java  |  13 +-
 .../org/apache/kylin/storage/tuple/Tuple.java   |  10 +-
 .../kylin/storage/filter/FilterPerfTest.java    | 132 +++++++++++++++++++
 3 files changed, 146 insertions(+), 9 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/e0733c96/metadata/src/main/java/org/apache/kylin/metadata/model/TableDesc.java
----------------------------------------------------------------------
diff --git a/metadata/src/main/java/org/apache/kylin/metadata/model/TableDesc.java b/metadata/src/main/java/org/apache/kylin/metadata/model/TableDesc.java
index 14ea390..30c4d76 100644
--- a/metadata/src/main/java/org/apache/kylin/metadata/model/TableDesc.java
+++ b/metadata/src/main/java/org/apache/kylin/metadata/model/TableDesc.java
@@ -18,9 +18,6 @@
 
 package org.apache.kylin.metadata.model;
 
-import java.util.Arrays;
-import java.util.Comparator;
-
 import com.fasterxml.jackson.annotation.JsonAutoDetect;
 import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility;
 import com.fasterxml.jackson.annotation.JsonProperty;
@@ -28,6 +25,9 @@ import org.apache.kylin.common.persistence.ResourceStore;
 import org.apache.kylin.common.persistence.RootPersistentEntity;
 import org.apache.kylin.common.util.StringSplitter;
 
+import java.util.Arrays;
+import java.util.Comparator;
+
 /**
  * Table Metadata from Source. All name should be uppercase.
  * <p/>
@@ -43,6 +43,8 @@ public class TableDesc extends RootPersistentEntity {
 
     private DatabaseDesc database;
 
+    private String identity = null;
+
     public ColumnDesc findColumnByName(String name) {
         //ignore the db name and table name if exists
         int lastIndexOfDot = name.lastIndexOf(".");
@@ -73,7 +75,10 @@ public class TableDesc extends RootPersistentEntity {
     }
     
     public String getIdentity() {
-        return String.format("%s.%s", this.getDatabase().toUpperCase(), this.getName()).toUpperCase();
+        if (identity == null) { // first call: nothing has been cached yet
+            identity = String.format("%s.%s", this.getDatabase().toUpperCase(), this.getName()).toUpperCase(); // cache the upper-cased DATABASE.NAME form
+        }
+        return identity; // NOTE(review): never reset in this method -- confirm database/name cannot change after the first call
     }
     
     public static String concatResourcePath(String tableIdentity) {

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/e0733c96/storage/src/main/java/org/apache/kylin/storage/tuple/Tuple.java
----------------------------------------------------------------------
diff --git a/storage/src/main/java/org/apache/kylin/storage/tuple/Tuple.java b/storage/src/main/java/org/apache/kylin/storage/tuple/Tuple.java
index 5023580..92e5174 100644
--- a/storage/src/main/java/org/apache/kylin/storage/tuple/Tuple.java
+++ b/storage/src/main/java/org/apache/kylin/storage/tuple/Tuple.java
@@ -18,10 +18,6 @@
 
 package org.apache.kylin.storage.tuple;
 
-import java.math.BigDecimal;
-import java.util.Date;
-import java.util.List;
-
 import org.apache.kylin.common.util.Array;
 import org.apache.kylin.common.util.DateFormat;
 import org.apache.kylin.cube.CubeManager;
@@ -31,6 +27,10 @@ import org.apache.kylin.dict.lookup.LookupStringTable;
 import org.apache.kylin.metadata.model.TblColRef;
 import org.apache.kylin.metadata.tuple.ITuple;
 
+import java.math.BigDecimal;
+import java.util.Date;
+import java.util.List;
+
 /**
  * @author xjiang
  */
@@ -82,7 +82,7 @@ public class Tuple implements ITuple {
         return info.getDataType(fieldName);
     }
 
-    private void setFieldObjectValue(String fieldName, Object fieldValue) {
+    public void setFieldObjectValue(String fieldName, Object fieldValue) { // NOTE(review): widened from private; FilterPerfTest calls it from another package
         int index = info.getFieldIndex(fieldName); // slot that info assigns to this field name
         values[index] = fieldValue;
     }

http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/e0733c96/storage/src/test/java/org/apache/kylin/storage/filter/FilterPerfTest.java
----------------------------------------------------------------------
diff --git a/storage/src/test/java/org/apache/kylin/storage/filter/FilterPerfTest.java b/storage/src/test/java/org/apache/kylin/storage/filter/FilterPerfTest.java
new file mode 100644
index 0000000..e8826cd
--- /dev/null
+++ b/storage/src/test/java/org/apache/kylin/storage/filter/FilterPerfTest.java
@@ -0,0 +1,132 @@
+package org.apache.kylin.storage.filter;
+
+import com.google.common.collect.Iterables;
+import com.google.common.collect.Lists;
+import com.google.common.io.Files;
+import org.apache.kylin.common.KylinConfig;
+import org.apache.kylin.common.util.LocalFileMetadataTestCase;
+import org.apache.kylin.metadata.MetadataManager;
+import org.apache.kylin.metadata.filter.*;
+import org.apache.kylin.metadata.model.ColumnDesc;
+import org.apache.kylin.metadata.model.TableDesc;
+import org.apache.kylin.metadata.model.TblColRef;
+import org.apache.kylin.storage.tuple.Tuple;
+import org.apache.kylin.storage.tuple.TupleInfo;
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.io.File;
+import java.io.IOException;
+import java.nio.charset.Charset;
+import java.util.Iterator;
+import java.util.List;
+
+/**
+ * Rough timing harness for {@link TupleFilter#evaluate} over tuples built from the
+ * DEFAULT.TEST_KYLIN_FACT sample CSV. It prints the match count and the elapsed
+ * milliseconds to stdout and makes no assertions.
+ */
+public class FilterPerfTest extends LocalFileMetadataTestCase {
+
+    /** Identity of the fact table whose columns and sample rows drive the run. */
+    private static final String FACT_TABLE = "DEFAULT.TEST_KYLIN_FACT";
+
+    /** Sample rows for {@link #FACT_TABLE}, resolved relative to the module's working directory. */
+    private static final String FACT_TABLE_CSV = "../examples/test_case_data/localmeta/data/DEFAULT.TEST_KYLIN_FACT.csv";
+
+    /** Number of one-second ticks printed before the timed section starts. */
+    private static final int COUNTDOWN_SECONDS = 5;
+
+    /** Exact number of filter evaluations performed in the timed section. */
+    private static final int TOTAL_LOOP = 1000000;
+
+    @Before
+    public void setUp() throws Exception {
+        this.createTestMetadata();
+    }
+
+    @After
+    public void after() throws Exception {
+        this.cleanupTestMetadata();
+    }
+
+    /**
+     * Loads the sample tuples, counts down, then evaluates the sample filter
+     * {@link #TOTAL_LOOP} times while cycling over the tuples.
+     *
+     * @throws IOException if the sample CSV cannot be read
+     * @throws InterruptedException if the countdown sleep is interrupted
+     */
+    @Test
+    public void foo() throws IOException, InterruptedException {
+        KylinConfig kylinConfig = KylinConfig.getInstanceFromEnv();
+        TableDesc tableDesc = MetadataManager.getInstance(kylinConfig).getTableDesc(FACT_TABLE);
+
+        TupleFilter filter = buildSampleFilter(tableDesc);
+        List<Tuple> tuples = loadTuples(tableDesc);
+
+        // NOTE(review): presumably this pause leaves time to attach a profiler -- confirm before removing.
+        for (int i = 0; i < COUNTDOWN_SECONDS; i++) {
+            Thread.sleep(1000);
+            System.out.println(i);
+        }
+
+        Iterator<Tuple> itr = Iterables.cycle(tuples).iterator();
+        int matchCount = 0;
+        long startTime = System.currentTimeMillis();
+        // The cycling iterator is empty when no tuple was loaded, so hasNext() also guards that case.
+        // Counting from zero performs exactly TOTAL_LOOP evaluations (the old ++count > limit check ran one extra).
+        for (int loopCount = 0; loopCount < TOTAL_LOOP && itr.hasNext(); loopCount++) {
+            if (filter.evaluate(itr.next())) {
+                matchCount++;
+            }
+        }
+        System.out.println("Total match count: " + matchCount);
+        System.out.println("elapsed time: " + (System.currentTimeMillis() - startTime));
+    }
+
+    /** Builds the AND-of-ORs filter on LSTG_FORMAT_NAME, LEAF_CATEG_ID and LSTG_SITE_ID used by {@link #foo()}. */
+    private TupleFilter buildSampleFilter(TableDesc tableDesc) {
+        TblColRef format = new TblColRef(tableDesc.findColumnByName("LSTG_FORMAT_NAME"));
+        TblColRef categ = new TblColRef(tableDesc.findColumnByName("LEAF_CATEG_ID"));
+        TblColRef site = new TblColRef(tableDesc.findColumnByName("LSTG_SITE_ID"));
+
+        List<TupleFilter> ands = Lists.newArrayList();
+        ands.add(buildOrFilterWithMultipleValues(format, Lists.newArrayList("Auction", "FP-GTC", "Others")));
+        ands.add(buildOrFilterWithMultipleValues(categ, Lists.newArrayList("48027", "164261", "113802", "118687")));
+        ands.add(buildOrFilterWithMultipleValues(site, Lists.newArrayList("0", "15", "3")));
+        return buildAndFilter(ands);
+    }
+
+    /** Reads the sample CSV and turns every row with one token per table column into a {@link Tuple}. */
+    private List<Tuple> loadTuples(TableDesc tableDesc) throws IOException {
+        ColumnDesc[] columns = tableDesc.getColumns();
+        TupleInfo info = new TupleInfo();
+        for (int i = 0; i < columns.length; i++) {
+            ColumnDesc column = columns[i];
+            info.setField(column.getName(), new TblColRef(column), column.getDatatype(), i);
+        }
+
+        // Fixed charset so the run does not depend on the platform default encoding.
+        List<String> lines = Files.readLines(new File(FACT_TABLE_CSV), Charset.forName("UTF-8"));
+        List<Tuple> tuples = Lists.newArrayListWithCapacity(lines.size());
+        for (String line : lines) {
+            String[] tokens = line.split(",");
+            if (tokens.length != columns.length) {
+                System.out.println("invalid line: " + line);
+                continue;
+            }
+            Tuple t = new Tuple(info);
+            for (int k = 0; k < columns.length; k++) {
+                t.setFieldObjectValue(columns[k].getName(), tokens[k]);
+            }
+            tuples.add(t);
+        }
+        return tuples;
+    }
+
+    /** Returns an OR filter holding one {@code column = value} comparison per entry of {@code values}. */
+    private TupleFilter buildOrFilterWithMultipleValues(TblColRef column, List<String> values) {
+        List<TupleFilter> ors = Lists.newArrayList();
+        for (String v : values) {
+            ors.add(buildCompareFilter(column, v));
+        }
+        return buildOrFilter(ors);
+    }
+
+    /** Returns an EQ comparison whose children are the column reference followed by the constant {@code v}. */
+    protected CompareTupleFilter buildCompareFilter(TblColRef column, String v) {
+        CompareTupleFilter compareFilter = new CompareTupleFilter(TupleFilter.FilterOperatorEnum.EQ);
+        compareFilter.addChild(new ColumnTupleFilter(column));
+        compareFilter.addChild(new ConstantTupleFilter(v));
+        return compareFilter;
+    }
+
+    /** Returns an AND filter with {@code ands} as its children, in list order. */
+    protected TupleFilter buildAndFilter(List<TupleFilter> ands) {
+        return buildLogicalFilter(TupleFilter.FilterOperatorEnum.AND, ands);
+    }
+
+    /** Returns an OR filter with {@code ors} as its children, in list order. */
+    protected TupleFilter buildOrFilter(List<TupleFilter> ors) {
+        return buildLogicalFilter(TupleFilter.FilterOperatorEnum.OR, ors);
+    }
+
+    /** Creates a logical filter for {@code operator} and attaches {@code children} in list order. */
+    private TupleFilter buildLogicalFilter(TupleFilter.FilterOperatorEnum operator, List<TupleFilter> children) {
+        LogicalTupleFilter logicalFilter = new LogicalTupleFilter(operator);
+        for (TupleFilter child : children) {
+            logicalFilter.addChild(child);
+        }
+        return logicalFilter;
+    }
+
+}