You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by sh...@apache.org on 2015/06/29 09:13:45 UTC

[1/2] incubator-kylin git commit: KYLIN-839 use dictionary in building snapshot

Repository: incubator-kylin
Updated Branches:
  refs/heads/KYLIN-839 b97d0e550 -> 9a467fd3f


KYLIN-839 use dictionary in building snapshot

Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/08c8e910
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/08c8e910
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/08c8e910

Branch: refs/heads/KYLIN-839
Commit: 08c8e9103fa757330d8e4848552e563be9bcbf2e
Parents: b97d0e5
Author: shaofengshi <sh...@apache.org>
Authored: Mon Jun 29 10:52:49 2015 +0800
Committer: shaofengshi <sh...@apache.org>
Committed: Mon Jun 29 10:52:49 2015 +0800

----------------------------------------------------------------------
 .../java/org/apache/kylin/dict/lookup/SnapshotTable.java     | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/08c8e910/dictionary/src/main/java/org/apache/kylin/dict/lookup/SnapshotTable.java
----------------------------------------------------------------------
diff --git a/dictionary/src/main/java/org/apache/kylin/dict/lookup/SnapshotTable.java b/dictionary/src/main/java/org/apache/kylin/dict/lookup/SnapshotTable.java
index 35cbb84..4764dd0 100644
--- a/dictionary/src/main/java/org/apache/kylin/dict/lookup/SnapshotTable.java
+++ b/dictionary/src/main/java/org/apache/kylin/dict/lookup/SnapshotTable.java
@@ -49,7 +49,7 @@ public class SnapshotTable extends RootPersistentEntity implements ReadableTable
     @JsonProperty("column_delimeter")
     private String columnDelimeter;
     @JsonProperty("useDictionary")
-    private Boolean useDictionary;
+    private boolean useDictionary;
 
     private ArrayList<String[]> rows;
 
@@ -60,7 +60,7 @@ public class SnapshotTable extends RootPersistentEntity implements ReadableTable
     SnapshotTable(ReadableTable table) throws IOException {
         this.signature = table.getSignature();
         this.columnDelimeter = table.getColumnDelimeter();
-        this.useDictionary = Boolean.TRUE;
+        this.useDictionary = true;
     }
 
     public void takeSnapshot(ReadableTable table, TableDesc tableDesc) throws IOException {
@@ -162,7 +162,7 @@ public class SnapshotTable extends RootPersistentEntity implements ReadableTable
             int n = rows.get(0).length;
             out.writeInt(n);
 
-            if (this.useDictionary == Boolean.TRUE) {
+            if (this.useDictionary == true) {
                 Dictionary<String> dict = buildDictionary();
                 dict.write(out);
                 for (int i = 0; i < rows.size(); i++) {
@@ -199,7 +199,7 @@ public class SnapshotTable extends RootPersistentEntity implements ReadableTable
         rows = new ArrayList<String[]>(rowNum);
         if (rowNum > 0) {
             int n = in.readInt();
-            if (this.useDictionary == Boolean.TRUE) {
+            if (this.useDictionary == true) {
                 Dictionary<String> dict = new TrieDictionary<String>();
                 dict.readFields(in);
                 for (int i = 0; i < rowNum; i++) {


[2/2] incubator-kylin git commit: KYLIN-839 saving memory for snapshot tables

Posted by sh...@apache.org.
KYLIN-839 saving memory for snapshot tables

Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/9a467fd3
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/9a467fd3
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/9a467fd3

Branch: refs/heads/KYLIN-839
Commit: 9a467fd3f080a8d22d5b57bb397ceec58c0bd808
Parents: 08c8e91
Author: shaofengshi <sh...@apache.org>
Authored: Mon Jun 29 15:13:34 2015 +0800
Committer: shaofengshi <sh...@apache.org>
Committed: Mon Jun 29 15:13:34 2015 +0800

----------------------------------------------------------------------
 .../apache/kylin/dict/lookup/SnapshotTable.java | 117 +++++++++++--------
 1 file changed, 71 insertions(+), 46 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/9a467fd3/dictionary/src/main/java/org/apache/kylin/dict/lookup/SnapshotTable.java
----------------------------------------------------------------------
diff --git a/dictionary/src/main/java/org/apache/kylin/dict/lookup/SnapshotTable.java b/dictionary/src/main/java/org/apache/kylin/dict/lookup/SnapshotTable.java
index 4764dd0..130a4c2 100644
--- a/dictionary/src/main/java/org/apache/kylin/dict/lookup/SnapshotTable.java
+++ b/dictionary/src/main/java/org/apache/kylin/dict/lookup/SnapshotTable.java
@@ -18,18 +18,11 @@
 
 package org.apache.kylin.dict.lookup;
 
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-
-import org.apache.commons.lang.ArrayUtils;
-import org.apache.hadoop.fs.Path;
-
 import com.fasterxml.jackson.annotation.JsonAutoDetect;
 import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility;
 import com.fasterxml.jackson.annotation.JsonProperty;
+import org.apache.commons.lang.ArrayUtils;
+import org.apache.hadoop.fs.Path;
 import org.apache.kylin.common.persistence.ResourceStore;
 import org.apache.kylin.common.persistence.RootPersistentEntity;
 import org.apache.kylin.dict.Dictionary;
@@ -38,6 +31,13 @@ import org.apache.kylin.dict.TrieDictionary;
 import org.apache.kylin.dict.TrieDictionaryBuilder;
 import org.apache.kylin.metadata.model.TableDesc;
 
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
 /**
  * @author yangli9
  */
@@ -51,7 +51,8 @@ public class SnapshotTable extends RootPersistentEntity implements ReadableTable
     @JsonProperty("useDictionary")
     private boolean useDictionary;
 
-    private ArrayList<String[]> rows;
+    private ArrayList<int[]> rowIndices;
+    private Dictionary<String> dict;
 
     // default constructor for JSON serialization
     public SnapshotTable() {
@@ -69,16 +70,33 @@ public class SnapshotTable extends RootPersistentEntity implements ReadableTable
 
         int maxIndex = tableDesc.getMaxColumnIndex();
 
+        TrieDictionaryBuilder<String> b = new TrieDictionaryBuilder<String>(new StringBytesConverter());
+
         TableReader reader = table.getReader();
-        ArrayList<String[]> allRows = new ArrayList<String[]>();
         while (reader.next()) {
             String[] row = reader.getRow();
             if (row.length <= maxIndex) {
                 throw new IllegalStateException("Bad hive table row, " + tableDesc + " expect " + (maxIndex + 1) + " columns, but got " + Arrays.toString(row));
             }
-            allRows.add(row);
+
+            for (String cell : row) {
+                b.addValue(cell);
+            }
         }
-        this.rows = allRows;
+
+        this.dict = b.build(0);
+
+        reader = table.getReader();
+        ArrayList<int[]> allRowIndices = new ArrayList<int[]>();
+        while (reader.next()) {
+            String[] row = reader.getRow();
+            int[] rowIndex = new int[row.length];
+            for (int i = 0; i < row.length; i++) {
+                rowIndex[i] = dict.getIdFromValue(row[i]);
+            }
+            allRowIndices.add(rowIndex);
+        }
+        this.rowIndices = allRowIndices;
     }
 
     public String getResourcePath() {
@@ -98,12 +116,17 @@ public class SnapshotTable extends RootPersistentEntity implements ReadableTable
             @Override
             public boolean next() throws IOException {
                 i++;
-                return i < rows.size();
+                return i < rowIndices.size();
             }
 
             @Override
             public String[] getRow() {
-                return rows.get(i);
+                int[] rowIndex = rowIndices.get(i);
+                String[] row = new String[rowIndex.length];
+                for (int x = 0; x < row.length; x++) {
+                    row[x] = dict.getValueFromId(rowIndex[x]);
+                }
+                return row;
             }
 
             @Override
@@ -134,9 +157,9 @@ public class SnapshotTable extends RootPersistentEntity implements ReadableTable
      */
     @Override
     public int hashCode() {
-        int[] parts = new int[this.rows.size()];
+        int[] parts = new int[this.rowIndices.size()];
         for (int i = 0; i < parts.length; ++i)
-            parts[i] = Arrays.hashCode(this.rows.get(i));
+            parts[i] = Arrays.hashCode(this.rowIndices.get(i));
         return Arrays.hashCode(parts);
     }
 
@@ -147,76 +170,78 @@ public class SnapshotTable extends RootPersistentEntity implements ReadableTable
         SnapshotTable that = (SnapshotTable) o;
 
         //compare row by row
-        if (this.rows.size() != that.rows.size())
+        if (this.rowIndices.size() != that.rowIndices.size())
             return false;
-        for (int i = 0; i < this.rows.size(); ++i) {
-            if (!ArrayUtils.isEquals(this.rows.get(i), that.rows.get(i)))
+        for (int i = 0; i < this.rowIndices.size(); ++i) {
+            if (!ArrayUtils.isEquals(this.rowIndices.get(i), that.rowIndices.get(i)))
                 return false;
         }
         return true;
     }
 
     void writeData(DataOutput out) throws IOException {
-        out.writeInt(rows.size());
-        if (rows.size() > 0) {
-            int n = rows.get(0).length;
+        out.writeInt(rowIndices.size());
+        if (rowIndices.size() > 0) {
+            int n = rowIndices.get(0).length;
             out.writeInt(n);
 
             if (this.useDictionary == true) {
-                Dictionary<String> dict = buildDictionary();
                 dict.write(out);
-                for (int i = 0; i < rows.size(); i++) {
-                    String[] row = rows.get(i);
+                for (int i = 0; i < rowIndices.size(); i++) {
+                    int[] row = rowIndices.get(i);
                     for (int j = 0; j < n; j++) {
-                        out.writeInt(dict.getIdFromValue(row[j]));
+                        out.writeInt(row[j]);
                     }
                 }
 
             } else {
-                for (int i = 0; i < rows.size(); i++) {
-                    String[] row = rows.get(i);
+                for (int i = 0; i < rowIndices.size(); i++) {
+                    int[] row = rowIndices.get(i);
                     for (int j = 0; j < n; j++) {
-                        out.writeUTF(row[j]);
+                        out.writeUTF(dict.getValueFromId(row[j]));
                     }
                 }
             }
         }
     }
 
-    Dictionary<String> buildDictionary() {
-        TrieDictionaryBuilder<String> b = new TrieDictionaryBuilder<String>(new StringBytesConverter());
-        for (String[] row : rows) {
-            for (String cell : row) {
-                b.addValue(cell);
-            }
-        }
-        TrieDictionary<String> dict = b.build(0);
-        return dict;
-    }
-
     void readData(DataInput in) throws IOException {
         int rowNum = in.readInt();
-        rows = new ArrayList<String[]>(rowNum);
         if (rowNum > 0) {
             int n = in.readInt();
             if (this.useDictionary == true) {
-                Dictionary<String> dict = new TrieDictionary<String>();
+                this.dict = new TrieDictionary<String>();
                 dict.readFields(in);
+
                 for (int i = 0; i < rowNum; i++) {
-                    String[] row = new String[n];
-                    rows.add(row);
+                    int[] row = new int[n];
+                    this.rowIndices.add(row);
                     for (int j = 0; j < n; j++) {
-                        row[j] = dict.getValueFromId(in.readInt());
+                        row[j] = in.readInt();
                     }
                 }
             } else {
+                List<String[]> rows = new ArrayList<String[]>(rowNum);
+                ArrayList<int[]> allRowIndices = new ArrayList<int[]>();
+                TrieDictionaryBuilder<String> b = new TrieDictionaryBuilder<String>(new StringBytesConverter());
+
                 for (int i = 0; i < rowNum; i++) {
                     String[] row = new String[n];
                     rows.add(row);
                     for (int j = 0; j < n; j++) {
                         row[j] = in.readUTF();
+                        b.addValue(row[j]);
+                    }
+                }
+                this.dict = b.build(0);
+                for (String[] row : rows) {
+                    int[] rowIndex = new int[n];
+                    for (int i = 0; i < n; i++) {
+                        rowIndex[i] = dict.getIdFromValue(row[i]);
                     }
+                    allRowIndices.add(rowIndex);
                 }
+                this.rowIndices = allRowIndices;
             }
         }
     }