You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by sh...@apache.org on 2015/06/29 09:13:45 UTC
[1/2] incubator-kylin git commit: KYLIN-839 use dictionary in building snapshot
Repository: incubator-kylin
Updated Branches:
refs/heads/KYLIN-839 b97d0e550 -> 9a467fd3f
KYLIN-839 use dictionary in building snapshot
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/08c8e910
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/08c8e910
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/08c8e910
Branch: refs/heads/KYLIN-839
Commit: 08c8e9103fa757330d8e4848552e563be9bcbf2e
Parents: b97d0e5
Author: shaofengshi <sh...@apache.org>
Authored: Mon Jun 29 10:52:49 2015 +0800
Committer: shaofengshi <sh...@apache.org>
Committed: Mon Jun 29 10:52:49 2015 +0800
----------------------------------------------------------------------
.../java/org/apache/kylin/dict/lookup/SnapshotTable.java | 8 ++++----
1 file changed, 4 insertions(+), 4 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/08c8e910/dictionary/src/main/java/org/apache/kylin/dict/lookup/SnapshotTable.java
----------------------------------------------------------------------
diff --git a/dictionary/src/main/java/org/apache/kylin/dict/lookup/SnapshotTable.java b/dictionary/src/main/java/org/apache/kylin/dict/lookup/SnapshotTable.java
index 35cbb84..4764dd0 100644
--- a/dictionary/src/main/java/org/apache/kylin/dict/lookup/SnapshotTable.java
+++ b/dictionary/src/main/java/org/apache/kylin/dict/lookup/SnapshotTable.java
@@ -49,7 +49,7 @@ public class SnapshotTable extends RootPersistentEntity implements ReadableTable
@JsonProperty("column_delimeter")
private String columnDelimeter;
@JsonProperty("useDictionary")
- private Boolean useDictionary;
+ private boolean useDictionary;
private ArrayList<String[]> rows;
@@ -60,7 +60,7 @@ public class SnapshotTable extends RootPersistentEntity implements ReadableTable
SnapshotTable(ReadableTable table) throws IOException {
this.signature = table.getSignature();
this.columnDelimeter = table.getColumnDelimeter();
- this.useDictionary = Boolean.TRUE;
+ this.useDictionary = true;
}
public void takeSnapshot(ReadableTable table, TableDesc tableDesc) throws IOException {
@@ -162,7 +162,7 @@ public class SnapshotTable extends RootPersistentEntity implements ReadableTable
int n = rows.get(0).length;
out.writeInt(n);
- if (this.useDictionary == Boolean.TRUE) {
+ if (this.useDictionary == true) {
Dictionary<String> dict = buildDictionary();
dict.write(out);
for (int i = 0; i < rows.size(); i++) {
@@ -199,7 +199,7 @@ public class SnapshotTable extends RootPersistentEntity implements ReadableTable
rows = new ArrayList<String[]>(rowNum);
if (rowNum > 0) {
int n = in.readInt();
- if (this.useDictionary == Boolean.TRUE) {
+ if (this.useDictionary == true) {
Dictionary<String> dict = new TrieDictionary<String>();
dict.readFields(in);
for (int i = 0; i < rowNum; i++) {
[2/2] incubator-kylin git commit: KYLIN-839 saving memory for snapshot tables
Posted by sh...@apache.org.
KYLIN-839 saving memory for snapshot tables
Project: http://git-wip-us.apache.org/repos/asf/incubator-kylin/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-kylin/commit/9a467fd3
Tree: http://git-wip-us.apache.org/repos/asf/incubator-kylin/tree/9a467fd3
Diff: http://git-wip-us.apache.org/repos/asf/incubator-kylin/diff/9a467fd3
Branch: refs/heads/KYLIN-839
Commit: 9a467fd3f080a8d22d5b57bb397ceec58c0bd808
Parents: 08c8e91
Author: shaofengshi <sh...@apache.org>
Authored: Mon Jun 29 15:13:34 2015 +0800
Committer: shaofengshi <sh...@apache.org>
Committed: Mon Jun 29 15:13:34 2015 +0800
----------------------------------------------------------------------
.../apache/kylin/dict/lookup/SnapshotTable.java | 117 +++++++++++--------
1 file changed, 71 insertions(+), 46 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/9a467fd3/dictionary/src/main/java/org/apache/kylin/dict/lookup/SnapshotTable.java
----------------------------------------------------------------------
diff --git a/dictionary/src/main/java/org/apache/kylin/dict/lookup/SnapshotTable.java b/dictionary/src/main/java/org/apache/kylin/dict/lookup/SnapshotTable.java
index 4764dd0..130a4c2 100644
--- a/dictionary/src/main/java/org/apache/kylin/dict/lookup/SnapshotTable.java
+++ b/dictionary/src/main/java/org/apache/kylin/dict/lookup/SnapshotTable.java
@@ -18,18 +18,11 @@
package org.apache.kylin.dict.lookup;
-import java.io.DataInput;
-import java.io.DataOutput;
-import java.io.IOException;
-import java.util.ArrayList;
-import java.util.Arrays;
-
-import org.apache.commons.lang.ArrayUtils;
-import org.apache.hadoop.fs.Path;
-
import com.fasterxml.jackson.annotation.JsonAutoDetect;
import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility;
import com.fasterxml.jackson.annotation.JsonProperty;
+import org.apache.commons.lang.ArrayUtils;
+import org.apache.hadoop.fs.Path;
import org.apache.kylin.common.persistence.ResourceStore;
import org.apache.kylin.common.persistence.RootPersistentEntity;
import org.apache.kylin.dict.Dictionary;
@@ -38,6 +31,13 @@ import org.apache.kylin.dict.TrieDictionary;
import org.apache.kylin.dict.TrieDictionaryBuilder;
import org.apache.kylin.metadata.model.TableDesc;
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.List;
+
/**
* @author yangli9
*/
@@ -51,7 +51,8 @@ public class SnapshotTable extends RootPersistentEntity implements ReadableTable
@JsonProperty("useDictionary")
private boolean useDictionary;
- private ArrayList<String[]> rows;
+ private ArrayList<int[]> rowIndices;
+ private Dictionary<String> dict;
// default constructor for JSON serialization
public SnapshotTable() {
@@ -69,16 +70,33 @@ public class SnapshotTable extends RootPersistentEntity implements ReadableTable
int maxIndex = tableDesc.getMaxColumnIndex();
+ TrieDictionaryBuilder<String> b = new TrieDictionaryBuilder<String>(new StringBytesConverter());
+
TableReader reader = table.getReader();
- ArrayList<String[]> allRows = new ArrayList<String[]>();
while (reader.next()) {
String[] row = reader.getRow();
if (row.length <= maxIndex) {
throw new IllegalStateException("Bad hive table row, " + tableDesc + " expect " + (maxIndex + 1) + " columns, but got " + Arrays.toString(row));
}
- allRows.add(row);
+
+ for (String cell : row) {
+ b.addValue(cell);
+ }
}
- this.rows = allRows;
+
+ this.dict = b.build(0);
+
+ reader = table.getReader();
+ ArrayList<int[]> allRowIndices = new ArrayList<int[]>();
+ while (reader.next()) {
+ String[] row = reader.getRow();
+ int[] rowIndex = new int[row.length];
+ for (int i = 0; i < row.length; i++) {
+ rowIndex[i] = dict.getIdFromValue(row[i]);
+ }
+ allRowIndices.add(rowIndex);
+ }
+ this.rowIndices = allRowIndices;
}
public String getResourcePath() {
@@ -98,12 +116,17 @@ public class SnapshotTable extends RootPersistentEntity implements ReadableTable
@Override
public boolean next() throws IOException {
i++;
- return i < rows.size();
+ return i < rowIndices.size();
}
@Override
public String[] getRow() {
- return rows.get(i);
+ int[] rowIndex = rowIndices.get(i);
+ String[] row = new String[rowIndex.length];
+ for (int x = 0; x < row.length; x++) {
+ row[x] = dict.getValueFromId(rowIndex[x]);
+ }
+ return row;
}
@Override
@@ -134,9 +157,9 @@ public class SnapshotTable extends RootPersistentEntity implements ReadableTable
*/
@Override
public int hashCode() {
- int[] parts = new int[this.rows.size()];
+ int[] parts = new int[this.rowIndices.size()];
for (int i = 0; i < parts.length; ++i)
- parts[i] = Arrays.hashCode(this.rows.get(i));
+ parts[i] = Arrays.hashCode(this.rowIndices.get(i));
return Arrays.hashCode(parts);
}
@@ -147,76 +170,78 @@ public class SnapshotTable extends RootPersistentEntity implements ReadableTable
SnapshotTable that = (SnapshotTable) o;
//compare row by row
- if (this.rows.size() != that.rows.size())
+ if (this.rowIndices.size() != that.rowIndices.size())
return false;
- for (int i = 0; i < this.rows.size(); ++i) {
- if (!ArrayUtils.isEquals(this.rows.get(i), that.rows.get(i)))
+ for (int i = 0; i < this.rowIndices.size(); ++i) {
+ if (!ArrayUtils.isEquals(this.rowIndices.get(i), that.rowIndices.get(i)))
return false;
}
return true;
}
void writeData(DataOutput out) throws IOException {
- out.writeInt(rows.size());
- if (rows.size() > 0) {
- int n = rows.get(0).length;
+ out.writeInt(rowIndices.size());
+ if (rowIndices.size() > 0) {
+ int n = rowIndices.get(0).length;
out.writeInt(n);
if (this.useDictionary == true) {
- Dictionary<String> dict = buildDictionary();
dict.write(out);
- for (int i = 0; i < rows.size(); i++) {
- String[] row = rows.get(i);
+ for (int i = 0; i < rowIndices.size(); i++) {
+ int[] row = rowIndices.get(i);
for (int j = 0; j < n; j++) {
- out.writeInt(dict.getIdFromValue(row[j]));
+ out.writeInt(row[j]);
}
}
} else {
- for (int i = 0; i < rows.size(); i++) {
- String[] row = rows.get(i);
+ for (int i = 0; i < rowIndices.size(); i++) {
+ int[] row = rowIndices.get(i);
for (int j = 0; j < n; j++) {
- out.writeUTF(row[j]);
+ out.writeUTF(dict.getValueFromId(row[j]));
}
}
}
}
}
- Dictionary<String> buildDictionary() {
- TrieDictionaryBuilder<String> b = new TrieDictionaryBuilder<String>(new StringBytesConverter());
- for (String[] row : rows) {
- for (String cell : row) {
- b.addValue(cell);
- }
- }
- TrieDictionary<String> dict = b.build(0);
- return dict;
- }
-
void readData(DataInput in) throws IOException {
int rowNum = in.readInt();
- rows = new ArrayList<String[]>(rowNum);
if (rowNum > 0) {
int n = in.readInt();
if (this.useDictionary == true) {
- Dictionary<String> dict = new TrieDictionary<String>();
+ this.dict = new TrieDictionary<String>();
dict.readFields(in);
+
for (int i = 0; i < rowNum; i++) {
- String[] row = new String[n];
- rows.add(row);
+ int[] row = new int[n];
+ this.rowIndices.add(row);
for (int j = 0; j < n; j++) {
- row[j] = dict.getValueFromId(in.readInt());
+ row[j] = in.readInt();
}
}
} else {
+ List<String[]> rows = new ArrayList<String[]>(rowNum);
+ ArrayList<int[]> allRowIndices = new ArrayList<int[]>();
+ TrieDictionaryBuilder<String> b = new TrieDictionaryBuilder<String>(new StringBytesConverter());
+
for (int i = 0; i < rowNum; i++) {
String[] row = new String[n];
rows.add(row);
for (int j = 0; j < n; j++) {
row[j] = in.readUTF();
+ b.addValue(row[j]);
+ }
+ }
+ this.dict = b.build(0);
+ for (String[] row : rows) {
+ int[] rowIndex = new int[n];
+ for (int i = 0; i < n; i++) {
+ rowIndex[i] = dict.getIdFromValue(row[i]);
}
+ allRowIndices.add(rowIndex);
}
+ this.rowIndices = allRowIndices;
}
}
}