You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@kylin.apache.org by lu...@apache.org on 2015/01/07 15:47:03 UTC
[42/51] [partial] incubator-kylin git commit: migrate repo from
github.com to apache git
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/4b631f92/cube/src/main/java/com/kylinolap/cube/CubeSegment.java
----------------------------------------------------------------------
diff --git a/cube/src/main/java/com/kylinolap/cube/CubeSegment.java b/cube/src/main/java/com/kylinolap/cube/CubeSegment.java
new file mode 100644
index 0000000..744ee9a
--- /dev/null
+++ b/cube/src/main/java/com/kylinolap/cube/CubeSegment.java
@@ -0,0 +1,307 @@
+/*
+ * Copyright 2013-2014 eBay Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kylinolap.cube;
+
+import java.text.SimpleDateFormat;
+import java.util.Collection;
+import java.util.Map;
+import java.util.TimeZone;
+import java.util.concurrent.ConcurrentHashMap;
+
+import com.fasterxml.jackson.annotation.JsonAutoDetect;
+import com.fasterxml.jackson.annotation.JsonAutoDetect.Visibility;
+import com.fasterxml.jackson.annotation.JsonBackReference;
+import com.fasterxml.jackson.annotation.JsonProperty;
+import com.google.common.base.Objects;
+import com.kylinolap.metadata.model.cube.CubeDesc;
+import com.kylinolap.metadata.model.cube.TblColRef;
+
+/**
+ * One segment of a cube.
+ * <p>
+ * Jackson auto-detection is switched off for this class, so only the fields
+ * explicitly annotated with {@code @JsonProperty} take part in JSON
+ * (de)serialization; the owning {@link CubeInstance} is linked through
+ * {@code @JsonBackReference}.
+ * <p>
+ * Note that the natural ordering ({@link #compareTo(CubeSegment)}) looks at
+ * {@code dateRangeEnd} only, while {@link #equals(Object)} and
+ * {@link #hashCode()} use the owning cube, the name and the status. The
+ * ordering is therefore not consistent with equals.
+ */
+@JsonAutoDetect(fieldVisibility = Visibility.NONE, getterVisibility = Visibility.NONE, isGetterVisibility = Visibility.NONE, setterVisibility = Visibility.NONE)
+public class CubeSegment implements Comparable<CubeSegment> {
+
+    @JsonBackReference
+    private CubeInstance cubeInstance;
+    @JsonProperty("uuid")
+    private String uuid;
+    @JsonProperty("name")
+    private String name;
+    @JsonProperty("storage_location_identifier")
+    private String storageLocationIdentifier; // HTable name
+    @JsonProperty("date_range_start")
+    private long dateRangeStart;
+    @JsonProperty("date_range_end")
+    private long dateRangeEnd;
+    @JsonProperty("status")
+    private CubeSegmentStatusEnum status;
+    @JsonProperty("size_kb")
+    private long sizeKB;
+    @JsonProperty("source_records")
+    private long sourceRecords;
+    @JsonProperty("source_records_size")
+    private long sourceRecordsSize;
+    @JsonProperty("last_build_time")
+    private long lastBuildTime;
+    @JsonProperty("last_build_job_id")
+    private String lastBuildJobID;
+    @JsonProperty("create_time")
+    private String createTime;
+
+    // a hash of cube schema and dictionary ID, used for sanity check
+    @JsonProperty("binary_signature")
+    private String binarySignature;
+
+    // table/column ==> dictionary resource path; created lazily by getDictionaries()
+    @JsonProperty("dictionaries")
+    private ConcurrentHashMap<String, String> dictionaries;
+    // table name ==> snapshot resource path; created lazily by getSnapshots()
+    @JsonProperty("snapshots")
+    private ConcurrentHashMap<String, String> snapshots;
+
+    /**
+     * @return the descriptor of the cube this segment belongs to; requires the
+     *         owning cube instance to be set
+     */
+    public CubeDesc getCubeDesc() {
+        return getCubeInstance().getDescriptor();
+    }
+
+    /**
+     * Derives the conventional segment name for a date range.
+     *
+     * @param startDate
+     *            range start, interpreted as epoch milliseconds
+     * @param endDate
+     *            range end, interpreted as epoch milliseconds
+     * @return "FULL_BUILD" if both arguments are 0, otherwise
+     *         "yyyyMMddHHmmss_yyyyMMddHHmmss" formatted in GMT
+     */
+    public static String getSegmentName(long startDate, long endDate) {
+        if (startDate == 0 && endDate == 0) {
+            return "FULL_BUILD";
+        }
+
+        // SimpleDateFormat is not thread-safe, so a fresh instance is used per call
+        SimpleDateFormat dateFormat = new SimpleDateFormat("yyyyMMddHHmmss");
+        dateFormat.setTimeZone(TimeZone.getTimeZone("GMT"));
+
+        return dateFormat.format(startDate) + "_" + dateFormat.format(endDate);
+    }
+
+    // ============================================================================
+    // plain accessors
+
+    public String getUuid() {
+        return uuid;
+    }
+
+    public void setUuid(String id) {
+        this.uuid = id;
+    }
+
+    public String getName() {
+        return name;
+    }
+
+    public void setName(String name) {
+        this.name = name;
+    }
+
+    public long getDateRangeStart() {
+        return dateRangeStart;
+    }
+
+    public void setDateRangeStart(long dateRangeStart) {
+        this.dateRangeStart = dateRangeStart;
+    }
+
+    public long getDateRangeEnd() {
+        return dateRangeEnd;
+    }
+
+    public void setDateRangeEnd(long dateRangeEnd) {
+        this.dateRangeEnd = dateRangeEnd;
+    }
+
+    public CubeSegmentStatusEnum getStatus() {
+        return status;
+    }
+
+    public void setStatus(CubeSegmentStatusEnum status) {
+        this.status = status;
+    }
+
+    public long getSizeKB() {
+        return sizeKB;
+    }
+
+    public void setSizeKB(long sizeKB) {
+        this.sizeKB = sizeKB;
+    }
+
+    public long getSourceRecords() {
+        return sourceRecords;
+    }
+
+    public void setSourceRecords(long sourceRecords) {
+        this.sourceRecords = sourceRecords;
+    }
+
+    public long getSourceRecordsSize() {
+        return sourceRecordsSize;
+    }
+
+    public void setSourceRecordsSize(long sourceRecordsSize) {
+        this.sourceRecordsSize = sourceRecordsSize;
+    }
+
+    public long getLastBuildTime() {
+        return lastBuildTime;
+    }
+
+    public void setLastBuildTime(long lastBuildTime) {
+        this.lastBuildTime = lastBuildTime;
+    }
+
+    public String getLastBuildJobID() {
+        return lastBuildJobID;
+    }
+
+    public void setLastBuildJobID(String lastBuildJobID) {
+        this.lastBuildJobID = lastBuildJobID;
+    }
+
+    public String getCreateTime() {
+        return createTime;
+    }
+
+    public void setCreateTime(String createTime) {
+        this.createTime = createTime;
+    }
+
+    public String getBinarySignature() {
+        return binarySignature;
+    }
+
+    public void setBinarySignature(String binarySignature) {
+        this.binarySignature = binarySignature;
+    }
+
+    public CubeInstance getCubeInstance() {
+        return cubeInstance;
+    }
+
+    public void setCubeInstance(CubeInstance cubeInstance) {
+        this.cubeInstance = cubeInstance;
+    }
+
+    public String getStorageLocationIdentifier() {
+        return storageLocationIdentifier;
+    }
+
+    /**
+     * @param storageLocationIdentifier
+     *            the storageLocationIdentifier to set
+     */
+    public void setStorageLocationIdentifier(String storageLocationIdentifier) {
+        this.storageLocationIdentifier = storageLocationIdentifier;
+    }
+
+    // ============================================================================
+    // dictionaries and snapshots
+
+    /**
+     * @return the live (mutable) dictionary map, keyed by "table/column";
+     *         created on first access, never null
+     */
+    public Map<String, String> getDictionaries() {
+        if (dictionaries == null)
+            dictionaries = new ConcurrentHashMap<String, String>();
+        return dictionaries;
+    }
+
+    /**
+     * @return the live (mutable) snapshot map, keyed by table name; created on
+     *         first access, never null
+     */
+    public Map<String, String> getSnapshots() {
+        if (snapshots == null)
+            snapshots = new ConcurrentHashMap<String, String>();
+        return snapshots;
+    }
+
+    /** @return the snapshot resource path recorded for the table, or null if none */
+    public String getSnapshotResPath(String table) {
+        return getSnapshots().get(table);
+    }
+
+    public void putSnapshotResPath(String table, String snapshotResPath) {
+        getSnapshots().put(table, snapshotResPath);
+    }
+
+    public Collection<String> getDictionaryPaths() {
+        return getDictionaries().values();
+    }
+
+    public Collection<String> getSnapshotPaths() {
+        return getSnapshots().values();
+    }
+
+    /** @return the dictionary resource path recorded for the column, or null if none */
+    public String getDictResPath(TblColRef col) {
+        return getDictionaries().get(dictKey(col));
+    }
+
+    public void putDictResPath(TblColRef col, String dictResPath) {
+        getDictionaries().put(dictKey(col), dictResPath);
+    }
+
+    /** Builds the "table/column" key under which a column's dictionary is stored. */
+    private String dictKey(TblColRef col) {
+        return col.getTable() + "/" + col.getName();
+    }
+
+    // ============================================================================
+    // ordering, equality, printing
+
+    /** Orders segments by the end of their date range only. */
+    @Override
+    public int compareTo(CubeSegment other) {
+        if (this.dateRangeEnd < other.dateRangeEnd) {
+            return -1;
+        } else if (this.dateRangeEnd > other.dateRangeEnd) {
+            return 1;
+        } else {
+            return 0;
+        }
+    }
+
+    @Override
+    public int hashCode() {
+        final int prime = 31;
+        int result = 1;
+        result = prime * result + ((cubeInstance == null) ? 0 : cubeInstance.hashCode());
+        result = prime * result + ((name == null) ? 0 : name.hashCode());
+        result = prime * result + ((status == null) ? 0 : status.hashCode());
+        return result;
+    }
+
+    /** Two segments are equal when owning cube, name and status all match. */
+    @Override
+    public boolean equals(Object obj) {
+        if (this == obj)
+            return true;
+        if (obj == null)
+            return false;
+        if (getClass() != obj.getClass())
+            return false;
+        CubeSegment other = (CubeSegment) obj;
+        if (cubeInstance == null) {
+            if (other.cubeInstance != null)
+                return false;
+        } else if (!cubeInstance.equals(other.cubeInstance))
+            return false;
+        if (name == null) {
+            if (other.name != null)
+                return false;
+        } else if (!name.equals(other.name))
+            return false;
+        if (status != other.status)
+            return false;
+        return true;
+    }
+
+    @Override
+    public String toString() {
+        // the helper appends "=" after each label, so labels carry no punctuation of their own
+        return Objects.toStringHelper(this)
+                .add("uuid", uuid)
+                .add("create_time", createTime)
+                .add("name", name)
+                .add("last_build_job_id", lastBuildJobID)
+                .add("status", status)
+                .toString();
+    }
+}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/4b631f92/cube/src/main/java/com/kylinolap/cube/CubeSegmentStatusEnum.java
----------------------------------------------------------------------
diff --git a/cube/src/main/java/com/kylinolap/cube/CubeSegmentStatusEnum.java b/cube/src/main/java/com/kylinolap/cube/CubeSegmentStatusEnum.java
new file mode 100644
index 0000000..88fb852
--- /dev/null
+++ b/cube/src/main/java/com/kylinolap/cube/CubeSegmentStatusEnum.java
@@ -0,0 +1,25 @@
+/*
+ * Copyright 2013-2014 eBay Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.kylinolap.cube;
+
+/**
+ * Status values that can be assigned to a {@link CubeSegment}
+ * (see {@code CubeSegment#getStatus()} / {@code setStatus}).
+ * NOTE(review): the precise meaning of each value is defined by the code that
+ * sets it -- confirm before relying on it.
+ *
+ * @author xduo
+ *
+ */
+public enum CubeSegmentStatusEnum {
+ NEW, READY, READY_PENDING
+}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/4b631f92/cube/src/main/java/com/kylinolap/cube/CubeSegmentTypeEnum.java
----------------------------------------------------------------------
diff --git a/cube/src/main/java/com/kylinolap/cube/CubeSegmentTypeEnum.java b/cube/src/main/java/com/kylinolap/cube/CubeSegmentTypeEnum.java
new file mode 100644
index 0000000..e1f6928
--- /dev/null
+++ b/cube/src/main/java/com/kylinolap/cube/CubeSegmentTypeEnum.java
@@ -0,0 +1,25 @@
+/*
+ * Copyright 2013-2014 eBay Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.kylinolap.cube;
+
+/**
+ * Segment type marker with two values, {@code TRANSIENT} and {@code PERMANENT}.
+ * NOTE(review): where this type is consumed and what distinguishes the two
+ * values should be confirmed against its callers.
+ *
+ * @author ysong1
+ *
+ */
+public enum CubeSegmentTypeEnum {
+ TRANSIENT, PERMANENT
+}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/4b631f92/cube/src/main/java/com/kylinolap/cube/CubeSegmentValidator.java
----------------------------------------------------------------------
diff --git a/cube/src/main/java/com/kylinolap/cube/CubeSegmentValidator.java b/cube/src/main/java/com/kylinolap/cube/CubeSegmentValidator.java
new file mode 100644
index 0000000..c2ceb25
--- /dev/null
+++ b/cube/src/main/java/com/kylinolap/cube/CubeSegmentValidator.java
@@ -0,0 +1,240 @@
+/*
+ * Copyright 2013-2014 eBay Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.kylinolap.cube;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.List;
+
+import com.kylinolap.cube.exception.CubeIntegrityException;
+import com.kylinolap.dict.DictionaryManager;
+import com.kylinolap.metadata.model.cube.CubePartitionDesc.CubePartitionType;
+import com.kylinolap.metadata.model.cube.DimensionDesc;
+import com.kylinolap.metadata.model.cube.TblColRef;
+
+/**
+ * @author xduo
+ */
+public class CubeSegmentValidator {
+
+ private CubeSegmentValidator() {
+ }
+
+ public static CubeSegmentValidator getCubeSegmentValidator(CubeBuildTypeEnum buildType, CubePartitionType partitionType) {
+ switch (buildType) {
+ case MERGE:
+ return new MergeOperationValidator();
+ case BUILD:
+ switch (partitionType) {
+ case APPEND:
+ return new IncrementalBuildOperationValidator();
+ case UPDATE_INSERT:
+ return new UpdateBuildOperationValidator();
+ }
+ default:
+ return new CubeSegmentValidator();
+ }
+ }
+
+ void validate(CubeInstance cubeInstance, List<CubeSegment> newSegments) throws CubeIntegrityException {
+ }
+
+ public static class MergeOperationValidator extends CubeSegmentValidator {
+ private void checkContingency(CubeInstance cubeInstance, List<CubeSegment> newSegments) throws CubeIntegrityException {
+ if (cubeInstance.getSegments().size() < 2) {
+ throw new CubeIntegrityException("No segments to merge.");
+ }
+ if (newSegments.size() != 1) {
+ throw new CubeIntegrityException("Invalid date range.");
+ }
+
+ CubeSegment newSegment = newSegments.get(0);
+ CubeSegment startSeg = null;
+ CubeSegment endSeg = null;
+ for (CubeSegment segment : cubeInstance.getSegments()) {
+ if (segment.getDateRangeStart() == newSegment.getDateRangeStart()) {
+ startSeg = segment;
+ }
+ if (segment.getDateRangeEnd() == newSegment.getDateRangeEnd()) {
+ endSeg = segment;
+ }
+ }
+
+ if (null == startSeg || null == endSeg || startSeg.getDateRangeStart() >= endSeg.getDateRangeStart()) {
+ throw new CubeIntegrityException("Invalid date range.");
+ }
+ }
+
+ private void checkLoopTableConsistency(CubeInstance cube, List<CubeSegment> newSegments) throws CubeIntegrityException {
+
+ CubeSegment cubeSeg = newSegments.get(0);
+ DictionaryManager dictMgr = DictionaryManager.getInstance(cube.getConfig());
+ List<CubeSegment> segmentList = cube.getMergingSegments(cubeSeg);
+
+ HashSet<TblColRef> cols = new HashSet<TblColRef>();
+ for (DimensionDesc dim : cube.getDescriptor().getDimensions()) {
+ for (TblColRef col : dim.getColumnRefs()) {
+ // include those dictionaries that do not need mergning
+ try {
+ if (cubeSeg.getCubeDesc().getRowkey().isUseDictionary(col) && !cube.getDescriptor().getFactTable().equalsIgnoreCase((String) dictMgr.decideSourceData(cube.getDescriptor(), col, null)[0])) {
+ cols.add(col);
+ }
+ } catch (IOException e) {
+ throw new CubeIntegrityException("checkLoopTableConsistency not passed when allocating a new segment.");
+ }
+ }
+ }
+
+ // check if all dictionaries on lookup table columns are identical
+ for (TblColRef col : cols) {
+ String dictOfFirstSegment = null;
+ for (CubeSegment segment : segmentList) {
+ String temp = segment.getDictResPath(col);
+ if (temp == null) {
+ throw new CubeIntegrityException("Dictionary is null on column: " + col + " Segment: " + segment);
+ }
+
+ if (dictOfFirstSegment == null) {
+ dictOfFirstSegment = temp;
+ } else {
+ if (!dictOfFirstSegment.equalsIgnoreCase(temp)) {
+ throw new CubeIntegrityException("Segments with different dictionaries(on lookup table) cannot be merged");
+ }
+ }
+ }
+ }
+
+ // check if all segments' snapshot are identical
+ CubeSegment firstSegment = null;
+ for (CubeSegment segment : segmentList) {
+ if (firstSegment == null) {
+ firstSegment = segment;
+ } else {
+ Collection<String> a = firstSegment.getSnapshots().values();
+ Collection<String> b = segment.getSnapshots().values();
+ if (!((a.size() == b.size()) && a.containsAll(b)))
+ throw new CubeIntegrityException("Segments with different snapshots cannot be merged");
+ }
+ }
+
+ }
+
+ @Override
+ public void validate(CubeInstance cubeInstance, List<CubeSegment> newSegments) throws CubeIntegrityException {
+ this.checkContingency(cubeInstance, newSegments);
+ this.checkLoopTableConsistency(cubeInstance, newSegments);
+ }
+ }
+
+ public static class IncrementalBuildOperationValidator extends CubeSegmentValidator {
+ /*
+ * (non-Javadoc)
+ *
+ * @see
+ * com.kylinolap.cube.CubeSegmentValidator#validate(com.kylinolap.cube
+ * .CubeInstance, java.util.List)
+ */
+ @Override
+ void validate(CubeInstance cubeInstance, List<CubeSegment> newSegments) throws CubeIntegrityException {
+ if (newSegments.size() != 1) {
+ throw new CubeIntegrityException("Invalid date range.");
+ }
+ CubeSegment newSegment = newSegments.get(0);
+ if (cubeInstance.needMergeImmediatelyAfterBuild(newSegment)) {
+
+ } else {
+ // check if user will rebuild one specified segment
+ boolean hasMatchSegment = false;
+ for (CubeSegment segment : cubeInstance.getSegments()) {
+ if (segment.getDateRangeStart() == newSegment.getDateRangeStart()) {
+ if (segment.getDateRangeEnd() == newSegment.getDateRangeEnd()) {
+ hasMatchSegment = true;
+ } else {
+ throw new CubeIntegrityException("Invalid date range.");
+ }
+ }
+ }
+
+ if (!hasMatchSegment) {
+ if (cubeInstance.getSegments().size() == 0) {
+ if (cubeInstance.getDescriptor().getCubePartitionDesc().getPartitionDateStart() != newSegment.getDateRangeStart()) {
+ throw new CubeIntegrityException("Invalid start date.");
+ }
+ } else {
+ CubeSegment lastSegment = cubeInstance.getSegments().get(cubeInstance.getSegments().size() - 1);
+ if (newSegment.getDateRangeStart() != lastSegment.getDateRangeEnd()) {
+ throw new CubeIntegrityException("Invalid start date.");
+ }
+ }
+ }
+ }
+ }
+
+ }
+
+ public static class UpdateBuildOperationValidator extends CubeSegmentValidator {
+
+ /*
+ * (non-Javadoc)
+ *
+ * @see
+ * com.kylinolap.cube.CubeSegmentValidator#validate(com.kylinolap.cube
+ * .CubeInstance, java.util.List)
+ */
+ @Override
+ void validate(CubeInstance cubeInstance, List<CubeSegment> newSegments) throws CubeIntegrityException {
+ if (newSegments.size() != 1 && newSegments.size() != 2) {
+ throw new CubeIntegrityException("Invalid new segment count, got " + newSegments.size());
+ }
+
+ CubeSegment previousSeg = null;
+ for (CubeSegment newSegment : newSegments) {
+ if (null == previousSeg) {
+ previousSeg = newSegment;
+ } else {
+ if (previousSeg.getDateRangeEnd() != newSegment.getDateRangeStart()) {
+ throw new CubeIntegrityException("Invalid date range.");
+ }
+ }
+ }
+
+ if (cubeInstance.getSegments().size() == 0) {
+ if (cubeInstance.getDescriptor().getCubePartitionDesc().getPartitionDateStart() != newSegments.get(0).getDateRangeStart()) {
+ throw new CubeIntegrityException("Invalid start date.");
+ }
+ } else {
+ CubeSegment startSegment = newSegments.get(0);
+ CubeSegment matchSeg = null;
+ for (CubeSegment segment : cubeInstance.getSegments()) {
+ if (segment.getDateRangeStart() == startSegment.getDateRangeStart()) {
+ matchSeg = segment;
+ }
+ }
+
+ if (newSegments.size() == 2 && null == matchSeg) {
+ throw new CubeIntegrityException("Invalid date range.");
+ }
+
+ if (newSegments.size() == 2 && newSegments.get(newSegments.size() - 1).getDateRangeEnd() < matchSeg.getDateRangeEnd()) {
+ throw new CubeIntegrityException("Invalid date range.");
+ }
+ }
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/4b631f92/cube/src/main/java/com/kylinolap/cube/CubeStatusEnum.java
----------------------------------------------------------------------
diff --git a/cube/src/main/java/com/kylinolap/cube/CubeStatusEnum.java b/cube/src/main/java/com/kylinolap/cube/CubeStatusEnum.java
new file mode 100644
index 0000000..16376ae
--- /dev/null
+++ b/cube/src/main/java/com/kylinolap/cube/CubeStatusEnum.java
@@ -0,0 +1,22 @@
+/*
+ * Copyright 2013-2014 eBay Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kylinolap.cube;
+
+/**
+ * Status values a cube can carry: {@code DISABLED}, {@code BUILDING},
+ * {@code READY} or {@code DESCBROKEN}.
+ * NOTE(review): the transitions between these values are driven by code
+ * outside this file -- confirm their exact meaning there.
+ */
+public enum CubeStatusEnum {
+
+ DISABLED, BUILDING, READY, DESCBROKEN
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/4b631f92/cube/src/main/java/com/kylinolap/cube/cli/DictionaryGeneratorCLI.java
----------------------------------------------------------------------
diff --git a/cube/src/main/java/com/kylinolap/cube/cli/DictionaryGeneratorCLI.java b/cube/src/main/java/com/kylinolap/cube/cli/DictionaryGeneratorCLI.java
new file mode 100644
index 0000000..c3dbe1a
--- /dev/null
+++ b/cube/src/main/java/com/kylinolap/cube/cli/DictionaryGeneratorCLI.java
@@ -0,0 +1,66 @@
+/*
+ * Copyright 2013-2014 eBay Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kylinolap.cube.cli;
+
+import java.io.IOException;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import com.kylinolap.common.KylinConfig;
+import com.kylinolap.cube.CubeInstance;
+import com.kylinolap.cube.CubeManager;
+import com.kylinolap.cube.CubeSegment;
+import com.kylinolap.cube.CubeSegmentStatusEnum;
+import com.kylinolap.metadata.model.cube.DimensionDesc;
+import com.kylinolap.metadata.model.cube.TblColRef;
+
+public class DictionaryGeneratorCLI {
+
+ private static final Logger logger = LoggerFactory.getLogger(DictionaryGeneratorCLI.class);
+
+ public static void processSegment(KylinConfig config, String cubeName, String segmentName, String factColumnsPath) throws IOException {
+ CubeInstance cube = CubeManager.getInstance(config).getCube(cubeName);
+ CubeSegment segment = cube.getSegment(segmentName, CubeSegmentStatusEnum.NEW);
+
+ processSegment(config, segment, factColumnsPath);
+ }
+
+ private static void processSegment(KylinConfig config, CubeSegment cubeSeg, String factColumnsPath) throws IOException {
+ CubeManager cubeMgr = CubeManager.getInstance(config);
+
+ for (DimensionDesc dim : cubeSeg.getCubeDesc().getDimensions()) {
+ // dictionary
+ for (TblColRef col : dim.getColumnRefs()) {
+ if (cubeSeg.getCubeDesc().getRowkey().isUseDictionary(col)) {
+ logger.info("Building dictionary for " + col);
+ cubeMgr.buildDictionary(cubeSeg, col, factColumnsPath);
+ }
+ }
+
+ // build snapshot
+ if (dim.getTable() != null && !dim.getTable().equalsIgnoreCase(cubeSeg.getCubeDesc().getFactTable())) {
+ // CubeSegment seg = cube.getTheOnlySegment();
+ logger.info("Building snapshot of " + dim.getTable());
+ cubeMgr.buildSnapshotTable(cubeSeg, dim.getTable());
+ logger.info("Checking snapshot of " + dim.getTable());
+ cubeMgr.getLookupTable(cubeSeg, dim); // load the table for
+ // sanity check
+ }
+ }
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/4b631f92/cube/src/main/java/com/kylinolap/cube/cli/DumpDictionaryCLI.java
----------------------------------------------------------------------
diff --git a/cube/src/main/java/com/kylinolap/cube/cli/DumpDictionaryCLI.java b/cube/src/main/java/com/kylinolap/cube/cli/DumpDictionaryCLI.java
new file mode 100644
index 0000000..cde8c97
--- /dev/null
+++ b/cube/src/main/java/com/kylinolap/cube/cli/DumpDictionaryCLI.java
@@ -0,0 +1,55 @@
+/*
+ * Copyright 2013-2014 eBay Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kylinolap.cube.cli;
+
+import java.io.DataInputStream;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.util.Date;
+
+import com.kylinolap.common.util.JsonUtil;
+import com.kylinolap.dict.DictionaryInfo;
+import com.kylinolap.dict.DictionaryInfoSerializer;
+
+public class DumpDictionaryCLI {
+
+ public static void main(String[] args) throws IOException {
+ for (String path : args) {
+ dump(new File(path));
+ }
+ }
+
+ public static void dump(File f) throws IOException {
+ if (f.isDirectory()) {
+ for (File c : f.listFiles())
+ dump(c);
+ return;
+ }
+
+ if (f.getName().endsWith(".dict")) {
+ DictionaryInfoSerializer ser = new DictionaryInfoSerializer();
+ DictionaryInfo dictInfo = ser.deserialize(new DataInputStream(new FileInputStream(f)));
+
+ System.out.println("============================================================================");
+ System.out.println("File: " + f.getAbsolutePath());
+ System.out.println(new Date(dictInfo.getLastModified()));
+ System.out.println(JsonUtil.writeValueAsIndentString(dictInfo));
+ dictInfo.getDictionaryObject().dump(System.out);
+ System.out.println();
+ }
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/4b631f92/cube/src/main/java/com/kylinolap/cube/common/BytesSplitter.java
----------------------------------------------------------------------
diff --git a/cube/src/main/java/com/kylinolap/cube/common/BytesSplitter.java b/cube/src/main/java/com/kylinolap/cube/common/BytesSplitter.java
new file mode 100644
index 0000000..fec0db1
--- /dev/null
+++ b/cube/src/main/java/com/kylinolap/cube/common/BytesSplitter.java
@@ -0,0 +1,141 @@
+/*
+ * Copyright 2013-2014 eBay Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kylinolap.cube.common;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.hadoop.hbase.util.Bytes;
+import org.apache.hadoop.io.Text;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Splits a delimited byte row into fields, copying each field into one of a
+ * fixed number of pre-allocated {@link SplittedBytes} buffers that are reused
+ * across calls. Instances keep the result of the last split as state.
+ *
+ * @author xjiang
+ */
+public class BytesSplitter {
+    private static final Logger logger = LoggerFactory.getLogger(BytesSplitter.class);
+
+    // candidates tried, in this order, by detectDelim(): DEL (\177), '|', tab, ','
+    private static final int[] COMMON_DELIMS = new int[] { "\177".codePointAt(0), "|".codePointAt(0), "\t".codePointAt(0), ",".codePointAt(0) };
+
+    private SplittedBytes[] splitBuffers;
+    private int bufferSize;
+
+    /** @return all pre-allocated buffers; only the first {@link #getBufferSize()} hold the last split */
+    public SplittedBytes[] getSplitBuffers() {
+        return splitBuffers;
+    }
+
+    public SplittedBytes getSplitBuffer(int index) {
+        return splitBuffers[index];
+    }
+
+    /** @return the number of fields produced by the most recent split */
+    public int getBufferSize() {
+        return bufferSize;
+    }
+
+    /**
+     * @param splitLen
+     *            number of field buffers to allocate, i.e. the maximum number
+     *            of fields a row may have
+     * @param bytesLen
+     *            size argument passed to each {@link SplittedBytes} buffer
+     */
+    public BytesSplitter(int splitLen, int bytesLen) {
+        this.splitBuffers = new SplittedBytes[splitLen];
+        for (int i = 0; i < splitLen; i++) {
+            this.splitBuffers[i] = new SplittedBytes(bytesLen);
+        }
+        this.bufferSize = 0;
+    }
+
+    /**
+     * Splits the first {@code byteLen} bytes of {@code bytes} on
+     * {@code delimiter} into the internal buffers, overwriting the previous
+     * result.
+     *
+     * @return the number of fields (delimiter count + 1)
+     * @throws IndexOutOfBoundsException
+     *             if the row has more fields than there are buffers, or a
+     *             field does not fit into its buffer
+     */
+    public int split(byte[] bytes, int byteLen, byte delimiter) {
+        this.bufferSize = 0;
+        int offset = 0;
+        int length = 0;
+        for (int i = 0; i < byteLen; i++) {
+            if (bytes[i] == delimiter) {
+                SplittedBytes split = this.splitBuffers[this.bufferSize++];
+                split.length = length;
+                System.arraycopy(bytes, offset, split.value, 0, length);
+                offset = i + 1;
+                length = 0;
+            } else {
+                length++;
+            }
+        }
+        // trailing field after the last delimiter (or the whole row if none)
+        SplittedBytes split = this.splitBuffers[this.bufferSize++];
+        System.arraycopy(bytes, offset, split.value, 0, length);
+        split.length = length;
+
+        return bufferSize;
+    }
+
+    /**
+     * Splits {@code bytes}, starting at {@code offset} and running to the end
+     * of the array, on {@code delimiter} and decodes each field to a string.
+     *
+     * @return the fields in order; always at least one element
+     */
+    public static List<String> splitToString(byte[] bytes, int offset, byte delimiter) {
+        List<String> splitStrings = new ArrayList<String>();
+        // the first field begins at the requested offset, not at the start of the array
+        int splitOffset = offset;
+        int splitLength = 0;
+        for (int i = offset; i < bytes.length; i++) {
+            if (bytes[i] == delimiter) {
+                String str = Bytes.toString(bytes, splitOffset, splitLength);
+                splitStrings.add(str);
+                splitOffset = i + 1;
+                splitLength = 0;
+            } else {
+                splitLength++;
+            }
+        }
+        String str = Bytes.toString(bytes, splitOffset, splitLength);
+        splitStrings.add(str);
+        return splitStrings;
+    }
+
+    /**
+     * Guesses the field delimiter of a row by trying every byte in the row
+     * that is not a letter or digit and keeping the one that yields exactly
+     * {@code expectedSplits} fields. Probing overwrites the split buffers.
+     *
+     * @param bytes
+     *            the row
+     * @param byteLen
+     *            number of valid bytes in {@code bytes}
+     * @param expectedSplits
+     *            the number of fields the row is known to have
+     * @return the single delimiter that produces the expected field count
+     * @throws IOException
+     *             if {@code expectedSplits} exceeds the number of buffers, if
+     *             no candidate matches, or if two different candidates both
+     *             match (ambiguous row)
+     */
+    public byte inferByteRowDelimiter(byte[] bytes, int byteLen, int expectedSplits) throws IOException {
+
+        if (expectedSplits > this.splitBuffers.length)
+            throw new IOException("expectSplits can not be greater than split buffer size");
+
+        boolean delimiterFound = false;
+        byte foundDelimiter = 0;
+        // only the first byteLen bytes are valid; anything beyond may be stale buffer content
+        int scanLen = Math.min(byteLen, bytes.length);
+        for (int i = 0; i < scanLen; ++i) {
+            byte c = bytes[i];
+            if (!Character.isLetterOrDigit((char) c)) {
+                try {
+                    int actualSplits = this.split(bytes, byteLen, c);
+                    if (actualSplits == expectedSplits) {
+                        if (!delimiterFound) {
+                            logger.info("Delimiter found, value is : " + c);
+                            delimiterFound = true;
+                            foundDelimiter = c;
+                        } else if (c != foundDelimiter) {
+                            throw new IOException("Duplicate delimiter found, found delimiter is : " + foundDelimiter + " new delimiter is " + c);
+                        }
+                    }
+                } catch (RuntimeException e) {
+                    // split() overflowed the buffers, so c cannot be the delimiter; only
+                    // runtime failures are swallowed so the ambiguity IOException above propagates
+                    logger.info("Unqualified delimiter pruned, value is " + c);
+                }
+            }
+        }
+
+        if (delimiterFound)
+            return foundDelimiter;
+        else
+            throw new IOException("No delimiter found");
+    }
+
+    /**
+     * Tries the common delimiters in order and returns the first that splits
+     * the line into {@code expectedParts} fields. Probing overwrites the
+     * split buffers.
+     *
+     * @return the code point of the matching delimiter
+     * @throws RuntimeException
+     *             if none of the common delimiters matches
+     */
+    public int detectDelim(Text value, int expectedParts) {
+        for (int i = 0; i < COMMON_DELIMS.length; i++) {
+            int nParts = split(value.getBytes(), value.getLength(), (byte) COMMON_DELIMS[i]);
+            if (nParts == expectedParts)
+                return COMMON_DELIMS[i];
+        }
+        throw new RuntimeException("Cannot detect delimeter from first line -- " + value.toString() + " -- expect " + expectedParts + " columns");
+    }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/4b631f92/cube/src/main/java/com/kylinolap/cube/common/RowKeySplitter.java
----------------------------------------------------------------------
diff --git a/cube/src/main/java/com/kylinolap/cube/common/RowKeySplitter.java b/cube/src/main/java/com/kylinolap/cube/common/RowKeySplitter.java
new file mode 100644
index 0000000..c049b3a
--- /dev/null
+++ b/cube/src/main/java/com/kylinolap/cube/common/RowKeySplitter.java
@@ -0,0 +1,88 @@
+/*
+ * Copyright 2013-2014 eBay Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kylinolap.cube.common;
+
+import org.apache.hadoop.hbase.util.Bytes;
+
+import com.kylinolap.cube.CubeSegment;
+import com.kylinolap.cube.cuboid.Cuboid;
+import com.kylinolap.cube.kv.RowConstants;
+import com.kylinolap.cube.kv.RowKeyColumnIO;
+import com.kylinolap.metadata.model.cube.CubeDesc;
+import com.kylinolap.metadata.model.cube.TblColRef;
+
+/**
+ * @author George Song (ysong1)
+ *
+ */
+public class RowKeySplitter {
+
+ private CubeDesc cubeDesc;
+ private RowKeyColumnIO colIO;
+
+ private SplittedBytes[] splitBuffers;
+ private int bufferSize;
+
+ public SplittedBytes[] getSplitBuffers() {
+ return splitBuffers;
+ }
+
+ public int getBufferSize() {
+ return bufferSize;
+ }
+
+ public RowKeySplitter(CubeSegment cubeSeg, int splitLen, int bytesLen) {
+ this.cubeDesc = cubeSeg.getCubeDesc();
+ this.colIO = new RowKeyColumnIO(cubeSeg);
+
+ this.splitBuffers = new SplittedBytes[splitLen];
+ for (int i = 0; i < splitLen; i++) {
+ this.splitBuffers[i] = new SplittedBytes(bytesLen);
+ }
+ this.bufferSize = 0;
+ }
+
+ /**
+ * @param bytes
+ * @param byteLen
+ * @return cuboid ID
+ */
+ public long split(byte[] bytes, int byteLen) {
+ this.bufferSize = 0;
+ int offset = 0;
+
+ // extract cuboid id
+ SplittedBytes cuboidIdSplit = this.splitBuffers[this.bufferSize++];
+ cuboidIdSplit.length = RowConstants.ROWKEY_CUBOIDID_LEN;
+ System.arraycopy(bytes, offset, cuboidIdSplit.value, 0, RowConstants.ROWKEY_CUBOIDID_LEN);
+ offset += RowConstants.ROWKEY_CUBOIDID_LEN;
+
+ long cuboidId = Bytes.toLong(cuboidIdSplit.value, 0, cuboidIdSplit.length);
+ Cuboid cuboid = Cuboid.findById(cubeDesc, cuboidId);
+
+ // rowkey columns
+ for (int i = 0; i < cuboid.getColumns().size(); i++) {
+ TblColRef col = cuboid.getColumns().get(i);
+ int colLength = colIO.getColumnLength(col);
+ SplittedBytes split = this.splitBuffers[this.bufferSize++];
+ split.length = colLength;
+ System.arraycopy(bytes, offset, split.value, 0, colLength);
+ offset += colLength;
+ }
+
+ return cuboidId;
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/4b631f92/cube/src/main/java/com/kylinolap/cube/common/SplittedBytes.java
----------------------------------------------------------------------
diff --git a/cube/src/main/java/com/kylinolap/cube/common/SplittedBytes.java b/cube/src/main/java/com/kylinolap/cube/common/SplittedBytes.java
new file mode 100644
index 0000000..00a0206
--- /dev/null
+++ b/cube/src/main/java/com/kylinolap/cube/common/SplittedBytes.java
@@ -0,0 +1,30 @@
+/*
+ * Copyright 2013-2014 eBay Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kylinolap.cube.common;
+
+/**
+ * @author George Song (ysong1)
+ *
+ */
+public class SplittedBytes {
+ public SplittedBytes(int length) {
+ value = new byte[length];
+ length = 0;
+ }
+
+ public byte[] value;
+ public int length;
+}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/4b631f92/cube/src/main/java/com/kylinolap/cube/cuboid/Cuboid.java
----------------------------------------------------------------------
diff --git a/cube/src/main/java/com/kylinolap/cube/cuboid/Cuboid.java b/cube/src/main/java/com/kylinolap/cube/cuboid/Cuboid.java
new file mode 100644
index 0000000..ea673a5
--- /dev/null
+++ b/cube/src/main/java/com/kylinolap/cube/cuboid/Cuboid.java
@@ -0,0 +1,323 @@
+/*
+ * Copyright 2013-2014 eBay Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kylinolap.cube.cuboid;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.LinkedList;
+import java.util.List;
+import java.util.Map;
+import java.util.Queue;
+import java.util.concurrent.ConcurrentHashMap;
+
+import org.apache.hadoop.hbase.util.Bytes;
+
+import com.kylinolap.metadata.model.cube.CubeDesc;
+import com.kylinolap.metadata.model.cube.RowKeyColDesc;
+import com.kylinolap.metadata.model.cube.RowKeyDesc;
+import com.kylinolap.metadata.model.cube.RowKeyDesc.AggrGroupMask;
+import com.kylinolap.metadata.model.cube.RowKeyDesc.HierarchyMask;
+import com.kylinolap.metadata.model.cube.TblColRef;
+
+/**
+ * @author George Song (ysong1)
+ */
+public class Cuboid implements Comparable<Cuboid> {
+
+ private final static Map<String, Map<Long, Cuboid>> CUBOID_CACHE = new ConcurrentHashMap<String, Map<Long, Cuboid>>();
+
+ public static Cuboid findById(CubeDesc cube, byte[] cuboidID) {
+ return findById(cube, Bytes.toLong(cuboidID));
+ }
+
+ public static Cuboid findById(CubeDesc cube, long cuboidID) {
+ Map<Long, Cuboid> cubeCache = CUBOID_CACHE.get(cube.getName());
+ if (cubeCache == null) {
+ cubeCache = new ConcurrentHashMap<Long, Cuboid>();
+ CUBOID_CACHE.put(cube.getName(), cubeCache);
+ }
+ Cuboid cuboid = cubeCache.get(cuboidID);
+ if (cuboid == null) {
+ long validCuboidID = translateToValidCuboid(cube, cuboidID);
+ cuboid = new Cuboid(cube, cuboidID, validCuboidID);
+ cubeCache.put(cuboidID, cuboid);
+ }
+ return cuboid;
+ }
+
+ public static boolean isValid(CubeDesc cube, long cuboidID) {
+ RowKeyDesc rowkey = cube.getRowkey();
+
+ if (cuboidID < 0) {
+ throw new IllegalArgumentException("Cuboid " + cuboidID + " should be greater than 0");
+ }
+
+ if (checkBaseCuboid(rowkey, cuboidID)) {
+ return true;
+ }
+
+ if (checkMandatoryColumns(rowkey, cuboidID) == false) {
+ return false;
+ }
+
+ if (checkAggregationGroup(rowkey, cuboidID) == false) {
+ return false;
+ }
+
+ if (checkHierarchy(rowkey, cuboidID) == false) {
+ return false;
+ }
+
+ return true;
+ }
+
+ public static long getBaseCuboidId(CubeDesc cube) {
+ return cube.getRowkey().getFullMask();
+ }
+
+ // Breadth-First-Search
+ private static long translateToValidCuboid(CubeDesc cube, long cuboidID) {
+ if (Cuboid.isValid(cube, cuboidID)) {
+ return cuboidID;
+ }
+
+ HashSet<Long> dedupped = new HashSet<Long>();
+ Queue<Long> queue = new LinkedList<Long>();
+ List<Long> parents = Cuboid.getAllPossibleParents(cube, cuboidID);
+
+ // check each parent
+ addToQueue(queue, parents, dedupped);
+ while (queue.size() > 0) {
+ long parent = pollFromQueue(queue, dedupped);
+ if (Cuboid.isValid(cube, parent)) {
+ return parent;
+ } else {
+ addToQueue(queue, Cuboid.getAllPossibleParents(cube, parent), dedupped);
+ }
+ }
+ return -1;
+ }
+
+ private static List<Long> getAllPossibleParents(CubeDesc cube, long cuboidID) {
+ List<Long> allPossibleParents = new ArrayList<Long>();
+
+ for (int i = 0; i < cube.getRowkey().getRowKeyColumns().length; i++) {
+ long mask = 1L << i;
+ long parentId = cuboidID | mask;
+ if (parentId != cuboidID) {
+ allPossibleParents.add(parentId);
+ }
+ }
+
+ return allPossibleParents;
+ }
+
+ private static void addToQueue(Queue<Long> queue, List<Long> parents, HashSet<Long> dedupped) {
+ Collections.sort(parents);
+ for (Long p : parents) {
+ if (!dedupped.contains(p)) {
+ dedupped.add(p);
+ queue.offer(p);
+ }
+ }
+ }
+
+ private static long pollFromQueue(Queue<Long> queue, HashSet<Long> dedupped) {
+ long element = queue.poll();
+ dedupped.remove(element);
+ return element;
+ }
+
+ private static boolean checkBaseCuboid(RowKeyDesc rowkey, long cuboidID) {
+ long baseCuboidId = rowkey.getFullMask();
+ if (cuboidID > baseCuboidId) {
+ throw new IllegalArgumentException("Cubiod " + cuboidID + " is out of scope 0-" + baseCuboidId);
+ }
+ return baseCuboidId == cuboidID;
+ }
+
+ private static boolean checkMandatoryColumns(RowKeyDesc rowkey, long cuboidID) {
+ long mandatoryColumnMask = rowkey.getMandatoryColumnMask();
+
+ // note the all-zero cuboid (except for mandatory) is not valid
+ if (cuboidID <= mandatoryColumnMask)
+ return false;
+
+ return (cuboidID & mandatoryColumnMask) == mandatoryColumnMask;
+ }
+
+ private static boolean checkHierarchy(RowKeyDesc rowkey, long cuboidID) {
+ List<HierarchyMask> hierarchyMaskList = rowkey.getHierarchyMasks();
+ // if no hierarchy defined in metadata
+ if (hierarchyMaskList == null || hierarchyMaskList.size() == 0) {
+ return true;
+ }
+
+ hier: for (HierarchyMask hierarchyMasks : hierarchyMaskList) {
+ long result = cuboidID & hierarchyMasks.fullMask;
+ if (result > 0) {
+ // if match one of the hierarchy constrains, return true;
+ for (long mask : hierarchyMasks.allMasks) {
+ if (result == mask) {
+ continue hier;
+ }
+ }
+ return false;
+ }
+ }
+ return true;
+ }
+
+ private static boolean checkAggregationGroup(RowKeyDesc rowkey, long cuboidID) {
+ long cuboidWithoutMandatory = cuboidID & ~rowkey.getMandatoryColumnMask();
+ long leftover;
+ for (AggrGroupMask mask : rowkey.getAggrGroupMasks()) {
+ if ((cuboidWithoutMandatory & mask.uniqueMask) != 0) {
+ leftover = cuboidWithoutMandatory & ~mask.groupMask;
+ return leftover == 0 || leftover == mask.leftoverMask;
+ }
+ }
+
+ leftover = cuboidWithoutMandatory & rowkey.getTailMask();
+ return leftover == 0 || leftover == rowkey.getTailMask();
+ }
+
+ private CubeDesc cube;
+ private final long inputID;
+ private final long id;
+ private final byte[] idBytes;
+ private final boolean requirePostAggregation;
+ private List<TblColRef> dimensionColumns;
+
+ // will translate the cuboidID if it is not valid
+ private Cuboid(CubeDesc cube, long originalID, long validID) {
+ this.cube = cube;
+ this.inputID = originalID;
+ this.id = validID;
+ this.idBytes = Bytes.toBytes(id);
+ this.dimensionColumns = translateIdToColumns(this.id);
+ this.requirePostAggregation = calcExtraAggregation(this.inputID, this.id) != 0;
+ }
+
+ private List<TblColRef> translateIdToColumns(long cuboidID) {
+ List<TblColRef> dimesnions = new ArrayList<TblColRef>();
+ RowKeyColDesc[] allColumns = cube.getRowkey().getRowKeyColumns();
+ for (int i = 0; i < allColumns.length; i++) {
+ // NOTE: the order of column in list!!!
+ long bitmask = 1L << allColumns[i].getBitIndex();
+ if ((cuboidID & bitmask) != 0) {
+ TblColRef colRef = allColumns[i].getColRef();
+ dimesnions.add(colRef);
+ }
+ }
+ return dimesnions;
+ }
+
+ private long calcExtraAggregation(long inputID, long id) {
+ long diff = id ^ inputID;
+ return eliminateHierarchyAggregation(diff);
+ }
+
+ // higher level in hierarchy can be ignored when counting aggregation columns
+ private long eliminateHierarchyAggregation(long id) {
+ List<HierarchyMask> hierarchyMaskList = cube.getRowkey().getHierarchyMasks();
+ if (hierarchyMaskList != null && hierarchyMaskList.size() > 0) {
+ for (HierarchyMask hierMask : hierarchyMaskList) {
+ long[] allMasks = hierMask.allMasks;
+ for (int i = allMasks.length - 1; i > 0; i--) {
+ long bit = allMasks[i] ^ allMasks[i - 1];
+ if ((inputID & bit) != 0) {
+ id &= ~allMasks[i - 1];
+ }
+ }
+ }
+ }
+ return id;
+ }
+
+ public CubeDesc getCube() {
+ return cube;
+ }
+
+ public List<TblColRef> getColumns() {
+ return dimensionColumns;
+ }
+
+ public List<TblColRef> getAggregationColumns() {
+ long aggrColsID = eliminateHierarchyAggregation(id);
+ return translateIdToColumns(aggrColsID);
+ }
+
+ public long getId() {
+ return id;
+ }
+
+ public byte[] getBytes() {
+ return idBytes;
+ }
+
+ public long getInputID() {
+ return inputID;
+ }
+
+ public boolean useAncestor() {
+ return inputID != id;
+ }
+
+ public boolean requirePostAggregation() {
+ return requirePostAggregation;
+ }
+
+ @Override
+ public String toString() {
+ return "Cuboid [id=" + id + "]";
+ }
+
+ @Override
+ public int hashCode() {
+ final int prime = 31;
+ int result = 1;
+ result = prime * result + (int) (id ^ (id >>> 32));
+ return result;
+ }
+
+ @Override
+ public boolean equals(Object obj) {
+ if (this == obj)
+ return true;
+ if (obj == null)
+ return false;
+ if (getClass() != obj.getClass())
+ return false;
+ Cuboid other = (Cuboid) obj;
+ if (id != other.id)
+ return false;
+ return true;
+ }
+
+ @Override
+ public int compareTo(Cuboid o) {
+ if (this.id < o.id) {
+ return -1;
+ } else if (this.id > o.id) {
+ return 1;
+ } else {
+ return 0;
+ }
+ }
+
+}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/4b631f92/cube/src/main/java/com/kylinolap/cube/cuboid/CuboidCLI.java
----------------------------------------------------------------------
diff --git a/cube/src/main/java/com/kylinolap/cube/cuboid/CuboidCLI.java b/cube/src/main/java/com/kylinolap/cube/cuboid/CuboidCLI.java
new file mode 100644
index 0000000..95759c8
--- /dev/null
+++ b/cube/src/main/java/com/kylinolap/cube/cuboid/CuboidCLI.java
@@ -0,0 +1,166 @@
+/*
+ * Copyright 2013-2014 eBay Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package com.kylinolap.cube.cuboid;
+
+import java.io.IOException;
+import java.util.Collection;
+import java.util.LinkedList;
+import java.util.TreeSet;
+
+import com.kylinolap.common.KylinConfig;
+import com.kylinolap.metadata.MetadataManager;
+import com.kylinolap.metadata.model.cube.CubeDesc;
+import com.kylinolap.metadata.model.cube.RowKeyDesc;
+import com.kylinolap.metadata.model.cube.RowKeyDesc.AggrGroupMask;
+import com.kylinolap.metadata.model.cube.RowKeyDesc.HierarchyMask;
+
+/**
+ * @author yangli9
+ *
+ */
+public class CuboidCLI {
+
+ public static void main(String[] args) throws IOException {
+ MetadataManager metaMgr = MetadataManager.getInstance(KylinConfig.getInstanceFromEnv());
+
+ if ("test".equals(args[0])) {
+ CubeDesc cubeDesc = metaMgr.getCubeDesc(args[1]);
+ simulateCuboidGeneration(cubeDesc);
+ }
+ }
+
+ public static int simulateCuboidGeneration(CubeDesc cube) {
+ CuboidScheduler scheduler = new CuboidScheduler(cube);
+
+ long baseCuboid = Cuboid.getBaseCuboidId(cube);
+ Collection<Long> cuboidSet = new TreeSet<Long>();
+ cuboidSet.add(baseCuboid);
+ LinkedList<Long> cuboidQueue = new LinkedList<Long>();
+ cuboidQueue.push(baseCuboid);
+ while (!cuboidQueue.isEmpty()) {
+ long cuboid = cuboidQueue.pop();
+ Collection<Long> spnanningCuboids = scheduler.getSpanningCuboid(cuboid);
+ for (Long sc : spnanningCuboids) {
+ boolean notfound = cuboidSet.add(sc);
+ if (!notfound) {
+ throw new IllegalStateException("Find duplicate spanning cuboid " + sc + " from cuboid " + cuboid);
+ }
+ cuboidQueue.push(sc);
+ }
+ }
+
+ TreeSet<Long> enumCuboids = enumCalcCuboidCount(cube);
+ if (enumCuboids.equals(cuboidSet) == false) {
+ throw new IllegalStateException("Expected cuboid set " + enumCuboids + "; but actual cuboid set " + cuboidSet);
+ }
+
+ int mathCount = mathCalcCuboidCount(cube);
+ if (mathCount != enumCuboids.size()) {
+ throw new IllegalStateException("Math cuboid count " + mathCount + ", but actual cuboid count " + enumCuboids.size());
+ }
+
+ return mathCount;
+
+ }
+
+ public static TreeSet<Long> enumCalcCuboidCount(CubeDesc cube) {
+ long baseCuboid = Cuboid.getBaseCuboidId(cube);
+ TreeSet<Long> expectedCuboids = new TreeSet<Long>();
+ for (long cuboid = 0; cuboid <= baseCuboid; cuboid++) {
+ if (Cuboid.isValid(cube, cuboid)) {
+ expectedCuboids.add(cuboid);
+ }
+ }
+ return expectedCuboids;
+ }
+
+ public static int[] calculateAllLevelCount(CubeDesc cube) {
+ int levels = cube.getRowkey().getNCuboidBuildLevels();
+ int[] allLevelCounts = new int[levels + 1];
+
+ CuboidScheduler scheduler = new CuboidScheduler(cube);
+ LinkedList<Long> nextQueue = new LinkedList<Long>();
+ LinkedList<Long> currentQueue = new LinkedList<Long>();
+ long baseCuboid = Cuboid.getBaseCuboidId(cube);
+ currentQueue.push(baseCuboid);
+
+ for (int i = 0; i <= levels; i++) {
+ allLevelCounts[i] = currentQueue.size();
+ while (!currentQueue.isEmpty()) {
+ long cuboid = currentQueue.pop();
+ Collection<Long> spnanningCuboids = scheduler.getSpanningCuboid(cuboid);
+ nextQueue.addAll(spnanningCuboids);
+ }
+ currentQueue = nextQueue;
+ nextQueue = new LinkedList<Long>();
+ }
+
+ return allLevelCounts;
+ }
+
+ public static int mathCalcCuboidCount(CubeDesc cube) {
+ int result = 1; // 1 for base cuboid
+
+ RowKeyDesc rowkey = cube.getRowkey();
+ AggrGroupMask[] aggrGroupMasks = rowkey.getAggrGroupMasks();
+ for (int i = 0; i < aggrGroupMasks.length; i++) {
+ boolean hasTail = i < aggrGroupMasks.length - 1 || rowkey.getTailMask() > 0;
+ result += mathCalcCuboidCount_aggrGroup(rowkey, aggrGroupMasks[i], hasTail);
+ }
+
+ return result;
+ }
+
+ private static int mathCalcCuboidCount_aggrGroup(RowKeyDesc rowkey, AggrGroupMask aggrGroupMask, boolean hasTail) {
+ long groupMask = aggrGroupMask.groupMask;
+ int n = mathCalcCuboidCount_combination(rowkey, groupMask);
+ n -= 2; // exclude group all 1 and all 0
+
+ long nonUniqueMask = groupMask & (~aggrGroupMask.uniqueMask);
+ if (nonUniqueMask > 0) {
+ // exclude duplicates caused by non-unique columns
+ // FIXME this assumes non-unique masks consolidates in ONE following group which maybe not be true
+ n -= mathCalcCuboidCount_combination(rowkey, nonUniqueMask) - 1; // exclude all 0
+ }
+
+ if (hasTail) {
+ n *= 2; // tail being 1 and 0
+ n += 2; // +1 for group all 1 and tail 0; +1 for group all 0 and tail 1
+ }
+
+ return n;
+ }
+
+ private static int mathCalcCuboidCount_combination(RowKeyDesc rowkey, long colMask) {
+ if (colMask == 0) // no column selected
+ return 0;
+
+ int count = 1;
+
+ for (HierarchyMask hierMask : rowkey.getHierarchyMasks()) {
+ long hierBits = colMask & hierMask.fullMask;
+ if (hierBits != 0) {
+ count *= Long.bitCount(hierBits) + 1; // +1 is for all-zero case
+ colMask &= ~hierBits;
+ }
+ }
+
+ count *= Math.pow(2, Long.bitCount(colMask));
+
+ return count;
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/4b631f92/cube/src/main/java/com/kylinolap/cube/cuboid/CuboidScheduler.java
----------------------------------------------------------------------
diff --git a/cube/src/main/java/com/kylinolap/cube/cuboid/CuboidScheduler.java b/cube/src/main/java/com/kylinolap/cube/cuboid/CuboidScheduler.java
new file mode 100644
index 0000000..9f77770
--- /dev/null
+++ b/cube/src/main/java/com/kylinolap/cube/cuboid/CuboidScheduler.java
@@ -0,0 +1,190 @@
+/*
+ * Copyright 2013-2014 eBay Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.kylinolap.cube.cuboid;
+
+/**
+ * @author George Song (ysong1)
+ *
+ */
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+
+import com.kylinolap.metadata.model.cube.CubeDesc;
+import com.kylinolap.metadata.model.cube.RowKeyDesc;
+import com.kylinolap.metadata.model.cube.RowKeyDesc.AggrGroupMask;
+
+ /**
+  * Derives, for a given cuboid, the child cuboids that are to be built from
+  * it, so that every valid cuboid of the cube gets exactly one parent.
+  * Results are cached per scheduler instance.
+  */
+ public class CuboidScheduler {
+
+     private final CubeDesc cubeDef;
+     private final int size;
+     private final long max;
+     private final Map<Long, Collection<Long>> cache;
+
+     /**
+      * @param cube the cube descriptor whose rowkey defines the cuboid space
+      */
+     public CuboidScheduler(CubeDesc cube) {
+         this.cubeDef = cube;
+         this.size = cube.getRowkey().getRowKeyColumns().length;
+         // Largest id with `size` bits. Computed with integer shifts: going through
+         // Math.pow saturates at 2^63 when cast to long, which made max one too
+         // small for a 63-column rowkey and rejected its base cuboid.
+         this.max = (size >= Long.SIZE - 1) ? Long.MAX_VALUE : (1L << size) - 1;
+         this.cache = new ConcurrentHashMap<Long, Collection<Long>>();
+     }
+
+     /**
+      * Returns the cuboids to be calculated from the given one: its children,
+      * minus those already claimed by one of its smaller siblings.
+      *
+      * @throws IllegalArgumentException if the id is negative or uses more bits
+      *                                  than the rowkey has columns
+      */
+     public Collection<Long> getSpanningCuboid(long cuboid) {
+         if (cuboid > max || cuboid < 0) {
+             throw new IllegalArgumentException("Cuboid " + cuboid + " is out of scope 0-" + max);
+         }
+
+         Collection<Long> result = cache.get(cuboid);
+         if (result != null) {
+             return result;
+         }
+
+         // smaller sibling's children
+         Collection<Long> allPrevOffspring = new HashSet<Long>();
+         for (Long sibling : findSmallerSibling(cuboid)) {
+             allPrevOffspring.addAll(generateChildren(sibling));
+         }
+
+         // my children is my generation excluding smaller sibling's generation
+         result = new ArrayList<Long>();
+         for (Long offspring : generateChildren(cuboid)) {
+             if (!allPrevOffspring.contains(offspring)) {
+                 result.add(offspring);
+             }
+         }
+
+         cache.put(cuboid, result);
+         return result;
+     }
+
+     // Candidate children of a cuboid: the zero-tail cuboids plus every valid
+     // cuboid obtained by switching off one bit of an aggregation group the
+     // cuboid belongs to.
+     private Collection<Long> generateChildren(long cuboid) {
+         Collection<Long> result = new HashSet<Long>();
+
+         // generate zero tail cuboid -- the one with all 1 in the first
+         // aggregation group and all 0 for the rest bits
+         generateZeroTailBase(cuboid, result);
+
+         RowKeyDesc rowkey = cubeDef.getRowkey();
+         long cuboidWithoutMandatory = cuboid & ~rowkey.getMandatoryColumnMask();
+         for (AggrGroupMask mask : rowkey.getAggrGroupMasks()) {
+             if (!belongTo(cuboidWithoutMandatory, mask))
+                 continue;
+
+             for (long oneBit : mask.groupOneBitMasks) {
+                 if ((cuboid & oneBit) == 0)
+                     continue; // bit already off, nothing to remove
+
+                 long child = cuboid ^ oneBit;
+                 if (Cuboid.isValid(cubeDef, child)) {
+                     result.add(child);
+                 }
+             }
+
+             // groups after the first one whose unique bits are touched are not considered
+             if ((cuboidWithoutMandatory & mask.uniqueMask) > 0)
+                 break;
+         }
+
+         return result;
+     }
+
+     // Adds, for each group whose bits and leftover bits are all on in the cuboid,
+     // the cuboid made of the mandatory columns plus that group only.
+     private void generateZeroTailBase(long cuboid, Collection<Long> result) {
+         RowKeyDesc rowkey = cubeDef.getRowkey();
+
+         long cuboidWithoutMandatory = cuboid & ~rowkey.getMandatoryColumnMask();
+
+         for (AggrGroupMask mask : rowkey.getAggrGroupMasks()) {
+             boolean groupAllOn = (cuboidWithoutMandatory & mask.groupMask) == mask.groupMask;
+             boolean leftoverAllOn = (cuboidWithoutMandatory & mask.leftoverMask) == mask.leftoverMask;
+             if (groupAllOn && leftoverAllOn) {
+                 long zeroTail = rowkey.getMandatoryColumnMask() | mask.groupMask;
+                 if (zeroTail > 0 && zeroTail != cuboid) {
+                     result.add(zeroTail);
+                 }
+             }
+             if ((cuboidWithoutMandatory & mask.uniqueMask) > 0)
+                 break;
+         }
+     }
+
+     /**
+      * Finds the valid cuboids that keep the given cuboid's bits outside its
+      * related aggregation groups, have the same number of group bits set, and
+      * are numerically smaller.
+      *
+      * @return the smaller siblings; empty when the cuboid itself is not valid
+      */
+     public Collection<Long> findSmallerSibling(long cuboid) {
+         if (!Cuboid.isValid(cubeDef, cuboid)) {
+             return Collections.emptyList();
+         }
+
+         RowKeyDesc rowkey = cubeDef.getRowkey();
+
+         // do combination in all related groups
+         long groupAllBitMask = 0;
+         for (AggrGroupMask mask : rowkey.getAggrGroupMasks()) {
+             if ((mask.groupMask & cuboid) > 0) {
+                 groupAllBitMask |= mask.groupMask;
+             }
+         }
+
+         long groupBitValue = cuboid & groupAllBitMask;
+         long leftBitValue = cuboid & ~groupAllBitMask;
+         long[] groupOneBits = bits(groupAllBitMask);
+
+         Collection<Long> siblings = new HashSet<Long>();
+         combination(cuboid, siblings, groupOneBits, 0, leftBitValue, Long.bitCount(groupBitValue));
+         return siblings;
+     }
+
+     // Decomposes a mask into its single-bit masks, highest bit first.
+     private long[] bits(long groupAllBitMask) {
+         long[] r = new long[Long.bitCount(groupAllBitMask)];
+         long remaining = groupAllBitMask;
+         int i = 0;
+         while (remaining != 0) {
+             long bit = Long.highestOneBit(remaining);
+             r[i++] = bit;
+             remaining ^= bit;
+         }
+         return r;
+     }
+
+     // Recursively ORs k of the bitMasks (taken from offset onwards) into bitValue,
+     // abandoning any branch that reaches or exceeds cuboid, and collects the
+     // valid results.
+     private void combination(long cuboid, Collection<Long> siblings, long[] bitMasks, int offset, long bitValue, int k) {
+         if (k == 0) {
+             if (Cuboid.isValid(cubeDef, bitValue)) {
+                 siblings.add(bitValue);
+             }
+             return;
+         }
+
+         for (int i = offset; i < bitMasks.length; i++) {
+             long newBitValue = bitValue | bitMasks[i];
+             if (newBitValue < cuboid) {
+                 combination(cuboid, siblings, bitMasks, i + 1, newBitValue, k - 1);
+             }
+         }
+     }
+
+     // A cuboid belongs to a group when it has some of the group's bits on and
+     // the group's leftover bits are either all off or all on.
+     private boolean belongTo(long cuboidWithoutMandatory, AggrGroupMask mask) {
+         long groupBits = cuboidWithoutMandatory & mask.groupMask;
+         long leftoverBits = cuboidWithoutMandatory & mask.leftoverMask;
+         return groupBits > 0 && (leftoverBits == 0 || leftoverBits == mask.leftoverMask);
+     }
+
+     /**
+      * @return the number of columns (set bits) in the cuboid
+      * @throws IllegalArgumentException if the id is negative or uses more bits
+      *                                  than the rowkey has columns
+      */
+     public int getCardinality(long cuboid) {
+         if (cuboid > max || cuboid < 0) {
+             throw new IllegalArgumentException("Cubiod " + cuboid + " is out of scope 0-" + max);
+         }
+
+         return Long.bitCount(cuboid);
+     }
+ }
http://git-wip-us.apache.org/repos/asf/incubator-kylin/blob/4b631f92/cube/src/main/java/com/kylinolap/cube/dataGen/ColumnConfig.java
----------------------------------------------------------------------
diff --git a/cube/src/main/java/com/kylinolap/cube/dataGen/ColumnConfig.java b/cube/src/main/java/com/kylinolap/cube/dataGen/ColumnConfig.java
new file mode 100644
index 0000000..3ee1cf0
--- /dev/null
+++ b/cube/src/main/java/com/kylinolap/cube/dataGen/ColumnConfig.java
@@ -0,0 +1,54 @@
+package com.kylinolap.cube.dataGen;
+
+import java.util.ArrayList;
+
+import com.fasterxml.jackson.annotation.JsonAutoDetect;
+import com.fasterxml.jackson.annotation.JsonProperty;
+
+/**
+ * Created by honma on 5/29/14.
+ */
+@JsonAutoDetect(fieldVisibility = JsonAutoDetect.Visibility.NONE, getterVisibility = JsonAutoDetect.Visibility.NONE, isGetterVisibility = JsonAutoDetect.Visibility.NONE, setterVisibility = JsonAutoDetect.Visibility.NONE)
+public class ColumnConfig {
+ @JsonProperty("columnName")
+ private String columnName;
+ @JsonProperty("valueSet")
+ private ArrayList<String> valueSet;
+ @JsonProperty("exclusive")
+ private boolean exclusive;
+ @JsonProperty("asRange")
+ private boolean asRange;
+
+ public boolean isAsRange() {
+ return asRange;
+ }
+
+ public void setAsRange(boolean asRange) {
+ this.asRange = asRange;
+ }
+
+ public boolean isExclusive() {
+ return exclusive;
+ }
+
+ public void setExclusive(boolean exclusive) {
+ this.exclusive = exclusive;
+ }
+
+ public String getColumnName() {
+ return columnName;
+ }
+
+ public void setColumnName(String columnName) {
+ this.columnName = columnName;
+ }
+
+ public ArrayList<String> getValueSet() {
+ return valueSet;
+ }
+
+ public void setValueSet(ArrayList<String> valueSet) {
+ this.valueSet = valueSet;
+ }
+
+}