You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by su...@apache.org on 2018/12/11 21:49:48 UTC
[incubator-pinot] 02/02: Filter out virtual-columns in realtime segment conversion path
This is an automated email from the ASF dual-hosted git repository.
sunithabeeram pushed a commit to branch VirtualColumnsRT
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git
commit a2227ce6dbc1033c86650352a67395d447415288
Author: Sunitha Beeram <sb...@sbeeram-ld2.linkedin.biz>
AuthorDate: Tue Dec 11 12:47:05 2018 -0800
Filter out virtual-columns in realtime segment conversion path
---
.../converter/RealtimeSegmentConverter.java | 46 +++++++++++++-------
.../converter/RealtimeSegmentConverterTest.java | 50 ++++++++++++++++++++++
2 files changed, 81 insertions(+), 15 deletions(-)
diff --git a/pinot-core/src/main/java/com/linkedin/pinot/core/realtime/converter/RealtimeSegmentConverter.java b/pinot-core/src/main/java/com/linkedin/pinot/core/realtime/converter/RealtimeSegmentConverter.java
index 4608e03..6ee3296 100644
--- a/pinot-core/src/main/java/com/linkedin/pinot/core/realtime/converter/RealtimeSegmentConverter.java
+++ b/pinot-core/src/main/java/com/linkedin/pinot/core/realtime/converter/RealtimeSegmentConverter.java
@@ -15,6 +15,7 @@
*/
package com.linkedin.pinot.core.realtime.converter;
+import com.google.common.annotations.VisibleForTesting;
import com.linkedin.pinot.common.config.ColumnPartitionConfig;
import com.linkedin.pinot.common.config.SegmentPartitionConfig;
import com.linkedin.pinot.common.data.FieldSpec;
@@ -57,28 +58,14 @@ public class RealtimeSegmentConverter {
if (new File(outputPath).exists()) {
throw new IllegalAccessError("path already exists:" + outputPath);
}
- TimeFieldSpec original = schema.getTimeFieldSpec();
- // Use outgoing granularity for creating segment
- TimeGranularitySpec outgoing = original.getOutgoingGranularitySpec();
-
- TimeFieldSpec newTimeSpec = new TimeFieldSpec(outgoing);
- Schema newSchema = new Schema();
- for (String dimension : schema.getDimensionNames()) {
- newSchema.addField(schema.getFieldSpecFor(dimension));
- }
- for (String metric : schema.getMetricNames()) {
- newSchema.addField(schema.getFieldSpecFor(metric));
- }
-
- newSchema.addField(newTimeSpec);
this.realtimeSegmentImpl = realtimeSegment;
this.outputPath = outputPath;
this.invertedIndexColumns = new ArrayList<>(invertedIndexColumns);
if (sortedColumn != null && this.invertedIndexColumns.contains(sortedColumn)) {
this.invertedIndexColumns.remove(sortedColumn);
}
- this.dataSchema = newSchema;
+ this.dataSchema = getUpdatedSchema(schema);
this.sortedColumn = sortedColumn;
this.tableName = tableName;
this.segmentName = segmentName;
@@ -150,4 +137,33 @@ public class RealtimeSegmentConverter {
}
}
}
+
+ /**
+ * Builds a new schema from {@code original}. Every dimension and metric of {@code original} is copied unless
+ * {@code isVirtualColumn} reports it as virtual; the time field is rebuilt from the original's outgoing granularity.
+ */
+ @VisibleForTesting
+ public
+ Schema getUpdatedSchema(Schema original) {
+ // NOTE(review): tfs is dereferenced below without a null check; confirm every caller's schema has a time field.
+ TimeFieldSpec tfs = original.getTimeFieldSpec();
+ // The segment is created with the outgoing granularity, so the new time field is built from it alone
+ TimeGranularitySpec outgoing = tfs.getOutgoingGranularitySpec();
+ TimeFieldSpec newTimeSpec = new TimeFieldSpec(outgoing);
+ // Copy dimensions and metrics into a fresh schema, skipping virtual columns
+ Schema newSchema = new Schema();
+ for (String dimension : original.getDimensionNames()) {
+ if (!original.isVirtualColumn(dimension)) {
+ newSchema.addField(original.getFieldSpecFor(dimension));
+ }
+ }
+ for (String metric : original.getMetricNames()) {
+ if (!original.isVirtualColumn(metric)) {
+ newSchema.addField(original.getFieldSpecFor(metric));
+ }
+ }
+ newSchema.addField(newTimeSpec);
+
+ return newSchema;
+ }
}
diff --git a/pinot-core/src/test/java/com/linkedin/pinot/realtime/converter/RealtimeSegmentConverterTest.java b/pinot-core/src/test/java/com/linkedin/pinot/realtime/converter/RealtimeSegmentConverterTest.java
new file mode 100644
index 0000000..2d094ec
--- /dev/null
+++ b/pinot-core/src/test/java/com/linkedin/pinot/realtime/converter/RealtimeSegmentConverterTest.java
@@ -0,0 +1,50 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.linkedin.pinot.realtime.converter;
+
+import com.linkedin.pinot.common.data.DimensionFieldSpec;
+import com.linkedin.pinot.common.data.FieldSpec;
+import com.linkedin.pinot.common.data.Schema;
+import com.linkedin.pinot.common.data.TimeFieldSpec;
+import com.linkedin.pinot.core.realtime.converter.RealtimeSegmentConverter;
+import com.linkedin.pinot.core.segment.virtualcolumn.VirtualColumnProviderFactory;
+import java.util.concurrent.TimeUnit;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+
+public class RealtimeSegmentConverterTest {
+ // Checks that RealtimeSegmentConverter.getUpdatedSchema leaves the built-in virtual columns out of its result.
+ @Test
+ public void testNoVirtualColumnsInSchema() {
+ Schema schema = new Schema();
+ FieldSpec spec = new DimensionFieldSpec("col1", FieldSpec.DataType.STRING, true);
+ schema.addField(spec);
+ TimeFieldSpec tfs = new TimeFieldSpec("col1", FieldSpec.DataType.LONG, TimeUnit.MILLISECONDS,
+ "col2", FieldSpec.DataType.LONG, TimeUnit.DAYS);
+ schema.addField(tfs);
+ VirtualColumnProviderFactory.addBuiltInVirtualColumnsToSchema(schema);
+ Assert.assertEquals(schema.getColumnNames().size(), 5); // col1 + time field + built-in virtual columns
+ Assert.assertEquals(schema.getTimeFieldSpec().getIncomingGranularitySpec().getTimeType(), TimeUnit.MILLISECONDS);
+ // A null segment and an empty output path are passed; only getUpdatedSchema is exercised on this converter.
+ RealtimeSegmentConverter converter = new RealtimeSegmentConverter(null, "", schema,
+ "testTable", "col1", "segment1", "col1");
+ // Expect only col1 and the time field to remain, with the time field now reporting DAYS as its incoming unit.
+ Schema newSchema = converter.getUpdatedSchema(schema);
+ Assert.assertEquals(newSchema.getColumnNames().size(), 2);
+ Assert.assertEquals(newSchema.getTimeFieldSpec().getIncomingGranularitySpec().getTimeType(), TimeUnit.DAYS);
+ }
+}
---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org