You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by su...@apache.org on 2018/12/11 21:49:48 UTC

[incubator-pinot] 02/02: Filter out virtual-columns in realtime segment conversion path

This is an automated email from the ASF dual-hosted git repository.

sunithabeeram pushed a commit to branch VirtualColumnsRT
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git

commit a2227ce6dbc1033c86650352a67395d447415288
Author: Sunitha Beeram <sb...@sbeeram-ld2.linkedin.biz>
AuthorDate: Tue Dec 11 12:47:05 2018 -0800

    Filter out virtual-columns in realtime segment conversion path
---
 .../converter/RealtimeSegmentConverter.java        | 46 +++++++++++++-------
 .../converter/RealtimeSegmentConverterTest.java    | 50 ++++++++++++++++++++++
 2 files changed, 81 insertions(+), 15 deletions(-)

diff --git a/pinot-core/src/main/java/com/linkedin/pinot/core/realtime/converter/RealtimeSegmentConverter.java b/pinot-core/src/main/java/com/linkedin/pinot/core/realtime/converter/RealtimeSegmentConverter.java
index 4608e03..6ee3296 100644
--- a/pinot-core/src/main/java/com/linkedin/pinot/core/realtime/converter/RealtimeSegmentConverter.java
+++ b/pinot-core/src/main/java/com/linkedin/pinot/core/realtime/converter/RealtimeSegmentConverter.java
@@ -15,6 +15,7 @@
  */
 package com.linkedin.pinot.core.realtime.converter;
 
+import com.google.common.annotations.VisibleForTesting;
 import com.linkedin.pinot.common.config.ColumnPartitionConfig;
 import com.linkedin.pinot.common.config.SegmentPartitionConfig;
 import com.linkedin.pinot.common.data.FieldSpec;
@@ -57,28 +58,14 @@ public class RealtimeSegmentConverter {
     if (new File(outputPath).exists()) {
       throw new IllegalAccessError("path already exists:" + outputPath);
     }
-    TimeFieldSpec original = schema.getTimeFieldSpec();
-    // Use outgoing granularity for creating segment
-    TimeGranularitySpec outgoing = original.getOutgoingGranularitySpec();
-
-    TimeFieldSpec newTimeSpec = new TimeFieldSpec(outgoing);
 
-    Schema newSchema = new Schema();
-    for (String dimension : schema.getDimensionNames()) {
-      newSchema.addField(schema.getFieldSpecFor(dimension));
-    }
-    for (String metric : schema.getMetricNames()) {
-      newSchema.addField(schema.getFieldSpecFor(metric));
-    }
-
-    newSchema.addField(newTimeSpec);
     this.realtimeSegmentImpl = realtimeSegment;
     this.outputPath = outputPath;
     this.invertedIndexColumns = new ArrayList<>(invertedIndexColumns);
     if (sortedColumn != null && this.invertedIndexColumns.contains(sortedColumn)) {
       this.invertedIndexColumns.remove(sortedColumn);
     }
-    this.dataSchema = newSchema;
+    this.dataSchema = getUpdatedSchema(schema);
     this.sortedColumn = sortedColumn;
     this.tableName = tableName;
     this.segmentName = segmentName;
@@ -150,4 +137,33 @@ public class RealtimeSegmentConverter {
       }
     }
   }
+
+  /**
+   * Returns a new schema built from {@code original}: its dimensions and metrics minus virtual columns, plus a time
+   * field created from the outgoing granularity of the original time field.
+   */
+  @VisibleForTesting
+  public
+  Schema getUpdatedSchema(Schema original) {
+    // NOTE(review): tfs is dereferenced below without a null check; confirm callers always supply a time field.
+    TimeFieldSpec tfs = original.getTimeFieldSpec();
+    // Use the outgoing granularity for the segment being created; the incoming granularity is not carried over.
+    TimeGranularitySpec outgoing = tfs.getOutgoingGranularitySpec();
+    TimeFieldSpec newTimeSpec = new TimeFieldSpec(outgoing);
+
+    Schema newSchema = new Schema();
+    for (String dimension : original.getDimensionNames()) {
+      if (!original.isVirtualColumn(dimension)) {
+        newSchema.addField(original.getFieldSpecFor(dimension));
+      }
+    }
+    for (String metric : original.getMetricNames()) {
+      if (!original.isVirtualColumn(metric)) {
+        newSchema.addField(original.getFieldSpecFor(metric));
+      }
+    }
+    newSchema.addField(newTimeSpec);
+
+    return newSchema;
+  }
 }
diff --git a/pinot-core/src/test/java/com/linkedin/pinot/realtime/converter/RealtimeSegmentConverterTest.java b/pinot-core/src/test/java/com/linkedin/pinot/realtime/converter/RealtimeSegmentConverterTest.java
new file mode 100644
index 0000000..2d094ec
--- /dev/null
+++ b/pinot-core/src/test/java/com/linkedin/pinot/realtime/converter/RealtimeSegmentConverterTest.java
@@ -0,0 +1,50 @@
+/**
+ * Copyright (C) 2014-2018 LinkedIn Corp. (pinot-core@linkedin.com)
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *         http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package com.linkedin.pinot.realtime.converter;
+
+import com.linkedin.pinot.common.data.DimensionFieldSpec;
+import com.linkedin.pinot.common.data.FieldSpec;
+import com.linkedin.pinot.common.data.Schema;
+import com.linkedin.pinot.common.data.TimeFieldSpec;
+import com.linkedin.pinot.core.realtime.converter.RealtimeSegmentConverter;
+import com.linkedin.pinot.core.segment.virtualcolumn.VirtualColumnProviderFactory;
+import java.util.concurrent.TimeUnit;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+
+public class RealtimeSegmentConverterTest {
+  // Checks that getUpdatedSchema() drops virtual columns and rebuilds the time field from the outgoing granularity.
+  @Test
+  public void testNoVirtualColumnsInSchema() {
+    Schema schema = new Schema();
+    FieldSpec spec = new DimensionFieldSpec("col1", FieldSpec.DataType.STRING, true);
+    schema.addField(spec);
+    TimeFieldSpec tfs = new TimeFieldSpec("col1", FieldSpec.DataType.LONG, TimeUnit.MILLISECONDS,
+        "col2", FieldSpec.DataType.LONG, TimeUnit.DAYS);
+    schema.addField(tfs);
+    VirtualColumnProviderFactory.addBuiltInVirtualColumnsToSchema(schema);
+    Assert.assertEquals(schema.getColumnNames().size(), 5);
+    Assert.assertEquals(schema.getTimeFieldSpec().getIncomingGranularitySpec().getTimeType(), TimeUnit.MILLISECONDS);
+    // Only getUpdatedSchema() is exercised below, so the first two constructor arguments are placeholders (null, "").
+    RealtimeSegmentConverter converter = new RealtimeSegmentConverter(null, "", schema,
+        "testTable", "col1", "segment1", "col1");
+    // The updated schema must have no virtual columns, and its time field must use the original outgoing unit (DAYS).
+    Schema newSchema = converter.getUpdatedSchema(schema);
+    Assert.assertEquals(newSchema.getColumnNames().size(), 2);
+    Assert.assertEquals(newSchema.getTimeFieldSpec().getIncomingGranularitySpec().getTimeType(), TimeUnit.DAYS);
+  }
+}


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org