You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pinot.apache.org by GitBox <gi...@apache.org> on 2019/10/23 01:08:38 UTC

[GitHub] [incubator-pinot] snleee commented on a change in pull request #4737: Adding Druid Segment RecordReader

snleee commented on a change in pull request #4737: Adding Druid Segment RecordReader
URL: https://github.com/apache/incubator-pinot/pull/4737#discussion_r337810579
 
 

 ##########
 File path: pinot-druid/src/main/java1/org/apache/pinot/druid/data/readers/DruidSegmentRecordReader.java
 ##########
 @@ -0,0 +1,183 @@
+package org.apache.pinot.druid.data.readers;
+
+import com.fasterxml.jackson.databind.ObjectMapper;
+import java.io.File;
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.stream.Collectors;
+
+import org.apache.druid.jackson.DefaultObjectMapper;
+import org.apache.druid.java.util.common.granularity.Granularities;
+import org.apache.druid.java.util.common.guava.Sequence;
+import org.apache.druid.java.util.common.guava.Yielder;
+import org.apache.druid.java.util.common.guava.YieldingAccumulator;
+import org.apache.druid.segment.BaseObjectColumnValueSelector;
+import org.apache.druid.segment.ColumnSelectorFactory;
+import org.apache.druid.segment.Cursor;
+import org.apache.druid.segment.IndexIO;
+import org.apache.druid.segment.QueryableIndex;
+import org.apache.druid.segment.QueryableIndexStorageAdapter;
+import org.apache.druid.segment.VirtualColumns;
+import org.apache.druid.segment.column.ColumnConfig;
+import org.apache.druid.segment.column.ColumnHolder;
+import org.apache.druid.segment.filter.Filters;
+import org.apache.druid.query.DruidProcessingConfig;
+
+import org.apache.pinot.common.data.Schema;
+import org.apache.pinot.core.data.GenericRow;
+import org.apache.pinot.core.data.readers.RecordReader;
+import org.apache.pinot.core.indexsegment.generator.SegmentGeneratorConfig;
+
+import org.joda.time.DateTime;
+import org.joda.time.chrono.ISOChronology;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+
+/**
+ * The DruidSegmentRecordReader allows us to convert all of the rows in a Druid segment file
+ * into GenericRows, which are made into Pinot segments.
+ */
+public class DruidSegmentRecordReader implements RecordReader {
+  private static final Logger LOGGER = LoggerFactory.getLogger(DruidSegmentRecordReader.class);
+
+  private Schema _pinotSchema;
+  private Cursor _cursor;
+  private ArrayList<String> _columnNames;
+  private List<BaseObjectColumnValueSelector> _selectors;
+
+  private void init(String indexPath, Schema schema)
+      throws IOException {
+
+    // Only the columns whose names are in the Pinot schema will get processed
+    _pinotSchema = schema;
+    // getColumnNames() puts the column names in the schema into a Set
+    // so the columns will end up in a random order.
+    // Consider a different implementation where the order is consistent.
+    _columnNames = new ArrayList<>();
+    _columnNames.addAll(_pinotSchema.getColumnNames());
 
 Review comment:
   We need to add validation that does the following:
   
   1. Check that every column in the Pinot `Schema` exists in the Druid segment
   2. Check that the type of each of those columns matches between the Pinot schema and the Druid segment

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
users@infra.apache.org


With regards,
Apache Git Services

---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscribe@pinot.apache.org
For additional commands, e-mail: commits-help@pinot.apache.org