You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@orc.apache.org by GitBox <gi...@apache.org> on 2021/03/05 05:53:05 UTC

[GitHub] [orc] dongjoon-hyun commented on a change in pull request #648: ORC-751: [C++] Implement Predicate Pushdown for C++ Reader

dongjoon-hyun commented on a change in pull request #648:
URL: https://github.com/apache/orc/pull/648#discussion_r588047504



##########
File path: c++/src/Reader.cc
##########
@@ -68,6 +68,12 @@ namespace orc {
       return columnPath.substr(0, columnPath.length() - 1);
   }
 
+  WriterVersion getWriterVersionImpl(const FileContents * contents) {
+    if (!contents->postscript->has_writerversion()) {
+      return WriterVersion_ORIGINAL;

Review comment:
       +1 for @pgaref 's comment.

##########
File path: c++/src/Reader.hh
##########
@@ -142,6 +143,30 @@ namespace orc {
 
     // row index of current stripe with column id as the key
     std::unordered_map<uint64_t, proto::RowIndex> rowIndexes;
+    std::map<uint32_t, BloomFilterIndex> bloomFilterIndex;

Review comment:
       Shall we remove this in this PR because this seems to be unused in this PR?

##########
File path: c++/src/sargs/SargsApplier.cc
##########
@@ -25,7 +26,13 @@ namespace orc {
                                     const std::string& colName) {
     for (uint64_t i = 0; i != type.getSubtypeCount(); ++i) {
       if (type.getFieldName(i) == colName) {
-        return type.getSubtype(i)->getColumnId();
+        if (type.getKind() == CHAR || type.getKind() == VARCHAR) {
+          ///FIXME: disable PPD for char and varchar types as their rules
+          // vary among different engines.
+          return INVALID_COLUMN_ID;

Review comment:
       Do we have a test coverage for this code path?
   If not, I'm +1 for @pgaref 's opinion. We can do this later.

##########
File path: c++/include/orc/Reader.hh
##########
@@ -538,6 +549,12 @@ namespace orc {
      */
     virtual void seekToRow(uint64_t rowNumber) = 0;
 
+    /**
+     * If PPD is enabled, returns true and store number of selected RGs and
+     * number of evaluated RGs into the stats pair; otherwise returns false.
+     */
+    virtual bool getPPDStats(std::pair<uint64_t, uint64_t>& stats) const = 0;

Review comment:
       +1 for @pgaref 's comment.




----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
users@infra.apache.org