You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by cg...@apache.org on 2021/08/27 11:50:49 UTC

[drill] branch master updated: DRILL-7991: Add Configuration Option to HDF5 Reader to Skip Preview (#2302)

This is an automated email from the ASF dual-hosted git repository.

cgivre pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git


The following commit(s) were added to refs/heads/master by this push:
     new 3b87661  DRILL-7991: Add Configuration Option to HDF5 Reader to Skip Preview (#2302)
3b87661 is described below

commit 3b876616f0bb46ca1dc25d95d502c818988d7d4f
Author: Charles S. Givre <cg...@apache.org>
AuthorDate: Fri Aug 27 07:50:43 2021 -0400

    DRILL-7991: Add Configuration Option to HDF5 Reader to Skip Preview (#2302)
    
    * Code Working
    
    * Updated docs
    
    * Bump jhdf to version 0.6.2
---
 contrib/format-hdf5/README.md                      |  6 ++++--
 contrib/format-hdf5/pom.xml                        |  2 +-
 .../drill/exec/store/hdf5/HDF5BatchReader.java     |  5 ++++-
 .../drill/exec/store/hdf5/HDF5FormatConfig.java    | 19 ++++++++++++------
 .../main/resources/bootstrap-format-plugins.json   |  9 ++++++---
 .../drill/exec/store/hdf5/TestHDF5Format.java      | 23 ++++++++++++++++++++++
 6 files changed, 51 insertions(+), 13 deletions(-)

diff --git a/contrib/format-hdf5/README.md b/contrib/format-hdf5/README.md
index 11c4072..58d97f4 100644
--- a/contrib/format-hdf5/README.md
+++ b/contrib/format-hdf5/README.md
@@ -10,6 +10,8 @@ There are three configuration variables in this plugin:
 * `type`: This should be set to `hdf5`.
 * `extensions`: This is a list of the file extensions used to identify HDF5 files. Typically HDF5 uses `.h5` or `.hdf5` as file extensions. This defaults to `.h5`.
 * `defaultPath`: The default path defines which path Drill will query for data. Typically this should be left as `null` in the configuration file. Its usage is explained below.
+* `showPreview`: Set to `true` if you want Drill to render a preview of datasets in the metadata view, `false` if not.  Defaults to `true`; however, for large files or very
+    complex data, you should set it to `false` for better performance.
 
 ### Example Configuration
 For most uses, the configuration below will suffice to enable Drill to query HDF5 files.
@@ -19,7 +21,8 @@ For most uses, the configuration below will suffice to enable Drill to query HDF
       "extensions": [
         "h5"
       ],
-      "defaultPath": null
+      "defaultPath": null,
+      "showPreview": true
     }
 ```
 ## Usage
@@ -136,4 +139,3 @@ There are several limitations with the HDF5 format plugin in Drill.
  
  [1]: https://en.wikipedia.org/wiki/Hierarchical_Data_Format
  [2]: https://www.hdfgroup.org
- 
\ No newline at end of file
diff --git a/contrib/format-hdf5/pom.xml b/contrib/format-hdf5/pom.xml
index 9ee578b..c029de9 100644
--- a/contrib/format-hdf5/pom.xml
+++ b/contrib/format-hdf5/pom.xml
@@ -39,7 +39,7 @@
     <dependency>
       <groupId>io.jhdf</groupId>
       <artifactId>jhdf</artifactId>
-      <version>0.6.1</version>
+      <version>0.6.2</version>
     </dependency>
 
     <!-- Test dependencies -->
diff --git a/contrib/format-hdf5/src/main/java/org/apache/drill/exec/store/hdf5/HDF5BatchReader.java b/contrib/format-hdf5/src/main/java/org/apache/drill/exec/store/hdf5/HDF5BatchReader.java
index eaca6d9..6979dbb 100644
--- a/contrib/format-hdf5/src/main/java/org/apache/drill/exec/store/hdf5/HDF5BatchReader.java
+++ b/contrib/format-hdf5/src/main/java/org/apache/drill/exec/store/hdf5/HDF5BatchReader.java
@@ -153,6 +153,8 @@ public class HDF5BatchReader implements ManagedReader<FileSchemaNegotiator> {
 
   private CustomErrorContext errorContext;
 
+  private boolean showMetadataPreview;
+
   private int[] dimensions;
 
   public static class HDF5ReaderConfig {
@@ -173,6 +175,7 @@ public class HDF5BatchReader implements ManagedReader<FileSchemaNegotiator> {
     this.readerConfig = readerConfig;
     this.maxRecords = maxRecords;
     dataWriters = new ArrayList<>();
+    this.showMetadataPreview = readerConfig.formatConfig.showPreview();
   }
 
   @Override
@@ -434,7 +437,7 @@ public class HDF5BatchReader implements ManagedReader<FileSchemaNegotiator> {
       dimensionsWriter.setString(Arrays.toString(dataset.getDimensions()));
 
       // Do not project links
-      if (! metadataRow.isLink()) {
+      if (! metadataRow.isLink() && showMetadataPreview) {
         projectDataset(rowWriter, metadataRow.getPath());
       }
     }
diff --git a/contrib/format-hdf5/src/main/java/org/apache/drill/exec/store/hdf5/HDF5FormatConfig.java b/contrib/format-hdf5/src/main/java/org/apache/drill/exec/store/hdf5/HDF5FormatConfig.java
index 3ce2bd9..f018c37 100644
--- a/contrib/format-hdf5/src/main/java/org/apache/drill/exec/store/hdf5/HDF5FormatConfig.java
+++ b/contrib/format-hdf5/src/main/java/org/apache/drill/exec/store/hdf5/HDF5FormatConfig.java
@@ -36,15 +36,18 @@ public class HDF5FormatConfig implements FormatPluginConfig {
 
   private final List<String> extensions;
   private final String defaultPath;
+  private final boolean showPreview;
 
   @JsonCreator
   public HDF5FormatConfig(
       @JsonProperty("extensions") List<String> extensions,
-      @JsonProperty("defaultPath") String defaultPath) {
+      @JsonProperty("defaultPath") String defaultPath,
+      @JsonProperty("showPreview") boolean showPreview) {
     this.extensions = extensions == null
         ? Collections.singletonList("h5")
         : ImmutableList.copyOf(extensions);
     this.defaultPath = defaultPath;
+    this.showPreview = showPreview;
   }
 
   @JsonInclude(JsonInclude.Include.NON_DEFAULT)
@@ -56,6 +59,8 @@ public class HDF5FormatConfig implements FormatPluginConfig {
     return defaultPath;
   }
 
+  public boolean showPreview() { return showPreview; }
+
   @Override
   public boolean equals(Object obj) {
     if (this == obj) {
@@ -66,19 +71,21 @@ public class HDF5FormatConfig implements FormatPluginConfig {
     }
     HDF5FormatConfig other = (HDF5FormatConfig) obj;
     return Objects.equals(extensions, other.getExtensions()) &&
-           Objects.equals(defaultPath, other.defaultPath);
+      Objects.equals(defaultPath, other.defaultPath) &&
+      Objects.equals(showPreview, other.showPreview);
   }
 
   @Override
   public int hashCode() {
-    return Objects.hash(extensions, defaultPath);
+    return Objects.hash(extensions, defaultPath, showPreview);
   }
 
   @Override
   public String toString() {
     return new PlanStringBuilder(this)
-        .field("extensions", extensions)
-        .field("default path", defaultPath)
-        .toString();
+      .field("extensions", extensions)
+      .field("default path", defaultPath)
+      .field("show preview", showPreview)
+      .toString();
   }
 }
diff --git a/contrib/format-hdf5/src/main/resources/bootstrap-format-plugins.json b/contrib/format-hdf5/src/main/resources/bootstrap-format-plugins.json
index 1bee10f..290763e 100644
--- a/contrib/format-hdf5/src/main/resources/bootstrap-format-plugins.json
+++ b/contrib/format-hdf5/src/main/resources/bootstrap-format-plugins.json
@@ -7,7 +7,8 @@
           "type": "hdf5",
           "extensions": [
             "h5"
-          ]
+          ],
+          "showPreview": true
         }
       }
     },
@@ -18,7 +19,8 @@
           "type": "hdf5",
           "extensions": [
             "h5"
-          ]
+          ],
+          "showPreview": true
         }
       }
     },
@@ -29,7 +31,8 @@
           "type": "hdf5",
           "extensions": [
             "h5"
-          ]
+          ],
+          "showPreview": true
         }
       }
     }
diff --git a/contrib/format-hdf5/src/test/java/org/apache/drill/exec/store/hdf5/TestHDF5Format.java b/contrib/format-hdf5/src/test/java/org/apache/drill/exec/store/hdf5/TestHDF5Format.java
index 48e6d80..e427216 100644
--- a/contrib/format-hdf5/src/test/java/org/apache/drill/exec/store/hdf5/TestHDF5Format.java
+++ b/contrib/format-hdf5/src/test/java/org/apache/drill/exec/store/hdf5/TestHDF5Format.java
@@ -112,6 +112,29 @@ public class TestHDF5Format extends ClusterTest {
   }
 
   @Test
+  public void testStarQueryWithoutPreview() throws Exception {
+    String sql = "SELECT * FROM table(dfs.`hdf5/dset.h5` (type => 'hdf5', showPreview => false))";
+    RowSet results = client.queryBuilder().sql(sql).rowSet();
+
+    TupleMetadata expectedSchema = new SchemaBuilder()
+      .add("path", MinorType.VARCHAR, DataMode.OPTIONAL)
+      .add("data_type", MinorType.VARCHAR, DataMode.OPTIONAL)
+      .add("file_name", MinorType.VARCHAR, DataMode.OPTIONAL)
+      .add("data_size", MinorType.BIGINT, DataMode.OPTIONAL)
+      .add("is_link", MinorType.BIT, DataMode.OPTIONAL)
+      .add("element_count", MinorType.BIGINT, DataMode.OPTIONAL)
+      .add("dataset_data_type", MinorType.VARCHAR, DataMode.OPTIONAL)
+      .add("dimensions", MinorType.VARCHAR, DataMode.OPTIONAL)
+      .build();
+
+    RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+      .addRow("/dset", "DATASET", "dset.h5", 96, false, 24, "int", "[4, 6]")
+      .build();
+
+    new RowSetComparison(expected).verifyAndClearAll(results);
+  }
+
+  @Test
   public void testFlattenColumnQuery() throws RpcException {
     String sql = "SELECT data[0] AS col1,\n" +
             "data[1] as col2,\n" +