You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by cg...@apache.org on 2021/08/27 11:50:49 UTC
[drill] branch master updated: DRILL-7991: Add Configuration Option to HDF5 Reader to Skip Preview (#2302)
This is an automated email from the ASF dual-hosted git repository.
cgivre pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git
The following commit(s) were added to refs/heads/master by this push:
new 3b87661 DRILL-7991: Add Configuration Option to HDF5 Reader to Skip Preview (#2302)
3b87661 is described below
commit 3b876616f0bb46ca1dc25d95d502c818988d7d4f
Author: Charles S. Givre <cg...@apache.org>
AuthorDate: Fri Aug 27 07:50:43 2021 -0400
DRILL-7991: Add Configuration Option to HDF5 Reader to Skip Preview (#2302)
* Code Working
* Updated docs
* Bump jhdf to version 0.6.2
---
contrib/format-hdf5/README.md | 6 ++++--
contrib/format-hdf5/pom.xml | 2 +-
.../drill/exec/store/hdf5/HDF5BatchReader.java | 5 ++++-
.../drill/exec/store/hdf5/HDF5FormatConfig.java | 19 ++++++++++++------
.../main/resources/bootstrap-format-plugins.json | 9 ++++++---
.../drill/exec/store/hdf5/TestHDF5Format.java | 23 ++++++++++++++++++++++
6 files changed, 51 insertions(+), 13 deletions(-)
diff --git a/contrib/format-hdf5/README.md b/contrib/format-hdf5/README.md
index 11c4072..58d97f4 100644
--- a/contrib/format-hdf5/README.md
+++ b/contrib/format-hdf5/README.md
@@ -10,6 +10,8 @@ There are three configuration variables in this plugin:
* `type`: This should be set to `hdf5`.
* `extensions`: This is a list of the file extensions used to identify HDF5 files. Typically HDF5 uses `.h5` or `.hdf5` as file extensions. This defaults to `.h5`.
* `defaultPath`: The default path defines which path Drill will query for data. Typically this should be left as `null` in the configuration file. Its usage is explained below.
+* `showPreview`: Set to `true` if you want Drill to render a preview of datasets in the metadata view, `false` if not. Defaults to `true`; however, for large files or very
+ complex data, you should set it to `false` for better performance.
### Example Configuration
For most uses, the configuration below will suffice to enable Drill to query HDF5 files.
@@ -19,7 +21,8 @@ For most uses, the configuration below will suffice to enable Drill to query HDF
"extensions": [
"h5"
],
- "defaultPath": null
+ "defaultPath": null,
+ "showPreview": true
}
```
## Usage
@@ -136,4 +139,3 @@ There are several limitations with the HDF5 format plugin in Drill.
[1]: https://en.wikipedia.org/wiki/Hierarchical_Data_Format
[2]: https://www.hdfgroup.org
-
\ No newline at end of file
diff --git a/contrib/format-hdf5/pom.xml b/contrib/format-hdf5/pom.xml
index 9ee578b..c029de9 100644
--- a/contrib/format-hdf5/pom.xml
+++ b/contrib/format-hdf5/pom.xml
@@ -39,7 +39,7 @@
<dependency>
<groupId>io.jhdf</groupId>
<artifactId>jhdf</artifactId>
- <version>0.6.1</version>
+ <version>0.6.2</version>
</dependency>
<!-- Test dependencies -->
diff --git a/contrib/format-hdf5/src/main/java/org/apache/drill/exec/store/hdf5/HDF5BatchReader.java b/contrib/format-hdf5/src/main/java/org/apache/drill/exec/store/hdf5/HDF5BatchReader.java
index eaca6d9..6979dbb 100644
--- a/contrib/format-hdf5/src/main/java/org/apache/drill/exec/store/hdf5/HDF5BatchReader.java
+++ b/contrib/format-hdf5/src/main/java/org/apache/drill/exec/store/hdf5/HDF5BatchReader.java
@@ -153,6 +153,8 @@ public class HDF5BatchReader implements ManagedReader<FileSchemaNegotiator> {
private CustomErrorContext errorContext;
+ private boolean showMetadataPreview;
+
private int[] dimensions;
public static class HDF5ReaderConfig {
@@ -173,6 +175,7 @@ public class HDF5BatchReader implements ManagedReader<FileSchemaNegotiator> {
this.readerConfig = readerConfig;
this.maxRecords = maxRecords;
dataWriters = new ArrayList<>();
+ this.showMetadataPreview = readerConfig.formatConfig.showPreview();
}
@Override
@@ -434,7 +437,7 @@ public class HDF5BatchReader implements ManagedReader<FileSchemaNegotiator> {
dimensionsWriter.setString(Arrays.toString(dataset.getDimensions()));
// Do not project links
- if (! metadataRow.isLink()) {
+ if (! metadataRow.isLink() && showMetadataPreview) {
projectDataset(rowWriter, metadataRow.getPath());
}
}
diff --git a/contrib/format-hdf5/src/main/java/org/apache/drill/exec/store/hdf5/HDF5FormatConfig.java b/contrib/format-hdf5/src/main/java/org/apache/drill/exec/store/hdf5/HDF5FormatConfig.java
index 3ce2bd9..f018c37 100644
--- a/contrib/format-hdf5/src/main/java/org/apache/drill/exec/store/hdf5/HDF5FormatConfig.java
+++ b/contrib/format-hdf5/src/main/java/org/apache/drill/exec/store/hdf5/HDF5FormatConfig.java
@@ -36,15 +36,18 @@ public class HDF5FormatConfig implements FormatPluginConfig {
private final List<String> extensions;
private final String defaultPath;
+ private final boolean showPreview;
@JsonCreator
public HDF5FormatConfig(
@JsonProperty("extensions") List<String> extensions,
- @JsonProperty("defaultPath") String defaultPath) {
+ @JsonProperty("defaultPath") String defaultPath,
+ @JsonProperty("showPreview") boolean showPreview) {
this.extensions = extensions == null
? Collections.singletonList("h5")
: ImmutableList.copyOf(extensions);
this.defaultPath = defaultPath;
+ this.showPreview = showPreview;
}
@JsonInclude(JsonInclude.Include.NON_DEFAULT)
@@ -56,6 +59,8 @@ public class HDF5FormatConfig implements FormatPluginConfig {
return defaultPath;
}
+ public boolean showPreview() { return showPreview; }
+
@Override
public boolean equals(Object obj) {
if (this == obj) {
@@ -66,19 +71,21 @@ public class HDF5FormatConfig implements FormatPluginConfig {
}
HDF5FormatConfig other = (HDF5FormatConfig) obj;
return Objects.equals(extensions, other.getExtensions()) &&
- Objects.equals(defaultPath, other.defaultPath);
+ Objects.equals(defaultPath, other.defaultPath) &&
+ Objects.equals(showPreview, other.showPreview);
}
@Override
public int hashCode() {
- return Objects.hash(extensions, defaultPath);
+ return Objects.hash(extensions, defaultPath, showPreview);
}
@Override
public String toString() {
return new PlanStringBuilder(this)
- .field("extensions", extensions)
- .field("default path", defaultPath)
- .toString();
+ .field("extensions", extensions)
+ .field("default path", defaultPath)
+ .field("show preview", showPreview)
+ .toString();
}
}
diff --git a/contrib/format-hdf5/src/main/resources/bootstrap-format-plugins.json b/contrib/format-hdf5/src/main/resources/bootstrap-format-plugins.json
index 1bee10f..290763e 100644
--- a/contrib/format-hdf5/src/main/resources/bootstrap-format-plugins.json
+++ b/contrib/format-hdf5/src/main/resources/bootstrap-format-plugins.json
@@ -7,7 +7,8 @@
"type": "hdf5",
"extensions": [
"h5"
- ]
+ ],
+ "showPreview": true
}
}
},
@@ -18,7 +19,8 @@
"type": "hdf5",
"extensions": [
"h5"
- ]
+ ],
+ "showPreview": true
}
}
},
@@ -29,7 +31,8 @@
"type": "hdf5",
"extensions": [
"h5"
- ]
+ ],
+ "showPreview": true
}
}
}
diff --git a/contrib/format-hdf5/src/test/java/org/apache/drill/exec/store/hdf5/TestHDF5Format.java b/contrib/format-hdf5/src/test/java/org/apache/drill/exec/store/hdf5/TestHDF5Format.java
index 48e6d80..e427216 100644
--- a/contrib/format-hdf5/src/test/java/org/apache/drill/exec/store/hdf5/TestHDF5Format.java
+++ b/contrib/format-hdf5/src/test/java/org/apache/drill/exec/store/hdf5/TestHDF5Format.java
@@ -112,6 +112,29 @@ public class TestHDF5Format extends ClusterTest {
}
@Test
+ public void testStarQueryWithoutPreview() throws Exception {
+ String sql = "SELECT * FROM table(dfs.`hdf5/dset.h5` (type => 'hdf5', showPreview => false))";
+ RowSet results = client.queryBuilder().sql(sql).rowSet();
+
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .add("path", MinorType.VARCHAR, DataMode.OPTIONAL)
+ .add("data_type", MinorType.VARCHAR, DataMode.OPTIONAL)
+ .add("file_name", MinorType.VARCHAR, DataMode.OPTIONAL)
+ .add("data_size", MinorType.BIGINT, DataMode.OPTIONAL)
+ .add("is_link", MinorType.BIT, DataMode.OPTIONAL)
+ .add("element_count", MinorType.BIGINT, DataMode.OPTIONAL)
+ .add("dataset_data_type", MinorType.VARCHAR, DataMode.OPTIONAL)
+ .add("dimensions", MinorType.VARCHAR, DataMode.OPTIONAL)
+ .build();
+
+ RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+ .addRow("/dset", "DATASET", "dset.h5", 96, false, 24, "int", "[4, 6]")
+ .build();
+
+ new RowSetComparison(expected).verifyAndClearAll(results);
+ }
+
+ @Test
public void testFlattenColumnQuery() throws RpcException {
String sql = "SELECT data[0] AS col1,\n" +
"data[1] as col2,\n" +