You are viewing a plain text version of this content. The canonical link to the original was a hyperlink and is not preserved in this plain-text copy.
Posted to commits@drill.apache.org by cg...@apache.org on 2020/10/22 18:04:11 UTC

[drill] 02/02: Initial commit

This is an automated email from the ASF dual-hosted git repository.

cgivre pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git

commit 5632dea97f2fb016954567877124f89a2fdfd440
Author: Charles Givre <cg...@apache.org>
AuthorDate: Wed Oct 21 11:42:56 2020 -0400

    Initial commit
---
 contrib/format-excel/pom.xml                       |  2 +-
 .../drill/exec/store/excel/ExcelBatchReader.java   |  4 ++-
 .../drill/exec/store/excel/TestExcelFormat.java    | 41 ++++++++++++++++++++++
 3 files changed, 45 insertions(+), 2 deletions(-)

diff --git a/contrib/format-excel/pom.xml b/contrib/format-excel/pom.xml
index 7399266..39572a9 100644
--- a/contrib/format-excel/pom.xml
+++ b/contrib/format-excel/pom.xml
@@ -67,7 +67,7 @@
     <dependency>
       <groupId>com.github.pjfanning</groupId>
       <artifactId>excel-streaming-reader</artifactId>
-      <version>2.3.3</version>
+      <version>2.3.5</version>
     </dependency>
   </dependencies>
   <build>
diff --git a/contrib/format-excel/src/main/java/org/apache/drill/exec/store/excel/ExcelBatchReader.java b/contrib/format-excel/src/main/java/org/apache/drill/exec/store/excel/ExcelBatchReader.java
index c60b163..1df4071 100644
--- a/contrib/format-excel/src/main/java/org/apache/drill/exec/store/excel/ExcelBatchReader.java
+++ b/contrib/format-excel/src/main/java/org/apache/drill/exec/store/excel/ExcelBatchReader.java
@@ -288,6 +288,8 @@ public class ExcelBatchReader implements ManagedReader<FileSchemaNegotiator> {
             break;
           case FORMULA:
             case NUMERIC:
+          case _NONE:
+          case BLANK:
             tempColumnName = cell.getStringCellValue();
             makeColumn(builder, tempColumnName, TypeProtos.MinorType.FLOAT8);
             excelFieldNames.add(colPosition, tempColumnName);
@@ -457,7 +459,7 @@ public class ExcelBatchReader implements ManagedReader<FileSchemaNegotiator> {
     } else if (cellType == CellType.NUMERIC && DateUtil.isCellDateFormatted(cell)) {
       // Case if the column is a date or time
       addColumnToArray(rowWriter, excelFieldNames.get(colPosition), MinorType.TIMESTAMP, false);
-    } else if (cellType == CellType.NUMERIC || cellType == CellType.FORMULA) {
+    } else if (cellType == CellType.NUMERIC || cellType == CellType.FORMULA || cellType == CellType.BLANK || cellType == CellType._NONE) {
       // Case if the column is numeric
       addColumnToArray(rowWriter, excelFieldNames.get(colPosition), MinorType.FLOAT8, false);
     } else {
diff --git a/contrib/format-excel/src/test/java/org/apache/drill/exec/store/excel/TestExcelFormat.java b/contrib/format-excel/src/test/java/org/apache/drill/exec/store/excel/TestExcelFormat.java
index 387d708..934f78c 100644
--- a/contrib/format-excel/src/test/java/org/apache/drill/exec/store/excel/TestExcelFormat.java
+++ b/contrib/format-excel/src/test/java/org/apache/drill/exec/store/excel/TestExcelFormat.java
@@ -21,6 +21,7 @@ package org.apache.drill.exec.store.excel;
 import org.apache.drill.categories.RowSetTests;
 import org.apache.drill.common.exceptions.DrillRuntimeException;
 import org.apache.drill.common.types.TypeProtos;
+import org.apache.drill.common.types.TypeProtos.MinorType;
 import org.apache.drill.exec.physical.rowSet.RowSet;
 import org.apache.drill.exec.physical.rowSet.RowSetBuilder;
 import org.apache.drill.exec.record.metadata.SchemaBuilder;
@@ -431,4 +432,44 @@ public class TestExcelFormat extends ClusterTest {
       .include("Limit", "maxRecords=5")
       .match();
   }
+
+  @Test
+  public void testBlankColumnFix() throws Exception {
+    String sql = "SELECT * FROM dfs.`excel/zips-small.xlsx`";
+
+    RowSet results = client.queryBuilder().sql(sql).rowSet();
+    TupleMetadata expectedSchema = new SchemaBuilder()
+      .addNullable("zip", MinorType.FLOAT8)
+      .addNullable("lat", MinorType.FLOAT8)
+      .addNullable("lng", MinorType.FLOAT8)
+      .addNullable("city", MinorType.VARCHAR)
+      .addNullable("state_id", MinorType.VARCHAR)
+      .addNullable("state_name", MinorType.VARCHAR)
+      .addNullable("zcta", MinorType.VARCHAR)
+      .addNullable("parent_zcta", MinorType.FLOAT8)
+      .addNullable("population", MinorType.FLOAT8)
+      .addNullable("density", MinorType.FLOAT8)
+      .addNullable("county_fips", MinorType.FLOAT8)
+      .addNullable("county_name", MinorType.VARCHAR)
+      .addNullable("county_weights", MinorType.VARCHAR)
+      .addNullable("county_names_all", MinorType.VARCHAR)
+      .addNullable("county_fips_all", MinorType.VARCHAR)
+      .addNullable("imprecise", MinorType.VARCHAR)
+      .addNullable("military", MinorType.VARCHAR)
+      .addNullable("timezone", MinorType.VARCHAR)
+      .buildSchema();
+
+    RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+      .addRow(601.0, 18.18004, -66.75218, "Adjuntas", "PR", "Puerto Rico", "TRUE", 0.0, 17242.0, 111.4, 72001.0, "Adjuntas", "{'72001':99.43,'72141':0.57}", "Adjuntas|Utuado",
+      "72001|72141", "FALSE", "FALSE", "America/Puerto_Rico")
+      .addRow(602.0, 18.36073, -67.17517, "Aguada", "PR", "Puerto Rico", "TRUE", 0.0, 38442.0, 523.5, 72003.0, "Aguada", "{'72003':100}", "Aguada", "72003", "FALSE", "FALSE", "America" +
+    "/Puerto_Rico")
+      .addRow(603.0, 18.45439, -67.12202, "Aguadilla", "PR", "Puerto Rico", "TRUE", 0.0, 48814.0, 667.9, 72005.0, "Aguadilla", "{'72005':100}", "Aguadilla", "72005", "FALSE", "FALSE",
+    "America/Puerto_Rico")
+      .addRow(606.0, 18.16724, -66.93828, "Maricao", "PR", "Puerto Rico", "TRUE", 0.0, 6437.0, 60.4, 72093.0, "Maricao", "{'72093':94.88,'72121':1.35,'72153':3.78}", "Maricao|Yauco" +
+    "|Sabana Grande", "72093|72153|72121", "FALSE", "FALSE", "America/Puerto_Rico")
+      .build();
+
+    new RowSetComparison(expected).verifyAndClearAll(results);
+  }
 }