You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by cg...@apache.org on 2020/10/22 18:04:11 UTC
[drill] 02/02: Initial commit
This is an automated email from the ASF dual-hosted git repository.
cgivre pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git
commit 5632dea97f2fb016954567877124f89a2fdfd440
Author: Charles Givre <cg...@apache.org>
AuthorDate: Wed Oct 21 11:42:56 2020 -0400
Initial commit
---
contrib/format-excel/pom.xml | 2 +-
.../drill/exec/store/excel/ExcelBatchReader.java | 4 ++-
.../drill/exec/store/excel/TestExcelFormat.java | 41 ++++++++++++++++++++++
3 files changed, 45 insertions(+), 2 deletions(-)
diff --git a/contrib/format-excel/pom.xml b/contrib/format-excel/pom.xml
index 7399266..39572a9 100644
--- a/contrib/format-excel/pom.xml
+++ b/contrib/format-excel/pom.xml
@@ -67,7 +67,7 @@
<dependency>
<groupId>com.github.pjfanning</groupId>
<artifactId>excel-streaming-reader</artifactId>
- <version>2.3.3</version>
+ <version>2.3.5</version>
</dependency>
</dependencies>
<build>
diff --git a/contrib/format-excel/src/main/java/org/apache/drill/exec/store/excel/ExcelBatchReader.java b/contrib/format-excel/src/main/java/org/apache/drill/exec/store/excel/ExcelBatchReader.java
index c60b163..1df4071 100644
--- a/contrib/format-excel/src/main/java/org/apache/drill/exec/store/excel/ExcelBatchReader.java
+++ b/contrib/format-excel/src/main/java/org/apache/drill/exec/store/excel/ExcelBatchReader.java
@@ -288,6 +288,8 @@ public class ExcelBatchReader implements ManagedReader<FileSchemaNegotiator> {
break;
case FORMULA:
case NUMERIC:
+ case _NONE:
+ case BLANK:
tempColumnName = cell.getStringCellValue();
makeColumn(builder, tempColumnName, TypeProtos.MinorType.FLOAT8);
excelFieldNames.add(colPosition, tempColumnName);
@@ -457,7 +459,7 @@ public class ExcelBatchReader implements ManagedReader<FileSchemaNegotiator> {
} else if (cellType == CellType.NUMERIC && DateUtil.isCellDateFormatted(cell)) {
// Case if the column is a date or time
addColumnToArray(rowWriter, excelFieldNames.get(colPosition), MinorType.TIMESTAMP, false);
- } else if (cellType == CellType.NUMERIC || cellType == CellType.FORMULA) {
+ } else if (cellType == CellType.NUMERIC || cellType == CellType.FORMULA || cellType == CellType.BLANK || cellType == CellType._NONE) {
// Case if the column is numeric
addColumnToArray(rowWriter, excelFieldNames.get(colPosition), MinorType.FLOAT8, false);
} else {
diff --git a/contrib/format-excel/src/test/java/org/apache/drill/exec/store/excel/TestExcelFormat.java b/contrib/format-excel/src/test/java/org/apache/drill/exec/store/excel/TestExcelFormat.java
index 387d708..934f78c 100644
--- a/contrib/format-excel/src/test/java/org/apache/drill/exec/store/excel/TestExcelFormat.java
+++ b/contrib/format-excel/src/test/java/org/apache/drill/exec/store/excel/TestExcelFormat.java
@@ -21,6 +21,7 @@ package org.apache.drill.exec.store.excel;
import org.apache.drill.categories.RowSetTests;
import org.apache.drill.common.exceptions.DrillRuntimeException;
import org.apache.drill.common.types.TypeProtos;
+import org.apache.drill.common.types.TypeProtos.MinorType;
import org.apache.drill.exec.physical.rowSet.RowSet;
import org.apache.drill.exec.physical.rowSet.RowSetBuilder;
import org.apache.drill.exec.record.metadata.SchemaBuilder;
@@ -431,4 +432,44 @@ public class TestExcelFormat extends ClusterTest {
.include("Limit", "maxRecords=5")
.match();
}
+
+ @Test
+ public void testBlankColumnFix() throws Exception {
+ String sql = "SELECT * FROM dfs.`excel/zips-small.xlsx`";
+
+ RowSet results = client.queryBuilder().sql(sql).rowSet();
+ TupleMetadata expectedSchema = new SchemaBuilder()
+ .addNullable("zip", MinorType.FLOAT8)
+ .addNullable("lat", MinorType.FLOAT8)
+ .addNullable("lng", MinorType.FLOAT8)
+ .addNullable("city", MinorType.VARCHAR)
+ .addNullable("state_id", MinorType.VARCHAR)
+ .addNullable("state_name", MinorType.VARCHAR)
+ .addNullable("zcta", MinorType.VARCHAR)
+ .addNullable("parent_zcta", MinorType.FLOAT8)
+ .addNullable("population", MinorType.FLOAT8)
+ .addNullable("density", MinorType.FLOAT8)
+ .addNullable("county_fips", MinorType.FLOAT8)
+ .addNullable("county_name", MinorType.VARCHAR)
+ .addNullable("county_weights", MinorType.VARCHAR)
+ .addNullable("county_names_all", MinorType.VARCHAR)
+ .addNullable("county_fips_all", MinorType.VARCHAR)
+ .addNullable("imprecise", MinorType.VARCHAR)
+ .addNullable("military", MinorType.VARCHAR)
+ .addNullable("timezone", MinorType.VARCHAR)
+ .buildSchema();
+
+ RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
+ .addRow(601.0, 18.18004, -66.75218, "Adjuntas", "PR", "Puerto Rico", "TRUE", 0.0, 17242.0, 111.4, 72001.0, "Adjuntas", "{'72001':99.43,'72141':0.57}", "Adjuntas|Utuado",
+ "72001|72141", "FALSE", "FALSE", "America/Puerto_Rico")
+ .addRow(602.0, 18.36073, -67.17517, "Aguada", "PR", "Puerto Rico", "TRUE", 0.0, 38442.0, 523.5, 72003.0, "Aguada", "{'72003':100}", "Aguada", "72003", "FALSE", "FALSE", "America" +
+ "/Puerto_Rico")
+ .addRow(603.0, 18.45439, -67.12202, "Aguadilla", "PR", "Puerto Rico", "TRUE", 0.0, 48814.0, 667.9, 72005.0, "Aguadilla", "{'72005':100}", "Aguadilla", "72005", "FALSE", "FALSE",
+ "America/Puerto_Rico")
+ .addRow(606.0, 18.16724, -66.93828, "Maricao", "PR", "Puerto Rico", "TRUE", 0.0, 6437.0, 60.4, 72093.0, "Maricao", "{'72093':94.88,'72121':1.35,'72153':3.78}", "Maricao|Yauco" +
+ "|Sabana Grande", "72093|72153|72121", "FALSE", "FALSE", "America/Puerto_Rico")
+ .build();
+
+ new RowSetComparison(expected).verifyAndClearAll(results);
+ }
}