You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@drill.apache.org by cg...@apache.org on 2022/02/27 00:36:04 UTC
[drill] branch master updated: SAS Reader fixes (#2472)
This is an automated email from the ASF dual-hosted git repository.
cgivre pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git
The following commit(s) were added to refs/heads/master by this push:
new fac0549 SAS Reader fixes (#2472)
fac0549 is described below
commit fac05493694f1f6a6123a591c447c27ee37f5463
Author: pseudomo <yu...@mail.ru>
AuthorDate: Sun Feb 27 01:35:55 2022 +0100
SAS Reader fixes (#2472)
Co-authored-by: pseudomo <ps...@yandex.ru>
---
.../drill/exec/store/sas/SasBatchReader.java | 87 +++++++++++-----------
.../apache/drill/exec/store/sas/TestSasReader.java | 18 ++---
2 files changed, 53 insertions(+), 52 deletions(-)
diff --git a/contrib/format-sas/src/main/java/org/apache/drill/exec/store/sas/SasBatchReader.java b/contrib/format-sas/src/main/java/org/apache/drill/exec/store/sas/SasBatchReader.java
index ac6d987..0305f8b 100644
--- a/contrib/format-sas/src/main/java/org/apache/drill/exec/store/sas/SasBatchReader.java
+++ b/contrib/format-sas/src/main/java/org/apache/drill/exec/store/sas/SasBatchReader.java
@@ -19,8 +19,10 @@
package org.apache.drill.exec.store.sas;
import com.epam.parso.Column;
+import com.epam.parso.ColumnFormat;
import com.epam.parso.SasFileProperties;
import com.epam.parso.SasFileReader;
+import com.epam.parso.impl.DateTimeConstants;
import com.epam.parso.impl.SasFileReaderImpl;
import org.apache.drill.common.AutoCloseables;
import org.apache.drill.common.exceptions.CustomErrorContext;
@@ -163,30 +165,28 @@ public class SasBatchReader implements ManagedReader<FileScanFramework.FileSchem
private TupleMetadata buildSchema() {
SchemaBuilder builder = new SchemaBuilder();
List<Column> columns = sasFileReader.getColumns();
- int counter = 0;
for (Column column : columns) {
- String fieldName = column.getName();
+ String columnName = column.getName();
+ String columnType = column.getType().getSimpleName();
+ ColumnFormat columnFormat = column.getFormat();
try {
MinorType type = null;
- if (firstRow[counter] != null) {
- type = getType(firstRow[counter].getClass().getSimpleName());
- if (type == MinorType.BIGINT && !column.getFormat().isEmpty()) {
- logger.debug("Found possible time");
- type = MinorType.TIME;
- }
+ if (DateTimeConstants.TIME_FORMAT_STRINGS.contains(columnFormat.getName())) {
+ type = MinorType.TIME;
+ } else if (DateTimeConstants.DATE_FORMAT_STRINGS.containsKey(columnFormat.getName())) {
+ type = MinorType.DATE;
+ } else if (DateTimeConstants.DATETIME_FORMAT_STRINGS.containsKey(columnFormat.getName())) {
+ type = MinorType.TIMESTAMP;
} else {
- // If the first row is null
- String columnType = column.getType().getSimpleName();
type = getType(columnType);
}
- builder.addNullable(fieldName, type);
+ builder.addNullable(columnName, type);
} catch (Exception e) {
throw UserException.dataReadError()
- .message("Error with column type: " + firstRow[counter].getClass().getSimpleName())
+ .message("Error with type of column " + columnName + "; Type: " + columnType)
.addContext(errorContext)
.build(logger);
}
- counter++;
}
return builder.buildSchema();
@@ -199,14 +199,14 @@ public class SasBatchReader implements ManagedReader<FileScanFramework.FileSchem
MinorType type = field.getType().getMinorType();
if (type == MinorType.FLOAT8) {
writerList.add(new DoubleSasColumnWriter(colIndex, fieldName, rowWriter));
- } else if (type == MinorType.BIGINT) {
- writerList.add(new BigIntSasColumnWriter(colIndex, fieldName, rowWriter));
} else if (type == MinorType.DATE) {
writerList.add(new DateSasColumnWriter(colIndex, fieldName, rowWriter));
} else if (type == MinorType.TIME) {
writerList.add(new TimeSasColumnWriter(colIndex, fieldName, rowWriter));
- } else if (type == MinorType.VARCHAR){
+ } else if (type == MinorType.VARCHAR) {
writerList.add(new StringSasColumnWriter(colIndex, fieldName, rowWriter));
+ } else if (type == MinorType.TIMESTAMP) {
+ writerList.add(new TimestampSasColumnWriter(colIndex, fieldName, rowWriter));
} else {
throw UserException.dataReadError()
.message(fieldName + " is an unparsable data type: " + type.name() + ". The SAS reader does not support this data type.")
@@ -221,11 +221,11 @@ public class SasBatchReader implements ManagedReader<FileScanFramework.FileSchem
switch (simpleType) {
case "String":
return MinorType.VARCHAR;
- case "Numeric":
case "Double":
- return MinorType.FLOAT8;
+ case "Number":
+ case "Numeric":
case "Long":
- return MinorType.BIGINT;
+ return MinorType.FLOAT8;
case "Date":
return MinorType.DATE;
default:
@@ -366,7 +366,7 @@ public class SasBatchReader implements ManagedReader<FileScanFramework.FileSchem
@Override
public void load(Object[] row) {
if (row[columnIndex] != null) {
- writer.setString((String) row[columnIndex]);
+ writer.setString(row[columnIndex].toString());
}
}
@@ -377,18 +377,6 @@ public class SasBatchReader implements ManagedReader<FileScanFramework.FileSchem
}
}
- public static class BigIntSasColumnWriter extends SasColumnWriter {
-
- BigIntSasColumnWriter (int columnIndex, String columnName, RowSetLoader rowWriter) {
- super(columnIndex, columnName, rowWriter.scalar(columnName));
- }
-
- @Override
- public void load(Object[] row) {
- writer.setLong((Long) row[columnIndex]);
- }
- }
-
public static class DateSasColumnWriter extends SasColumnWriter {
DateSasColumnWriter (int columnIndex, String columnName, RowSetLoader rowWriter) {
@@ -397,8 +385,10 @@ public class SasBatchReader implements ManagedReader<FileScanFramework.FileSchem
@Override
public void load(Object[] row) {
- LocalDate value = convertDateToLocalDate((Date)row[columnIndex]);
- writer.setDate(value);
+ if (row[columnIndex] != null) {
+ LocalDate value = convertDateToLocalDate((Date)row[columnIndex]);
+ writer.setDate(value);
+ }
}
public void load(LocalDate date) {
@@ -408,13 +398,13 @@ public class SasBatchReader implements ManagedReader<FileScanFramework.FileSchem
public static class TimeSasColumnWriter extends SasColumnWriter {
- TimeSasColumnWriter (int columnIndex, String columnName, RowSetLoader rowWriter) {
+ TimeSasColumnWriter(int columnIndex, String columnName, RowSetLoader rowWriter) {
super(columnIndex, columnName, rowWriter.scalar(columnName));
}
@Override
public void load(Object[] row) {
- int seconds = ((Long)row[columnIndex]).intValue();
+ int seconds = ((Long) row[columnIndex]).intValue();
LocalTime value = LocalTime.parse(formatSeconds(seconds));
writer.setTime(value);
}
@@ -453,13 +443,24 @@ public class SasBatchReader implements ManagedReader<FileScanFramework.FileSchem
@Override
public void load(Object[] row) {
- // The SAS reader does something strange with zeros. For whatever reason, even if the
- // field is a floating point number, the value is returned as a long. This causes class
- // cast exceptions.
- if (row[columnIndex].equals(0L)) {
- writer.setDouble(0.0);
- } else {
- writer.setDouble((Double) row[columnIndex]);
+ if (row[columnIndex] != null) {
+ if (row[columnIndex] instanceof Number) {
+ writer.setDouble(((Number) row[columnIndex]).doubleValue());
+ }
+ }
+ }
+ }
+
+ public static class TimestampSasColumnWriter extends SasColumnWriter {
+
+ TimestampSasColumnWriter(int columnIndex, String columnName, RowSetLoader rowWriter) {
+ super(columnIndex, columnName, rowWriter.scalar(columnName));
+ }
+
+ @Override
+ public void load(Object[] row) {
+ if (row[columnIndex] != null) {
+ writer.setTimestamp(((Date) row[columnIndex]).toInstant());
}
}
}
diff --git a/contrib/format-sas/src/test/java/org/apache/drill/exec/store/sas/TestSasReader.java b/contrib/format-sas/src/test/java/org/apache/drill/exec/store/sas/TestSasReader.java
index c007f1a..be0965e 100644
--- a/contrib/format-sas/src/test/java/org/apache/drill/exec/store/sas/TestSasReader.java
+++ b/contrib/format-sas/src/test/java/org/apache/drill/exec/store/sas/TestSasReader.java
@@ -56,7 +56,7 @@ public class TestSasReader extends ClusterTest {
RowSet results = client.queryBuilder().sql(sql).rowSet();
TupleMetadata expectedSchema = new SchemaBuilder()
- .addNullable("x1", MinorType.BIGINT)
+ .addNullable("x1", MinorType.FLOAT8)
.addNullable("x2", MinorType.FLOAT8)
.addNullable("x3", MinorType.VARCHAR)
.addNullable("x4", MinorType.FLOAT8)
@@ -70,13 +70,13 @@ public class TestSasReader extends ClusterTest {
.addNullable("x12", MinorType.FLOAT8)
.addNullable("x13", MinorType.FLOAT8)
.addNullable("x14", MinorType.FLOAT8)
- .addNullable("x15", MinorType.BIGINT)
- .addNullable("x16", MinorType.BIGINT)
- .addNullable("x17", MinorType.BIGINT)
- .addNullable("x18", MinorType.BIGINT)
- .addNullable("x19", MinorType.BIGINT)
- .addNullable("x20", MinorType.BIGINT)
- .addNullable("x21", MinorType.BIGINT)
+ .addNullable("x15", MinorType.FLOAT8)
+ .addNullable("x16", MinorType.FLOAT8)
+ .addNullable("x17", MinorType.FLOAT8)
+ .addNullable("x18", MinorType.FLOAT8)
+ .addNullable("x19", MinorType.FLOAT8)
+ .addNullable("x20", MinorType.FLOAT8)
+ .addNullable("x21", MinorType.FLOAT8)
.buildSchema();
RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
@@ -122,7 +122,7 @@ public class TestSasReader extends ClusterTest {
RowSet results = client.queryBuilder().sql(sql).rowSet();
TupleMetadata expectedSchema = new SchemaBuilder()
- .addNullable("x1", MinorType.BIGINT)
+ .addNullable("x1", MinorType.FLOAT8)
.addNullable("x2", MinorType.FLOAT8)
.addNullable("x3", MinorType.VARCHAR)
.buildSchema();