Posted to commits@drill.apache.org by cg...@apache.org on 2022/02/27 00:36:04 UTC

[drill] branch master updated: SAS Reader fixes (#2472)

This is an automated email from the ASF dual-hosted git repository.

cgivre pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/drill.git


The following commit(s) were added to refs/heads/master by this push:
     new fac0549  SAS Reader fixes (#2472)
fac0549 is described below

commit fac05493694f1f6a6123a591c447c27ee37f5463
Author: pseudomo <yu...@mail.ru>
AuthorDate: Sun Feb 27 01:35:55 2022 +0100

    SAS Reader fixes (#2472)
    
    Co-authored-by: pseudomo <ps...@yandex.ru>
---
 .../drill/exec/store/sas/SasBatchReader.java       | 87 +++++++++++-----------
 .../apache/drill/exec/store/sas/TestSasReader.java | 18 ++---
 2 files changed, 53 insertions(+), 52 deletions(-)

diff --git a/contrib/format-sas/src/main/java/org/apache/drill/exec/store/sas/SasBatchReader.java b/contrib/format-sas/src/main/java/org/apache/drill/exec/store/sas/SasBatchReader.java
index ac6d987..0305f8b 100644
--- a/contrib/format-sas/src/main/java/org/apache/drill/exec/store/sas/SasBatchReader.java
+++ b/contrib/format-sas/src/main/java/org/apache/drill/exec/store/sas/SasBatchReader.java
@@ -19,8 +19,10 @@
 package org.apache.drill.exec.store.sas;
 
 import com.epam.parso.Column;
+import com.epam.parso.ColumnFormat;
 import com.epam.parso.SasFileProperties;
 import com.epam.parso.SasFileReader;
+import com.epam.parso.impl.DateTimeConstants;
 import com.epam.parso.impl.SasFileReaderImpl;
 import org.apache.drill.common.AutoCloseables;
 import org.apache.drill.common.exceptions.CustomErrorContext;
@@ -163,30 +165,28 @@ public class SasBatchReader implements ManagedReader<FileScanFramework.FileSchem
   private TupleMetadata buildSchema() {
     SchemaBuilder builder = new SchemaBuilder();
     List<Column> columns = sasFileReader.getColumns();
-    int counter = 0;
     for (Column column : columns) {
-      String fieldName = column.getName();
+      String columnName = column.getName();
+      String columnType = column.getType().getSimpleName();
+      ColumnFormat columnFormat = column.getFormat();
       try {
         MinorType type = null;
-        if (firstRow[counter] != null) {
-          type = getType(firstRow[counter].getClass().getSimpleName());
-          if (type == MinorType.BIGINT && !column.getFormat().isEmpty()) {
-            logger.debug("Found possible time");
-            type = MinorType.TIME;
-          }
+        if (DateTimeConstants.TIME_FORMAT_STRINGS.contains(columnFormat.getName())) {
+          type = MinorType.TIME;
+        } else if (DateTimeConstants.DATE_FORMAT_STRINGS.containsKey(columnFormat.getName())) {
+          type = MinorType.DATE;
+        } else if (DateTimeConstants.DATETIME_FORMAT_STRINGS.containsKey(columnFormat.getName())) {
+          type = MinorType.TIMESTAMP;
         } else {
-          // If the first row is null
-          String columnType = column.getType().getSimpleName();
           type = getType(columnType);
         }
-        builder.addNullable(fieldName, type);
+        builder.addNullable(columnName, type);
       } catch (Exception e) {
         throw UserException.dataReadError()
-          .message("Error with column type: " + firstRow[counter].getClass().getSimpleName())
+          .message("Error with type of column " + columnName + "; Type: " + columnType)
           .addContext(errorContext)
           .build(logger);
       }
-      counter++;
     }
 
     return builder.buildSchema();
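
Note on the hunk above: schema inference now keys off the SAS column format name (via parso's DateTimeConstants) instead of the class of the first row's value, so a null in the first row no longer degrades the inferred type. A minimal sketch of the same lookup, assuming DateTimeConstants exposes the collections exactly as used above; the method name and the fallback helper are illustrative, not from the commit:

    // Requires com.epam.parso.ColumnFormat, com.epam.parso.impl.DateTimeConstants
    // and org.apache.drill.common.types.TypeProtos.MinorType.
    static MinorType inferType(ColumnFormat format, String javaTypeName) {
      String name = format.getName();
      if (DateTimeConstants.TIME_FORMAT_STRINGS.contains(name)) {
        return MinorType.TIME;
      } else if (DateTimeConstants.DATE_FORMAT_STRINGS.containsKey(name)) {
        return MinorType.DATE;
      } else if (DateTimeConstants.DATETIME_FORMAT_STRINGS.containsKey(name)) {
        return MinorType.TIMESTAMP;
      }
      return typeFromName(javaTypeName); // illustrative fallback on the Java type name
    }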
@@ -199,14 +199,14 @@ public class SasBatchReader implements ManagedReader<FileScanFramework.FileSchem
       MinorType type = field.getType().getMinorType();
       if (type == MinorType.FLOAT8) {
         writerList.add(new DoubleSasColumnWriter(colIndex, fieldName, rowWriter));
-      } else if (type == MinorType.BIGINT) {
-        writerList.add(new BigIntSasColumnWriter(colIndex, fieldName, rowWriter));
       } else if (type == MinorType.DATE) {
         writerList.add(new DateSasColumnWriter(colIndex, fieldName, rowWriter));
       } else if (type == MinorType.TIME) {
         writerList.add(new TimeSasColumnWriter(colIndex, fieldName, rowWriter));
-      } else if (type == MinorType.VARCHAR){
+      } else if (type == MinorType.VARCHAR) {
         writerList.add(new StringSasColumnWriter(colIndex, fieldName, rowWriter));
+      } else if (type == MinorType.TIMESTAMP) {
+        writerList.add(new TimestampSasColumnWriter(colIndex, fieldName, rowWriter));
       } else {
         throw UserException.dataReadError()
           .message(fieldName + " is an unparsable data type: " + type.name() + ".  The SAS reader does not support this data type.")
@@ -221,11 +221,11 @@ public class SasBatchReader implements ManagedReader<FileScanFramework.FileSchem
     switch (simpleType) {
       case "String":
         return MinorType.VARCHAR;
-      case "Numeric":
       case "Double":
-        return MinorType.FLOAT8;
+      case "Number":
+      case "Numeric":
       case "Long":
-        return MinorType.BIGINT;
+        return MinorType.FLOAT8;
       case "Date":
         return MinorType.DATE;
       default:
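
The collapse of "Long" and "Numeric" onto FLOAT8 in the hunk above matches how SAS stores numerics: numeric variables are floating-point doubles, and (as the zero workaround removed further down shows) parso can box a cell as a Long even in a floating-point column. Mapping them all to FLOAT8 keeps the Drill schema stable across batches. A trivial hedged illustration with made-up values:

    Object a = 42L;     // a cell parso boxed as Long
    Object b = 42.5d;   // a cell parso boxed as Double
    // Widening through Number accepts either representation:
    double da = ((Number) a).doubleValue();  // 42.0
    double db = ((Number) b).doubleValue();  // 42.5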
@@ -366,7 +366,7 @@ public class SasBatchReader implements ManagedReader<FileScanFramework.FileSchem
     @Override
     public void load(Object[] row) {
       if (row[columnIndex] != null) {
-        writer.setString((String) row[columnIndex]);
+        writer.setString(row[columnIndex].toString());
       }
     }
 
@@ -377,18 +377,6 @@ public class SasBatchReader implements ManagedReader<FileScanFramework.FileSchem
     }
   }
 
-  public static class BigIntSasColumnWriter extends SasColumnWriter {
-
-    BigIntSasColumnWriter (int columnIndex, String columnName, RowSetLoader rowWriter) {
-      super(columnIndex, columnName, rowWriter.scalar(columnName));
-    }
-
-    @Override
-    public void load(Object[] row) {
-      writer.setLong((Long) row[columnIndex]);
-    }
-  }
-
   public static class DateSasColumnWriter extends SasColumnWriter {
 
     DateSasColumnWriter (int columnIndex, String columnName, RowSetLoader rowWriter) {
@@ -397,8 +385,10 @@ public class SasBatchReader implements ManagedReader<FileScanFramework.FileSchem
 
     @Override
     public void load(Object[] row) {
-      LocalDate value = convertDateToLocalDate((Date)row[columnIndex]);
-      writer.setDate(value);
+      if (row[columnIndex] != null) {
+        LocalDate value = convertDateToLocalDate((Date)row[columnIndex]);
+        writer.setDate(value);
+      }
     }
 
     public void load(LocalDate date) {
@@ -408,13 +398,13 @@ public class SasBatchReader implements ManagedReader<FileScanFramework.FileSchem
 
   public static class TimeSasColumnWriter extends SasColumnWriter {
 
-    TimeSasColumnWriter (int columnIndex, String columnName, RowSetLoader rowWriter) {
+    TimeSasColumnWriter(int columnIndex, String columnName, RowSetLoader rowWriter) {
       super(columnIndex, columnName, rowWriter.scalar(columnName));
     }
 
     @Override
     public void load(Object[] row) {
-      int seconds = ((Long)row[columnIndex]).intValue();
+      int seconds = ((Long) row[columnIndex]).intValue();
       LocalTime value = LocalTime.parse(formatSeconds(seconds));
       writer.setTime(value);
     }
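
The time writer keeps the existing formatSeconds/LocalTime.parse round trip. If the raw cell is whole seconds since midnight, java.time also offers a direct conversion; a hedged sketch of that alternative (a possible simplification, not what this commit does):

    int seconds = 34200;  // made-up value: 09:30:00
    java.time.LocalTime t = java.time.LocalTime.ofSecondOfDay(seconds);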
@@ -453,13 +443,24 @@ public class SasBatchReader implements ManagedReader<FileScanFramework.FileSchem
 
     @Override
     public void load(Object[] row) {
-      // The SAS reader does something strange with zeros. For whatever reason, even if the
-      // field is a floating point number, the value is returned as a long.  This causes class
-      // cast exceptions.
-      if (row[columnIndex].equals(0L)) {
-        writer.setDouble(0.0);
-      } else {
-        writer.setDouble((Double) row[columnIndex]);
+      if (row[columnIndex] != null) {
+        if (row[columnIndex] instanceof Number) {
+          writer.setDouble(((Number) row[columnIndex]).doubleValue());
+        }
+      }
+    }
+  }
+
+  public static class TimestampSasColumnWriter extends SasColumnWriter {
+
+    TimestampSasColumnWriter(int columnIndex, String columnName, RowSetLoader rowWriter) {
+      super(columnIndex, columnName, rowWriter.scalar(columnName));
+    }
+
+    @Override
+    public void load(Object[] row) {
+      if (row[columnIndex] != null) {
+        writer.setTimestamp(((Date) row[columnIndex]).toInstant());
       }
     }
   }
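
The double and timestamp writers above share one defensive pattern: skip null cells, and widen any Number to double rather than casting to Double, which the removed zero-handling comment showed could fail when parso returns a Long. A self-contained hedged sketch; the helper names are illustrative, not from the commit:

    static Double toNullableDouble(Object cell) {
      return (cell instanceof Number) ? ((Number) cell).doubleValue() : null;
    }

    static java.time.Instant toNullableInstant(Object cell) {
      // SAS datetime cells arrive as java.util.Date; the scalar writer's
      // setTimestamp (as used above) consumes an Instant.
      return (cell == null) ? null : ((java.util.Date) cell).toInstant();
    }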
diff --git a/contrib/format-sas/src/test/java/org/apache/drill/exec/store/sas/TestSasReader.java b/contrib/format-sas/src/test/java/org/apache/drill/exec/store/sas/TestSasReader.java
index c007f1a..be0965e 100644
--- a/contrib/format-sas/src/test/java/org/apache/drill/exec/store/sas/TestSasReader.java
+++ b/contrib/format-sas/src/test/java/org/apache/drill/exec/store/sas/TestSasReader.java
@@ -56,7 +56,7 @@ public class TestSasReader extends ClusterTest {
     RowSet results  = client.queryBuilder().sql(sql).rowSet();
 
     TupleMetadata expectedSchema = new SchemaBuilder()
-      .addNullable("x1", MinorType.BIGINT)
+      .addNullable("x1", MinorType.FLOAT8)
       .addNullable("x2", MinorType.FLOAT8)
       .addNullable("x3", MinorType.VARCHAR)
       .addNullable("x4", MinorType.FLOAT8)
@@ -70,13 +70,13 @@ public class TestSasReader extends ClusterTest {
       .addNullable("x12", MinorType.FLOAT8)
       .addNullable("x13", MinorType.FLOAT8)
       .addNullable("x14", MinorType.FLOAT8)
-      .addNullable("x15", MinorType.BIGINT)
-      .addNullable("x16", MinorType.BIGINT)
-      .addNullable("x17", MinorType.BIGINT)
-      .addNullable("x18", MinorType.BIGINT)
-      .addNullable("x19", MinorType.BIGINT)
-      .addNullable("x20", MinorType.BIGINT)
-      .addNullable("x21", MinorType.BIGINT)
+      .addNullable("x15", MinorType.FLOAT8)
+      .addNullable("x16", MinorType.FLOAT8)
+      .addNullable("x17", MinorType.FLOAT8)
+      .addNullable("x18", MinorType.FLOAT8)
+      .addNullable("x19", MinorType.FLOAT8)
+      .addNullable("x20", MinorType.FLOAT8)
+      .addNullable("x21", MinorType.FLOAT8)
       .buildSchema();
 
     RowSet expected = new RowSetBuilder(client.allocator(), expectedSchema)
@@ -122,7 +122,7 @@ public class TestSasReader extends ClusterTest {
     RowSet results  = client.queryBuilder().sql(sql).rowSet();
 
     TupleMetadata expectedSchema = new SchemaBuilder()
-      .addNullable("x1", MinorType.BIGINT)
+      .addNullable("x1", MinorType.FLOAT8)
       .addNullable("x2", MinorType.FLOAT8)
       .addNullable("x3", MinorType.VARCHAR)
       .buildSchema();
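
With the BIGINT mapping gone, both tests now expect FLOAT8 for every numeric column. A hedged sketch of how such an expectation is typically asserted in Drill's RowSet tests, assuming the test framework's RowSetUtilities helper:

    // Assumed helper; compares schema and values, then releases both row sets.
    org.apache.drill.test.rowSet.RowSetUtilities.verify(expected, results);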