You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by li...@apache.org on 2022/05/18 17:52:32 UTC

[arrow] branch master updated: ARROW-16427: [Java] Provide explicit column type mapping

This is an automated email from the ASF dual-hosted git repository.

lidavidm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 05bd8fdd80 ARROW-16427: [Java] Provide explicit column type mapping
05bd8fdd80 is described below

commit 05bd8fdd801d4c9bcf57950b79c9a133a49d38bb
Author: Todd Farmer <to...@fivefarmers.com>
AuthorDate: Wed May 18 13:52:18 2022 -0400

    ARROW-16427: [Java] Provide explicit column type mapping
    
    Closes #13166 from toddfarmer/toddfarmer/arrow-16427
    
    Authored-by: Todd Farmer <to...@fivefarmers.com>
    Signed-off-by: David Li <li...@gmail.com>
---
 .../arrow/adapter/jdbc/JdbcToArrowConfig.java      |  57 ++++++++
 .../adapter/jdbc/JdbcToArrowConfigBuilder.java     |  19 ++-
 .../arrow/adapter/jdbc/JdbcToArrowUtils.java       |  27 +++-
 .../arrow/adapter/jdbc/ResultSetUtility.java       |  98 ++++++++++---
 .../arrow/adapter/jdbc/h2/JdbcToArrowTest.java     | 153 ++++++++++++++++++++-
 5 files changed, 331 insertions(+), 23 deletions(-)

diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java
index a1bb8b667f..d1c21621a2 100644
--- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java
+++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java
@@ -57,6 +57,8 @@ public final class JdbcToArrowConfig {
   private final boolean reuseVectorSchemaRoot;
   private final Map<Integer, JdbcFieldInfo> arraySubTypesByColumnIndex;
   private final Map<String, JdbcFieldInfo> arraySubTypesByColumnName;
+  private final Map<Integer, JdbcFieldInfo> explicitTypesByColumnIndex;
+  private final Map<String, JdbcFieldInfo> explicitTypesByColumnName;
   /**
    * The maximum rowCount to read each time when partially convert data.
    * Default value is 1024 and -1 means disable partial read.
@@ -140,6 +142,31 @@ public final class JdbcToArrowConfig {
       Map<String, JdbcFieldInfo> arraySubTypesByColumnName,
       int targetBatchSize,
       Function<JdbcFieldInfo, ArrowType> jdbcToArrowTypeConverter) {
+
+    this(
+        allocator,
+        calendar,
+        includeMetadata,
+        reuseVectorSchemaRoot,
+        arraySubTypesByColumnIndex,
+        arraySubTypesByColumnName,
+        targetBatchSize,
+        jdbcToArrowTypeConverter,
+        null,
+        null);
+  }
+
+  JdbcToArrowConfig(
+      BufferAllocator allocator,
+      Calendar calendar,
+      boolean includeMetadata,
+      boolean reuseVectorSchemaRoot,
+      Map<Integer, JdbcFieldInfo> arraySubTypesByColumnIndex,
+      Map<String, JdbcFieldInfo> arraySubTypesByColumnName,
+      int targetBatchSize,
+      Function<JdbcFieldInfo, ArrowType> jdbcToArrowTypeConverter,
+      Map<Integer, JdbcFieldInfo> explicitTypesByColumnIndex,
+      Map<String, JdbcFieldInfo> explicitTypesByColumnName) {
     Preconditions.checkNotNull(allocator, "Memory allocator cannot be null");
     this.allocator = allocator;
     this.calendar = calendar;
@@ -148,6 +175,8 @@ public final class JdbcToArrowConfig {
     this.arraySubTypesByColumnIndex = arraySubTypesByColumnIndex;
     this.arraySubTypesByColumnName = arraySubTypesByColumnName;
     this.targetBatchSize = targetBatchSize;
+    this.explicitTypesByColumnIndex = explicitTypesByColumnIndex;
+    this.explicitTypesByColumnName = explicitTypesByColumnName;
 
     // set up type converter
     this.jdbcToArrowTypeConverter = jdbcToArrowTypeConverter != null ? jdbcToArrowTypeConverter :
@@ -231,4 +260,32 @@ public final class JdbcToArrowConfig {
       return arraySubTypesByColumnName.get(name);
     }
   }
+
+  /**
+   * Looks up the {@link JdbcFieldInfo} explicitly configured for the given column index.
+   *
+   * @param index The column index to look up; JdbcToArrowUtils passes the 1-based ResultSetMetaData index.
+   * @return The configured {@link JdbcFieldInfo}, or <code>null</code> if no index map was set or it has no entry.
+   */
+  public JdbcFieldInfo getExplicitTypeByColumnIndex(int index) {
+    if (explicitTypesByColumnIndex == null) { // no explicit index mapping was configured
+      return null;
+    } else {
+      return explicitTypesByColumnIndex.get(index);
+    }
+  }
+
+  /**
+   * Looks up the {@link JdbcFieldInfo} explicitly configured for the given column name key.
+   *
+   * @param name The key to look up; JdbcToArrowUtils passes the column label (getColumnLabel), not getColumnName.
+   * @return The configured {@link JdbcFieldInfo}, or <code>null</code> if no name map was set or it has no entry.
+   */
+  public JdbcFieldInfo getExplicitTypeByColumnName(String name) {
+    if (explicitTypesByColumnName == null) { // no explicit name mapping was configured
+      return null;
+    } else {
+      return explicitTypesByColumnName.get(name);
+    }
+  }
 }
diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java
index 3941d978f9..2f1f91ca1c 100644
--- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java
+++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java
@@ -37,7 +37,8 @@ public class JdbcToArrowConfigBuilder {
   private boolean reuseVectorSchemaRoot;
   private Map<Integer, JdbcFieldInfo> arraySubTypesByColumnIndex;
   private Map<String, JdbcFieldInfo> arraySubTypesByColumnName;
-
+  private Map<Integer, JdbcFieldInfo> explicitTypesByColumnIndex;
+  private Map<String, JdbcFieldInfo> explicitTypesByColumnName;
   private int targetBatchSize;
   private Function<JdbcFieldInfo, ArrowType> jdbcToArrowTypeConverter;
 
@@ -53,6 +54,8 @@ public class JdbcToArrowConfigBuilder {
     this.reuseVectorSchemaRoot = false;
     this.arraySubTypesByColumnIndex = null;
     this.arraySubTypesByColumnName = null;
+    this.explicitTypesByColumnIndex = null;
+    this.explicitTypesByColumnName = null;
   }
 
   /**
@@ -164,6 +167,16 @@ public class JdbcToArrowConfigBuilder {
     return this;
   }
 
+  public JdbcToArrowConfigBuilder setExplicitTypesByColumnIndex(Map<Integer, JdbcFieldInfo> map) {
+    this.explicitTypesByColumnIndex = map; // kept by reference (no copy); null means no mapping by index
+    return this; // returns this builder so calls can be chained
+  }
+
+  public JdbcToArrowConfigBuilder setExplicitTypesByColumnName(Map<String, JdbcFieldInfo> map) {
+    this.explicitTypesByColumnName = map; // kept by reference (no copy); JdbcToArrowUtils looks up by column label
+    return this; // returns this builder so calls can be chained
+  }
+
   public JdbcToArrowConfigBuilder setTargetBatchSize(int targetBatchSize) {
     this.targetBatchSize = targetBatchSize;
     return this;
@@ -196,6 +209,8 @@ public class JdbcToArrowConfigBuilder {
         arraySubTypesByColumnIndex,
         arraySubTypesByColumnName,
         targetBatchSize,
-        jdbcToArrowTypeConverter);
+        jdbcToArrowTypeConverter,
+        explicitTypesByColumnIndex,
+        explicitTypesByColumnName);
   }
 }
diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java
index 462e6386c5..a918afaf05 100644
--- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java
+++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java
@@ -255,7 +255,8 @@ public class JdbcToArrowUtils {
         metadata = null;
       }
 
-      final ArrowType arrowType = config.getJdbcToArrowTypeConverter().apply(new JdbcFieldInfo(rsmd, i));
+      final JdbcFieldInfo columnFieldInfo = getJdbcFieldInfoForColumn(rsmd, i, config);
+      final ArrowType arrowType = config.getJdbcToArrowTypeConverter().apply(columnFieldInfo);
       if (arrowType != null) {
         final FieldType fieldType = new FieldType(
                 isColumnNullable(rsmd, i), arrowType, /* dictionary encoding */ null, metadata);
@@ -278,6 +279,30 @@ public class JdbcToArrowUtils {
     return new Schema(fields, null);
   }
 
+  private static JdbcFieldInfo getJdbcFieldInfoForColumn( // picks the JdbcFieldInfo given to the type converter
+      ResultSetMetaData rsmd,
+      int arrayColumn, // 1-based column index; called for every column, not only array columns
+      JdbcToArrowConfig config)
+          throws SQLException {
+    Preconditions.checkNotNull(rsmd, "ResultSet MetaData object cannot be null");
+    Preconditions.checkNotNull(config, "Configuration must not be null");
+    Preconditions.checkArgument(
+            arrayColumn > 0,
+            "ResultSetMetaData columns start with 1; column cannot be less than 1");
+    Preconditions.checkArgument(
+            arrayColumn <= rsmd.getColumnCount(),
+            "Column number cannot be more than the number of columns");
+
+    JdbcFieldInfo fieldInfo = config.getExplicitTypeByColumnIndex(arrayColumn); // index mapping is tried first
+    if (fieldInfo == null) { // no index mapping: fall back to the map keyed by column label
+      fieldInfo = config.getExplicitTypeByColumnName(rsmd.getColumnLabel(arrayColumn));
+    }
+    if (fieldInfo != null) {
+      return fieldInfo;
+    }
+    return new JdbcFieldInfo(rsmd, arrayColumn); // no explicit mapping: same value the pre-change code built
+  }
+
   /* Uses the configuration to determine what the array sub-type JdbcFieldInfo is.
    * If no sub-type can be found, returns null.
    */
diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/ResultSetUtility.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/ResultSetUtility.java
index 06e84db06a..2ffba2e484 100644
--- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/ResultSetUtility.java
+++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/ResultSetUtility.java
@@ -332,17 +332,13 @@ public class ResultSetUtility {
       private int precision;
       private int scale;
       private int nullable;
+      private String label;
 
-      private MockColumnMetaData(int i, MockDataElement element) throws SQLException {
-        this.index = i;
-        this.sqlType = element.sqlType;
-        this.precision = element.getPrecision();
-        this.scale = element.getScale();
-        this.nullable = element.isNullable();
-      }
+
+      private MockColumnMetaData() {}
 
       private String getLabel() {
-        return "col_" + index;
+        return label;
       }
 
       private String getName() {
@@ -365,8 +361,57 @@ public class ResultSetUtility {
         return nullable;
       }
 
-      static MockColumnMetaData fromDataElement(MockDataElement element, int i) throws SQLException {
-        return new MockColumnMetaData(i, element);
+      public static MockColumnMetaData fromDataElement(MockDataElement element, int i) throws SQLException {
+        return MockColumnMetaData.builder()
+                .index(i)
+                .sqlType(element.getSqlType())
+                .precision(element.getPrecision()) // getPrecision() throws for types other than VARCHAR/DECIMAL
+                .scale(element.getScale()) // getScale() throws for types other than VARCHAR/DECIMAL
+                .nullable(element.isNullable())
+                .label("col_" + i) // same label the removed getLabel() body computed from the index
+                .build();
+      }
+
+      public static Builder builder() {
+        return new Builder(); // each Builder wraps its own newly created MockColumnMetaData
+      }
+
+      public static class Builder { // chained-setter builder for MockColumnMetaData
+        private MockColumnMetaData columnMetaData = new MockColumnMetaData(); // the one instance all setters mutate
+
+        public Builder index(int index) {
+          this.columnMetaData.index = index;
+          return this;
+        }
+
+        public Builder label(String label) {
+          this.columnMetaData.label = label; // if never called, label stays null and getLabel() returns null
+          return this;
+        }
+
+        public Builder sqlType(int sqlType) {
+          this.columnMetaData.sqlType = sqlType;
+          return this;
+        }
+
+        public Builder precision(int precision) {
+          this.columnMetaData.precision = precision;
+          return this;
+        }
+
+        public Builder scale(int scale) {
+          this.columnMetaData.scale = scale;
+          return this;
+        }
+
+        public Builder nullable(int nullable) {
+          this.columnMetaData.nullable = nullable;
+          return this;
+        }
+
+        public MockColumnMetaData build() {
+          return this.columnMetaData; // same instance every time (no copy); reusing the Builder aliases results
+        }
+      }
 
     }
@@ -410,24 +455,39 @@ public class ResultSetUtility {
     }
 
     private int getPrecision() throws SQLException {
-      if (this.sqlType == Types.VARCHAR) {
-        return getValueAsString().length();
+      switch (this.sqlType) {
+        case Types.VARCHAR:
+          return getValueAsString().length();
+        case Types.DECIMAL:
+          return getBigDecimal().precision();
+        default:
+          throw getExceptionToThrow("Unable to determine precision for data type: " + sqlType);
       }
-      throw getExceptionToThrow("Unable to determine precision for data type!");
     }
 
     private int getScale() throws SQLException {
-      if (this.sqlType == Types.VARCHAR) {
-        return 0;
+      switch (this.sqlType) {
+        case Types.VARCHAR:
+          return 0;
+        case Types.DECIMAL:
+          return getBigDecimal().scale();
+        default:
+          throw getExceptionToThrow("Unable to determine scale for data type!");
       }
-      throw getExceptionToThrow("Unable to determine scale for data type!");
     }
 
     private int isNullable() throws SQLException {
-      if (this.sqlType == Types.VARCHAR) {
-        return ResultSetMetaData.columnNullable;
+      switch (this.sqlType) {
+        case Types.VARCHAR:
+        case Types.DECIMAL:
+          return ResultSetMetaData.columnNullable;
+        default:
+          return ResultSetMetaData.columnNullableUnknown;
       }
-      return ResultSetMetaData.columnNullableUnknown;
+    }
+
+    private int getSqlType() throws SQLException { // NOTE(review): nothing in this body throws SQLException
+      return this.sqlType; // plain field read; called by MockColumnMetaData.fromDataElement
+    }
 
     public BigDecimal getBigDecimal() throws SQLException {
diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java
index 78a6284904..0fcf0af084 100644
--- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java
+++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java
@@ -40,21 +40,30 @@ import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getDoubleValue
 import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getFloatValues;
 import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getIntValues;
 import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getLongValues;
-import static org.junit.Assert.*;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
 
 import java.io.IOException;
+import java.math.BigDecimal;
 import java.sql.ResultSet;
 import java.sql.ResultSetMetaData;
 import java.sql.SQLException;
 import java.sql.Types;
+import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Calendar;
 import java.util.Collection;
+import java.util.HashMap;
+import java.util.Map;
 import java.util.stream.Collectors;
 import java.util.stream.Stream;
 
 import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest;
 import org.apache.arrow.adapter.jdbc.ArrowVectorIterator;
+import org.apache.arrow.adapter.jdbc.JdbcFieldInfo;
 import org.apache.arrow.adapter.jdbc.JdbcToArrow;
 import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig;
 import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder;
@@ -311,4 +320,146 @@ public class JdbcToArrowTest extends AbstractJdbcToArrowTest {
     assertEquals("1", element.getString());
   }
 
+  @Test
+  public void testUnreliableMetaDataPrecisionAndScale() throws Exception {
+    BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); // NOTE(review): never closed in this test
+    int x = 0; // NOTE(review): unused local
+    final int targetRows = 0; // NOTE(review): unused local
+    ResultSet rs = buildIncorrectPrecisionAndScaleMetaDataResultSet();
+    ResultSetMetaData rsmd = rs.getMetaData();
+    assertEquals("Column type should be Types.DECIMAL", Types.DECIMAL, rsmd.getColumnType(1));
+    assertEquals("Column scale should be zero", 0, rsmd.getScale(1));
+    assertEquals("Column precision should be zero", 0, rsmd.getPrecision(1));
+    rs.next();
+    BigDecimal bd1 = rs.getBigDecimal(1);
+    assertEquals("Value should be 1000000000000000.01", new BigDecimal("1000000000000000.01"), bd1);
+    assertEquals("Value scale should be 2", 2, bd1.scale());
+    assertEquals("Value precision should be 18", 18, bd1.precision());
+    assertFalse("No more rows!", rs.next());
+
+    // Phase 1: rewind; with no explicit mapping the metadata's precision 0 / scale 0 is used, so this must throw.
+    rs.beforeFirst();
+    JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(
+          allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false)
+          .setReuseVectorSchemaRoot(reuseVectorSchemaRoot)
+          .build();
+    try {
+      ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, config);
+      while (iter.hasNext()) {
+        iter.next();
+      }
+      fail("Expected to fail due to mismatched metadata!");
+      iter.close(); // NOTE(review): not reached, fail() above throws; iter stays open when next() throws
+    } catch (Exception ex) {
+      // Expected path. AssertionError from fail() is an Error, not an Exception, so it is not swallowed here.
+    }
+
+    // Phase 2: rewind; column 1 is explicitly mapped with 18 and 2, the value's precision and scale asserted above.
+    rs.beforeFirst();
+    JdbcFieldInfo explicitMappingField = new JdbcFieldInfo(Types.DECIMAL, 18, 2);
+    Map<Integer, JdbcFieldInfo> explicitMapping = new HashMap<>();
+    explicitMapping.put(1, explicitMappingField);
+    config = new JdbcToArrowConfigBuilder(
+            allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false)
+            .setReuseVectorSchemaRoot(reuseVectorSchemaRoot)
+            .setExplicitTypesByColumnIndex(explicitMapping)
+            .build();
+
+    try {
+      ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, config);
+      while (iter.hasNext()) {
+        iter.next();
+      }
+      iter.close(); // only reached when the whole iteration succeeded
+    } catch (Exception ex) {
+      fail("Should not fail with explicit metadata supplied!"); // NOTE(review): ex is dropped, hiding the cause
+    }
+
+  }
+
+  @Test
+  public void testInconsistentPrecisionAndScale() throws Exception {
+    BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE); // NOTE(review): never closed in this test
+    int x = 0; // NOTE(review): unused local
+    final int targetRows = 0; // NOTE(review): unused local
+    ResultSet rs = buildVaryingPrecisionAndScaleResultSet();
+    ResultSetMetaData rsmd = rs.getMetaData();
+    assertEquals("Column type should be Types.DECIMAL", Types.DECIMAL, rsmd.getColumnType(1));
+    assertEquals("Column scale should be zero", 0, rsmd.getScale(1));
+    assertEquals("Column precision should be zero", 0, rsmd.getPrecision(1));
+    rs.next();
+    BigDecimal bd1 = rs.getBigDecimal(1);
+    assertEquals("Value should be 1000000000000000.01", new BigDecimal("1000000000000000.01"), bd1);
+    assertEquals("Value scale should be 2", 2, bd1.scale());
+    assertEquals("Value precision should be 18", 18, bd1.precision());
+    rs.next();
+    BigDecimal bd2 = rs.getBigDecimal(1);
+    assertEquals("Value should be 1000000000300.0000001", new BigDecimal("1000000000300.0000001"), bd2);
+    assertEquals("Value scale should be 7", 7, bd2.scale());
+    assertEquals("Value precision should be 20", 20, bd2.precision());
+    rs.beforeFirst(); // rewind before converting with the explicit mapping
+    JdbcFieldInfo explicitMappingField = new JdbcFieldInfo(Types.DECIMAL, 20, 7); // 20 and 7 as asserted for row 2
+    Map<Integer, JdbcFieldInfo> explicitMapping = new HashMap<>();
+    explicitMapping.put(1, explicitMappingField);
+
+    JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(
+            allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false)
+            .setReuseVectorSchemaRoot(reuseVectorSchemaRoot)
+            .setExplicitTypesByColumnIndex(explicitMapping)
+            .build();
+    try {
+      ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, config);
+      while (iter.hasNext()) {
+        iter.next();
+        fail("This is expected to fail due to ARROW-16600"); // reached only if next() did not throw
+      }
+      iter.close(); // NOTE(review): skipped whenever next() throws, so iter stays open on the expected path
+    } catch (Exception ex) { // NOTE(review): when ARROW-16600 lands, the fail() in the loop must be removed
+      // Expected for now: conversion throws. Once ARROW-16600 is implemented, this should no longer fail.
+    }
+  }
+
+  private ResultSet buildIncorrectPrecisionAndScaleMetaDataResultSet() throws SQLException {
+    ResultSetUtility.MockResultSetMetaData.MockColumnMetaData columnMetaData =
+            ResultSetUtility.MockResultSetMetaData.MockColumnMetaData.builder()
+                    .index(1)
+                    .sqlType(Types.DECIMAL)
+                    .precision(0) // deliberately wrong: the row value below has precision 18
+                    .scale(0) // deliberately wrong: the row value below has scale 2
+                    .build(); // label(...) and nullable(...) are not called for this column
+    ArrayList<ResultSetUtility.MockResultSetMetaData.MockColumnMetaData> cols = new ArrayList<>();
+    cols.add(columnMetaData);
+    ResultSetMetaData metadata = new ResultSetUtility.MockResultSetMetaData(cols);
+    return ResultSetUtility.MockResultSet.builder()
+            .setMetaData(metadata)
+            .addDataElement(
+                    new ResultSetUtility.MockDataElement(new BigDecimal("1000000000000000.01"), Types.DECIMAL)
+            )
+            .finishRow() // the only row
+            .build();
+  }
+
+  private ResultSet buildVaryingPrecisionAndScaleResultSet() throws SQLException {
+    ResultSetUtility.MockResultSetMetaData.MockColumnMetaData columnMetaData =
+            ResultSetUtility.MockResultSetMetaData.MockColumnMetaData.builder()
+            .index(1)
+            .sqlType(Types.DECIMAL)
+            .precision(0) // matches neither row: the values below have precision 18 and 20
+            .scale(0) // matches neither row: the values below have scale 2 and 7
+            .build(); // label(...) and nullable(...) are not called for this column
+    ArrayList<ResultSetUtility.MockResultSetMetaData.MockColumnMetaData> cols = new ArrayList<>();
+    cols.add(columnMetaData);
+    ResultSetMetaData metadata = new ResultSetUtility.MockResultSetMetaData(cols);
+    return ResultSetUtility.MockResultSet.builder()
+            .setMetaData(metadata)
+            .addDataElement(
+                    new ResultSetUtility.MockDataElement(new BigDecimal("1000000000000000.01"), Types.DECIMAL)
+            )
+            .finishRow() // row 1
+            .addDataElement(
+                    new ResultSetUtility.MockDataElement(new BigDecimal("1000000000300.0000001"), Types.DECIMAL)
+            )
+            .finishRow() // row 2
+            .build();
+  }
 }