You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by li...@apache.org on 2022/05/18 17:52:32 UTC
[arrow] branch master updated: ARROW-16427: [Java] Provide explicit column type mapping
This is an automated email from the ASF dual-hosted git repository.
lidavidm pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git
The following commit(s) were added to refs/heads/master by this push:
new 05bd8fdd80 ARROW-16427: [Java] Provide explicit column type mapping
05bd8fdd80 is described below
commit 05bd8fdd801d4c9bcf57950b79c9a133a49d38bb
Author: Todd Farmer <to...@fivefarmers.com>
AuthorDate: Wed May 18 13:52:18 2022 -0400
ARROW-16427: [Java] Provide explicit column type mapping
Closes #13166 from toddfarmer/toddfarmer/arrow-16427
Authored-by: Todd Farmer <to...@fivefarmers.com>
Signed-off-by: David Li <li...@gmail.com>
---
.../arrow/adapter/jdbc/JdbcToArrowConfig.java | 57 ++++++++
.../adapter/jdbc/JdbcToArrowConfigBuilder.java | 19 ++-
.../arrow/adapter/jdbc/JdbcToArrowUtils.java | 27 +++-
.../arrow/adapter/jdbc/ResultSetUtility.java | 98 ++++++++++---
.../arrow/adapter/jdbc/h2/JdbcToArrowTest.java | 153 ++++++++++++++++++++-
5 files changed, 331 insertions(+), 23 deletions(-)
diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java
index a1bb8b667f..d1c21621a2 100644
--- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java
+++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfig.java
@@ -57,6 +57,8 @@ public final class JdbcToArrowConfig {
private final boolean reuseVectorSchemaRoot;
private final Map<Integer, JdbcFieldInfo> arraySubTypesByColumnIndex;
private final Map<String, JdbcFieldInfo> arraySubTypesByColumnName;
+ private final Map<Integer, JdbcFieldInfo> explicitTypesByColumnIndex;
+ private final Map<String, JdbcFieldInfo> explicitTypesByColumnName;
/**
* The maximum rowCount to read each time when partially convert data.
* Default value is 1024 and -1 means disable partial read.
@@ -140,6 +142,31 @@ public final class JdbcToArrowConfig {
Map<String, JdbcFieldInfo> arraySubTypesByColumnName,
int targetBatchSize,
Function<JdbcFieldInfo, ArrowType> jdbcToArrowTypeConverter) {
+
+ this(
+ allocator,
+ calendar,
+ includeMetadata,
+ reuseVectorSchemaRoot,
+ arraySubTypesByColumnIndex,
+ arraySubTypesByColumnName,
+ targetBatchSize,
+ jdbcToArrowTypeConverter,
+ null,
+ null);
+ }
+
+ JdbcToArrowConfig(
+ BufferAllocator allocator,
+ Calendar calendar,
+ boolean includeMetadata,
+ boolean reuseVectorSchemaRoot,
+ Map<Integer, JdbcFieldInfo> arraySubTypesByColumnIndex,
+ Map<String, JdbcFieldInfo> arraySubTypesByColumnName,
+ int targetBatchSize,
+ Function<JdbcFieldInfo, ArrowType> jdbcToArrowTypeConverter,
+ Map<Integer, JdbcFieldInfo> explicitTypesByColumnIndex,
+ Map<String, JdbcFieldInfo> explicitTypesByColumnName) {
Preconditions.checkNotNull(allocator, "Memory allocator cannot be null");
this.allocator = allocator;
this.calendar = calendar;
@@ -148,6 +175,8 @@ public final class JdbcToArrowConfig {
this.arraySubTypesByColumnIndex = arraySubTypesByColumnIndex;
this.arraySubTypesByColumnName = arraySubTypesByColumnName;
this.targetBatchSize = targetBatchSize;
+ this.explicitTypesByColumnIndex = explicitTypesByColumnIndex;
+ this.explicitTypesByColumnName = explicitTypesByColumnName;
// set up type converter
this.jdbcToArrowTypeConverter = jdbcToArrowTypeConverter != null ? jdbcToArrowTypeConverter :
@@ -231,4 +260,32 @@ public final class JdbcToArrowConfig {
return arraySubTypesByColumnName.get(name);
}
}
+
+ /**
+ * Returns the type {@link JdbcFieldInfo} explicitly defined for the provided column index.
+ *
+ * @param index The {@link java.sql.ResultSetMetaData} column index to evaluate for explicit type mapping.
+ * @return The {@link JdbcFieldInfo} defined for the column, or <code>null</code> if not defined.
+ */
+ public JdbcFieldInfo getExplicitTypeByColumnIndex(int index) {
+ if (explicitTypesByColumnIndex == null) {
+ return null;
+ } else {
+ return explicitTypesByColumnIndex.get(index);
+ }
+ }
+
+ /**
+ * Returns the type {@link JdbcFieldInfo} explicitly defined for the provided column name.
+ *
+ * @param name The {@link java.sql.ResultSetMetaData} column name to evaluate for explicit type mapping.
+ * @return The {@link JdbcFieldInfo} defined for the column, or <code>null</code> if not defined.
+ */
+ public JdbcFieldInfo getExplicitTypeByColumnName(String name) {
+ if (explicitTypesByColumnName == null) {
+ return null;
+ } else {
+ return explicitTypesByColumnName.get(name);
+ }
+ }
}
diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java
index 3941d978f9..2f1f91ca1c 100644
--- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java
+++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowConfigBuilder.java
@@ -37,7 +37,8 @@ public class JdbcToArrowConfigBuilder {
private boolean reuseVectorSchemaRoot;
private Map<Integer, JdbcFieldInfo> arraySubTypesByColumnIndex;
private Map<String, JdbcFieldInfo> arraySubTypesByColumnName;
-
+ private Map<Integer, JdbcFieldInfo> explicitTypesByColumnIndex;
+ private Map<String, JdbcFieldInfo> explicitTypesByColumnName;
private int targetBatchSize;
private Function<JdbcFieldInfo, ArrowType> jdbcToArrowTypeConverter;
@@ -53,6 +54,8 @@ public class JdbcToArrowConfigBuilder {
this.reuseVectorSchemaRoot = false;
this.arraySubTypesByColumnIndex = null;
this.arraySubTypesByColumnName = null;
+ this.explicitTypesByColumnIndex = null;
+ this.explicitTypesByColumnName = null;
}
/**
@@ -164,6 +167,16 @@ public class JdbcToArrowConfigBuilder {
return this;
}
+ public JdbcToArrowConfigBuilder setExplicitTypesByColumnIndex(Map<Integer, JdbcFieldInfo> map) {
+ this.explicitTypesByColumnIndex = map;
+ return this;
+ }
+
+ public JdbcToArrowConfigBuilder setExplicitTypesByColumnName(Map<String, JdbcFieldInfo> map) {
+ this.explicitTypesByColumnName = map;
+ return this;
+ }
+
public JdbcToArrowConfigBuilder setTargetBatchSize(int targetBatchSize) {
this.targetBatchSize = targetBatchSize;
return this;
@@ -196,6 +209,8 @@ public class JdbcToArrowConfigBuilder {
arraySubTypesByColumnIndex,
arraySubTypesByColumnName,
targetBatchSize,
- jdbcToArrowTypeConverter);
+ jdbcToArrowTypeConverter,
+ explicitTypesByColumnIndex,
+ explicitTypesByColumnName);
}
}
diff --git a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java
index 462e6386c5..a918afaf05 100644
--- a/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java
+++ b/java/adapter/jdbc/src/main/java/org/apache/arrow/adapter/jdbc/JdbcToArrowUtils.java
@@ -255,7 +255,8 @@ public class JdbcToArrowUtils {
metadata = null;
}
- final ArrowType arrowType = config.getJdbcToArrowTypeConverter().apply(new JdbcFieldInfo(rsmd, i));
+ final JdbcFieldInfo columnFieldInfo = getJdbcFieldInfoForColumn(rsmd, i, config);
+ final ArrowType arrowType = config.getJdbcToArrowTypeConverter().apply(columnFieldInfo);
if (arrowType != null) {
final FieldType fieldType = new FieldType(
isColumnNullable(rsmd, i), arrowType, /* dictionary encoding */ null, metadata);
@@ -278,6 +279,30 @@ public class JdbcToArrowUtils {
return new Schema(fields, null);
}
+ private static JdbcFieldInfo getJdbcFieldInfoForColumn(
+ ResultSetMetaData rsmd,
+ int arrayColumn,
+ JdbcToArrowConfig config)
+ throws SQLException {
+ Preconditions.checkNotNull(rsmd, "ResultSet MetaData object cannot be null");
+ Preconditions.checkNotNull(config, "Configuration must not be null");
+ Preconditions.checkArgument(
+ arrayColumn > 0,
+ "ResultSetMetaData columns start with 1; column cannot be less than 1");
+ Preconditions.checkArgument(
+ arrayColumn <= rsmd.getColumnCount(),
+ "Column number cannot be more than the number of columns");
+
+ JdbcFieldInfo fieldInfo = config.getExplicitTypeByColumnIndex(arrayColumn);
+ if (fieldInfo == null) {
+ fieldInfo = config.getExplicitTypeByColumnName(rsmd.getColumnLabel(arrayColumn));
+ }
+ if (fieldInfo != null) {
+ return fieldInfo;
+ }
+ return new JdbcFieldInfo(rsmd, arrayColumn);
+ }
+
/* Uses the configuration to determine what the array sub-type JdbcFieldInfo is.
* If no sub-type can be found, returns null.
*/
diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/ResultSetUtility.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/ResultSetUtility.java
index 06e84db06a..2ffba2e484 100644
--- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/ResultSetUtility.java
+++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/ResultSetUtility.java
@@ -332,17 +332,13 @@ public class ResultSetUtility {
private int precision;
private int scale;
private int nullable;
+ private String label;
- private MockColumnMetaData(int i, MockDataElement element) throws SQLException {
- this.index = i;
- this.sqlType = element.sqlType;
- this.precision = element.getPrecision();
- this.scale = element.getScale();
- this.nullable = element.isNullable();
- }
+
+ private MockColumnMetaData() {}
private String getLabel() {
- return "col_" + index;
+ return label;
}
private String getName() {
@@ -365,8 +361,57 @@ public class ResultSetUtility {
return nullable;
}
- static MockColumnMetaData fromDataElement(MockDataElement element, int i) throws SQLException {
- return new MockColumnMetaData(i, element);
+ public static MockColumnMetaData fromDataElement(MockDataElement element, int i) throws SQLException {
+ return MockColumnMetaData.builder()
+ .index(i)
+ .sqlType(element.getSqlType())
+ .precision(element.getPrecision())
+ .scale(element.getScale())
+ .nullable(element.isNullable())
+ .label("col_" + i)
+ .build();
+ }
+
+ public static Builder builder() {
+ return new Builder();
+ }
+
+ public static class Builder {
+ private MockColumnMetaData columnMetaData = new MockColumnMetaData();
+
+ public Builder index(int index) {
+ this.columnMetaData.index = index;
+ return this;
+ }
+
+ public Builder label(String label) {
+ this.columnMetaData.label = label;
+ return this;
+ }
+
+ public Builder sqlType(int sqlType) {
+ this.columnMetaData.sqlType = sqlType;
+ return this;
+ }
+
+ public Builder precision(int precision) {
+ this.columnMetaData.precision = precision;
+ return this;
+ }
+
+ public Builder scale(int scale) {
+ this.columnMetaData.scale = scale;
+ return this;
+ }
+
+ public Builder nullable(int nullable) {
+ this.columnMetaData.nullable = nullable;
+ return this;
+ }
+
+ public MockColumnMetaData build() {
+ return this.columnMetaData;
+ }
}
}
@@ -410,24 +455,39 @@ public class ResultSetUtility {
}
private int getPrecision() throws SQLException {
- if (this.sqlType == Types.VARCHAR) {
- return getValueAsString().length();
+ switch (this.sqlType) {
+ case Types.VARCHAR:
+ return getValueAsString().length();
+ case Types.DECIMAL:
+ return getBigDecimal().precision();
+ default:
+ throw getExceptionToThrow("Unable to determine precision for data type: " + sqlType);
}
- throw getExceptionToThrow("Unable to determine precision for data type!");
}
private int getScale() throws SQLException {
- if (this.sqlType == Types.VARCHAR) {
- return 0;
+ switch (this.sqlType) {
+ case Types.VARCHAR:
+ return 0;
+ case Types.DECIMAL:
+ return getBigDecimal().scale();
+ default:
+ throw getExceptionToThrow("Unable to determine scale for data type!");
}
- throw getExceptionToThrow("Unable to determine scale for data type!");
}
private int isNullable() throws SQLException {
- if (this.sqlType == Types.VARCHAR) {
- return ResultSetMetaData.columnNullable;
+ switch (this.sqlType) {
+ case Types.VARCHAR:
+ case Types.DECIMAL:
+ return ResultSetMetaData.columnNullable;
+ default:
+ return ResultSetMetaData.columnNullableUnknown;
}
- return ResultSetMetaData.columnNullableUnknown;
+ }
+
+ private int getSqlType() throws SQLException {
+ return this.sqlType;
}
public BigDecimal getBigDecimal() throws SQLException {
diff --git a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java
index 78a6284904..0fcf0af084 100644
--- a/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java
+++ b/java/adapter/jdbc/src/test/java/org/apache/arrow/adapter/jdbc/h2/JdbcToArrowTest.java
@@ -40,21 +40,30 @@ import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getDoubleValue
import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getFloatValues;
import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getIntValues;
import static org.apache.arrow.adapter.jdbc.JdbcToArrowTestHelper.getLongValues;
-import static org.junit.Assert.*;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertNotNull;
+import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
import java.io.IOException;
+import java.math.BigDecimal;
import java.sql.ResultSet;
import java.sql.ResultSetMetaData;
import java.sql.SQLException;
import java.sql.Types;
+import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Collection;
+import java.util.HashMap;
+import java.util.Map;
import java.util.stream.Collectors;
import java.util.stream.Stream;
import org.apache.arrow.adapter.jdbc.AbstractJdbcToArrowTest;
import org.apache.arrow.adapter.jdbc.ArrowVectorIterator;
+import org.apache.arrow.adapter.jdbc.JdbcFieldInfo;
import org.apache.arrow.adapter.jdbc.JdbcToArrow;
import org.apache.arrow.adapter.jdbc.JdbcToArrowConfig;
import org.apache.arrow.adapter.jdbc.JdbcToArrowConfigBuilder;
@@ -311,4 +320,146 @@ public class JdbcToArrowTest extends AbstractJdbcToArrowTest {
assertEquals("1", element.getString());
}
+ @Test
+ public void testUnreliableMetaDataPrecisionAndScale() throws Exception {
+ BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
+ int x = 0;
+ final int targetRows = 0;
+ ResultSet rs = buildIncorrectPrecisionAndScaleMetaDataResultSet();
+ ResultSetMetaData rsmd = rs.getMetaData();
+ assertEquals("Column type should be Types.DECIMAL", Types.DECIMAL, rsmd.getColumnType(1));
+ assertEquals("Column scale should be zero", 0, rsmd.getScale(1));
+ assertEquals("Column precision should be zero", 0, rsmd.getPrecision(1));
+ rs.next();
+ BigDecimal bd1 = rs.getBigDecimal(1);
+ assertEquals("Value should be 1000000000000000.01", new BigDecimal("1000000000000000.01"), bd1);
+ assertEquals("Value scale should be 2", 2, bd1.scale());
+ assertEquals("Value precision should be 18", 18, bd1.precision());
+ assertFalse("No more rows!", rs.next());
+
+ // reset the ResultSet:
+ rs.beforeFirst();
+ JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(
+ allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false)
+ .setReuseVectorSchemaRoot(reuseVectorSchemaRoot)
+ .build();
+ try {
+ ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, config);
+ while (iter.hasNext()) {
+ iter.next();
+ }
+ fail("Expected to fail due to mismatched metadata!");
+ iter.close();
+ } catch (Exception ex) {
+ // expected to fail
+ }
+
+ // reset the ResultSet:
+ rs.beforeFirst();
+ JdbcFieldInfo explicitMappingField = new JdbcFieldInfo(Types.DECIMAL, 18, 2);
+ Map<Integer, JdbcFieldInfo> explicitMapping = new HashMap<>();
+ explicitMapping.put(1, explicitMappingField);
+ config = new JdbcToArrowConfigBuilder(
+ allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false)
+ .setReuseVectorSchemaRoot(reuseVectorSchemaRoot)
+ .setExplicitTypesByColumnIndex(explicitMapping)
+ .build();
+
+ try {
+ ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, config);
+ while (iter.hasNext()) {
+ iter.next();
+ }
+ iter.close();
+ } catch (Exception ex) {
+ fail("Should not fail with explicit metadata supplied!");
+ }
+
+ }
+
+ @Test
+ public void testInconsistentPrecisionAndScale() throws Exception {
+ BufferAllocator allocator = new RootAllocator(Integer.MAX_VALUE);
+ int x = 0;
+ final int targetRows = 0;
+ ResultSet rs = buildVaryingPrecisionAndScaleResultSet();
+ ResultSetMetaData rsmd = rs.getMetaData();
+ assertEquals("Column type should be Types.DECIMAL", Types.DECIMAL, rsmd.getColumnType(1));
+ assertEquals("Column scale should be zero", 0, rsmd.getScale(1));
+ assertEquals("Column precision should be zero", 0, rsmd.getPrecision(1));
+ rs.next();
+ BigDecimal bd1 = rs.getBigDecimal(1);
+ assertEquals("Value should be 1000000000000000.01", new BigDecimal("1000000000000000.01"), bd1);
+ assertEquals("Value scale should be 2", 2, bd1.scale());
+ assertEquals("Value precision should be 18", 18, bd1.precision());
+ rs.next();
+ BigDecimal bd2 = rs.getBigDecimal(1);
+ assertEquals("Value should be 1000000000300.0000001", new BigDecimal("1000000000300.0000001"), bd2);
+ assertEquals("Value scale should be 7", 7, bd2.scale());
+ assertEquals("Value precision should be 20", 20, bd2.precision());
+ rs.beforeFirst();
+ JdbcFieldInfo explicitMappingField = new JdbcFieldInfo(Types.DECIMAL, 20, 7);
+ Map<Integer, JdbcFieldInfo> explicitMapping = new HashMap<>();
+ explicitMapping.put(1, explicitMappingField);
+
+ JdbcToArrowConfig config = new JdbcToArrowConfigBuilder(
+ allocator, JdbcToArrowUtils.getUtcCalendar(), /* include metadata */ false)
+ .setReuseVectorSchemaRoot(reuseVectorSchemaRoot)
+ .setExplicitTypesByColumnIndex(explicitMapping)
+ .build();
+ try {
+ ArrowVectorIterator iter = JdbcToArrow.sqlToArrowVectorIterator(rs, config);
+ while (iter.hasNext()) {
+ iter.next();
+ fail("This is expected to fail due to ARROW-16600");
+ }
+ iter.close();
+ } catch (Exception ex) {
+ // Once ARROW-16600 is implemented, this should no longer fail.
+ }
+ }
+
+ private ResultSet buildIncorrectPrecisionAndScaleMetaDataResultSet() throws SQLException {
+ ResultSetUtility.MockResultSetMetaData.MockColumnMetaData columnMetaData =
+ ResultSetUtility.MockResultSetMetaData.MockColumnMetaData.builder()
+ .index(1)
+ .sqlType(Types.DECIMAL)
+ .precision(0)
+ .scale(0)
+ .build();
+ ArrayList<ResultSetUtility.MockResultSetMetaData.MockColumnMetaData> cols = new ArrayList<>();
+ cols.add(columnMetaData);
+ ResultSetMetaData metadata = new ResultSetUtility.MockResultSetMetaData(cols);
+ return ResultSetUtility.MockResultSet.builder()
+ .setMetaData(metadata)
+ .addDataElement(
+ new ResultSetUtility.MockDataElement(new BigDecimal("1000000000000000.01"), Types.DECIMAL)
+ )
+ .finishRow()
+ .build();
+ }
+
+ private ResultSet buildVaryingPrecisionAndScaleResultSet() throws SQLException {
+ ResultSetUtility.MockResultSetMetaData.MockColumnMetaData columnMetaData =
+ ResultSetUtility.MockResultSetMetaData.MockColumnMetaData.builder()
+ .index(1)
+ .sqlType(Types.DECIMAL)
+ .precision(0)
+ .scale(0)
+ .build();
+ ArrayList<ResultSetUtility.MockResultSetMetaData.MockColumnMetaData> cols = new ArrayList<>();
+ cols.add(columnMetaData);
+ ResultSetMetaData metadata = new ResultSetUtility.MockResultSetMetaData(cols);
+ return ResultSetUtility.MockResultSet.builder()
+ .setMetaData(metadata)
+ .addDataElement(
+ new ResultSetUtility.MockDataElement(new BigDecimal("1000000000000000.01"), Types.DECIMAL)
+ )
+ .finishRow()
+ .addDataElement(
+ new ResultSetUtility.MockDataElement(new BigDecimal("1000000000300.0000001"), Types.DECIMAL)
+ )
+ .finishRow()
+ .build();
+ }
}