You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@sqoop.apache.org by fszabo2 <gi...@git.apache.org> on 2018/11/29 15:58:12 UTC

[GitHub] sqoop pull request #60: SQOOP-3396: Add parquet numeric support for Parquet ...

GitHub user fszabo2 opened a pull request:

    https://github.com/apache/sqoop/pull/60

    SQOOP-3396: Add parquet numeric support for Parquet in Hive import

    

You can merge this pull request into a Git repository by running:

    $ git pull https://github.com/fszabo2/sqoop SQOOP-3396

Alternatively you can review and apply these changes as the patch at:

    https://github.com/apache/sqoop/pull/60.patch

To close this pull request, make a commit to your master/trunk branch
with (at least) the following in the commit message:

    This closes #60
    
----
commit 8c434766ade5a8faada21787dd86e29020a51cea
Author: Fero Szabo <fe...@...>
Date:   2018-11-29T14:25:03Z

    Apache headers added, and TableDefWriter white space changes reverted.

commit 8db419a37111aa9d523dd15931761534d9e7e8bb
Author: Fero Szabo <fe...@...>
Date:   2018-11-29T14:25:03Z

    Apache headers added, and TableDefWriter white space changes reverted.

commit c1695016c25260db80ca6772cbee4f82818c1ba6
Author: Fero Szabo <fe...@...>
Date:   2018-11-29T15:52:43Z

    Merge branch 'SQOOP-3396' of https://github.com/fszabo2/sqoop into SQOOP-3396

----


---

[GitHub] sqoop pull request #60: SQOOP-3396: Add parquet numeric support for Parquet ...

Posted by szvasas <gi...@git.apache.org>.
Github user szvasas commented on a diff in the pull request:

    https://github.com/apache/sqoop/pull/60#discussion_r238334653
  
    --- Diff: src/test/org/apache/sqoop/importjob/numerictypes/NumericTypesImportTestBase.java ---
    @@ -65,240 +46,79 @@
      * 2. Decimal padding during avro or parquet import
      * In case of Oracle and Postgres, Sqoop has to pad the values with 0s to avoid errors.
      */
    -public abstract class NumericTypesImportTestBase<T extends AvroTestConfiguration & ParquetTestConfiguration> extends ImportJobTestCase implements DatabaseAdapterFactory {
    +public abstract class NumericTypesImportTestBase<T extends ImportJobTestConfiguration> extends ThirdPartyTestBase<T>  {
     
       public static final Log LOG = LogFactory.getLog(NumericTypesImportTestBase.class.getName());
     
    -  private Configuration conf = new Configuration();
    -
    -  private final T configuration;
    -  private final DatabaseAdapter adapter;
       private final boolean failWithoutExtraArgs;
       private final boolean failWithPadding;
     
    -  // Constants for the basic test case, that doesn't use extra arguments
    -  // that are required to avoid errors, i.e. padding and default precision and scale.
    -  protected final static boolean SUCCEED_WITHOUT_EXTRA_ARGS = false;
    -  protected final static boolean FAIL_WITHOUT_EXTRA_ARGS = true;
    -
    -  // Constants for the test case that has padding specified but not default precision and scale.
    -  protected final static boolean SUCCEED_WITH_PADDING_ONLY = false;
    -  protected final static boolean FAIL_WITH_PADDING_ONLY = true;
    -
    -  private Path tableDirPath;
    -
       public NumericTypesImportTestBase(T configuration, boolean failWithoutExtraArgs, boolean failWithPaddingOnly) {
    -    this.adapter = createAdapter();
    -    this.configuration = configuration;
    +    super(configuration);
         this.failWithoutExtraArgs = failWithoutExtraArgs;
         this.failWithPadding = failWithPaddingOnly;
       }
     
    -  @Rule
    -  public ExpectedException thrown = ExpectedException.none();
    -
    -  @Override
    -  protected Configuration getConf() {
    -    return conf;
    -  }
    -
    -  @Override
    -  protected boolean useHsqldbTestServer() {
    -    return false;
    -  }
    -
    -  @Override
    -  protected String getConnectString() {
    -    return adapter.getConnectionString();
    -  }
    -
    -  @Override
    -  protected SqoopOptions getSqoopOptions(Configuration conf) {
    -    SqoopOptions opts = new SqoopOptions(conf);
    -    adapter.injectConnectionParameters(opts);
    -    return opts;
    -  }
    -
    -  @Override
    -  protected void dropTableIfExists(String table) throws SQLException {
    -    adapter.dropTableIfExists(table, getManager());
    -  }
    -
       @Before
       public void setUp() {
         super.setUp();
    -    String[] names = configuration.getNames();
    -    String[] types = configuration.getTypes();
    -    createTableWithColTypesAndNames(names, types, new String[0]);
    -    List<String[]> inputData = configuration.getSampleData();
    -    for (String[] input  : inputData) {
    -      insertIntoTable(names, types, input);
    -    }
         tableDirPath = new Path(getWarehouseDir() + "/" + getTableName());
       }
     
    -  @After
    -  public void tearDown() {
    -    try {
    -      dropTableIfExists(getTableName());
    -    } catch (SQLException e) {
    -      LOG.warn("Error trying to drop table on tearDown: " + e);
    -    }
    -    super.tearDown();
    -  }
    +  public Path tableDirPath;
     
    -  private ArgumentArrayBuilder getArgsBuilder(SqoopOptions.FileLayout fileLayout) {
    -    ArgumentArrayBuilder builder = new ArgumentArrayBuilder();
    -    if (AvroDataFile.equals(fileLayout)) {
    -      builder.withOption("as-avrodatafile");
    -    }
    -    else if (ParquetFile.equals(fileLayout)) {
    -      builder.withOption("as-parquetfile");
    -    }
    +  @Rule
    +  public ExpectedException thrown = ExpectedException.none();
    +
    +  abstract public ArgumentArrayBuilder getArgsBuilder();
    +  abstract public void verify();
     
    +  public ArgumentArrayBuilder includeCommonOptions(ArgumentArrayBuilder builder) {
         return builder.withCommonHadoopFlags(true)
             .withOption("warehouse-dir", getWarehouseDir())
             .withOption("num-mappers", "1")
             .withOption("table", getTableName())
             .withOption("connect", getConnectString());
       }
     
    -  /**
    -   * Adds properties to the given arg builder for decimal precision and scale.
    -   * @param builder
    -   */
    -  private void addPrecisionAndScale(ArgumentArrayBuilder builder) {
    -    builder.withProperty("sqoop.avro.logical_types.decimal.default.precision", "38");
    -    builder.withProperty("sqoop.avro.logical_types.decimal.default.scale", "3");
    -  }
    -
    -  /**
    -   * Enables padding for decimals in avro and parquet import.
    -   * @param builder
    -   */
    -  private void addPadding(ArgumentArrayBuilder builder) {
    -    builder.withProperty("sqoop.avro.decimal_padding.enable", "true");
    -  }
    -
    -  private void addEnableAvroDecimal(ArgumentArrayBuilder builder) {
    -    builder.withProperty("sqoop.avro.logical_types.decimal.enable", "true");
    -  }
    -
    -  private void addEnableParquetDecimal(ArgumentArrayBuilder builder) {
    -    builder.withProperty("sqoop.parquet.logical_types.decimal.enable", "true");
    -  }
     
    -  private void configureJunitToExpectFailure(boolean failWithPadding) {
    -    if (failWithPadding) {
    +  @Test
    +  public void testImportWithoutPadding() throws IOException {
    +    if(failWithoutExtraArgs){
           thrown.expect(IOException.class);
           thrown.expectMessage("Failure during job; return status 1");
    +      NumericTypesTestUtils.configureJunitToExpectFailure(thrown);
         }
    -  }
    -
    -  @Test
    -  public void testAvroImportWithoutPadding() throws IOException {
    -    configureJunitToExpectFailure(failWithoutExtraArgs);
    -    ArgumentArrayBuilder builder = getArgsBuilder(AvroDataFile);
    -    addEnableAvroDecimal(builder);
    +    ArgumentArrayBuilder builder = getArgsBuilder();
         String[] args = builder.build();
         runImport(args);
         if (!failWithoutExtraArgs) {
    -      verify(AvroDataFile);
    -    }
    -  }
    -
    -  @Test
    -  public void testAvroImportWithPadding() throws IOException {
    -    configureJunitToExpectFailure(failWithPadding);
    -    ArgumentArrayBuilder builder = getArgsBuilder(AvroDataFile);
    -    addEnableAvroDecimal(builder);
    -    addPadding(builder);
    -    runImport(builder.build());
    -    if (!failWithPadding) {
    -      verify(AvroDataFile);
    +      verify();
         }
       }
     
       @Test
    -  public void testAvroImportWithDefaultPrecisionAndScale() throws  IOException {
    -    ArgumentArrayBuilder builder = getArgsBuilder(AvroDataFile);
    -    addEnableAvroDecimal(builder);
    -    addPadding(builder);
    -    addPrecisionAndScale(builder);
    -    runImport(builder.build());
    -    verify(AvroDataFile);
    -  }
    -
    -  @Test
    -  public void testParquetImportWithoutPadding() throws IOException {
    -    configureJunitToExpectFailure(failWithoutExtraArgs);
    -    ArgumentArrayBuilder builder = getArgsBuilder(ParquetFile);
    -    addEnableParquetDecimal(builder);
    -    String[] args = builder.build();
    -    runImport(args);
    -    if (!failWithoutExtraArgs) {
    -      verify(ParquetFile);
    +  public void testImportWithPadding() throws IOException {
    +    if(failWithPadding){
    +      thrown.expect(IOException.class);
    +      thrown.expectMessage("Failure during job; return status 1");
    +      NumericTypesTestUtils.configureJunitToExpectFailure(thrown);
    --- End diff --
    
    This method seems to be doing the same thing as the previous line.


---

[GitHub] sqoop pull request #60: SQOOP-3396: Add parquet numeric support for Parquet ...

Posted by fszabo2 <gi...@git.apache.org>.
Github user fszabo2 commented on a diff in the pull request:

    https://github.com/apache/sqoop/pull/60#discussion_r238707355
  
    --- Diff: src/java/org/apache/sqoop/hive/HiveTypes.java ---
    @@ -83,27 +89,58 @@ public static String toHiveType(int sqlType) {
           }
       }
     
    -  public static String toHiveType(Schema.Type avroType) {
    -      switch (avroType) {
    -        case BOOLEAN:
    -          return HIVE_TYPE_BOOLEAN;
    -        case INT:
    -          return HIVE_TYPE_INT;
    -        case LONG:
    -          return HIVE_TYPE_BIGINT;
    -        case FLOAT:
    -          return HIVE_TYPE_FLOAT;
    -        case DOUBLE:
    -          return HIVE_TYPE_DOUBLE;
    -        case STRING:
    -        case ENUM:
    -          return HIVE_TYPE_STRING;
    -        case BYTES:
    -        case FIXED:
    -          return HIVE_TYPE_BINARY;
    -        default:
    -          return null;
    +  public static String toHiveType(Schema schema, SqoopOptions options) {
    +    if (schema.getType() == Schema.Type.UNION) {
    +      for (Schema subSchema : schema.getTypes()) {
    +        if (subSchema.getType() != Schema.Type.NULL) {
    +          return toHiveType(subSchema, options);
    +        }
    +      }
    +    }
    +
    +    Schema.Type avroType = schema.getType();
    +    switch (avroType) {
    +      case BOOLEAN:
    +        return HIVE_TYPE_BOOLEAN;
    +      case INT:
    +        return HIVE_TYPE_INT;
    +      case LONG:
    +        return HIVE_TYPE_BIGINT;
    +      case FLOAT:
    +        return HIVE_TYPE_FLOAT;
    +      case DOUBLE:
    +        return HIVE_TYPE_DOUBLE;
    +      case STRING:
    +      case ENUM:
    +        return HIVE_TYPE_STRING;
    +      case BYTES:
    +        return mapToDecimalOrBinary(schema, options);
    +      case FIXED:
    +        return HIVE_TYPE_BINARY;
    +      default:
    +        throw new RuntimeException(String.format("There is no Hive type mapping defined for the Avro type of: %s ", avroType.getName()));
    +    }
    +  }
    +
    +  private static String mapToDecimalOrBinary(Schema schema, SqoopOptions options) {
    +    boolean logicalTypesEnabled = options.getConf().getBoolean(ConfigurationConstants.PROP_ENABLE_PARQUET_LOGICAL_TYPE_DECIMAL, false);
    +    if (logicalTypesEnabled && schema.getLogicalType() != null && schema.getLogicalType() instanceof Decimal) {
    --- End diff --
    
    I'm learning something new every day! :) removed 


---

[GitHub] sqoop pull request #60: SQOOP-3396: Add parquet numeric support for Parquet ...

Posted by fszabo2 <gi...@git.apache.org>.
Github user fszabo2 commented on a diff in the pull request:

    https://github.com/apache/sqoop/pull/60#discussion_r239057381
  
    --- Diff: src/test/org/apache/sqoop/hive/TestHiveTypesForAvroTypeMapping.java ---
    @@ -41,30 +44,49 @@
     
       private final String hiveType;
       private final Schema schema;
    +  private final SqoopOptions options;
     
    -  @Parameters(name = "hiveType = {0}, schema = {1}")
    +  @Parameters(name = "hiveType = {0}, schema = {1}, options = {2}")
       public static Iterable<? extends Object> parameters() {
         return Arrays.asList(
    -        new Object[]{"BOOLEAN", Schema.create(Schema.Type.BOOLEAN)},
    -        new Object[]{"INT", Schema.create(Schema.Type.INT)},
    -        new Object[]{"BIGINT", Schema.create(Schema.Type.LONG)},
    -        new Object[]{"FLOAT", Schema.create(Schema.Type.FLOAT)},
    -        new Object[]{"DOUBLE", Schema.create(Schema.Type.DOUBLE)},
    -        new Object[]{"STRING", Schema.createEnum("ENUM", "doc", "namespce", new ArrayList<>())}, // Schema.Type.ENUM
    -        new Object[]{"STRING", Schema.create(Schema.Type.STRING)},
    -        new Object[]{"BINARY", Schema.create(Schema.Type.BYTES)},
    -        new Object[]{"BINARY", Schema.createFixed("Fixed", "doc", "space", 1) }
    -        //, new Object[]{"DECIMAL", Schema.create(Schema.Type.UNION).}
    +        new Object[]{"BOOLEAN", Schema.create(Schema.Type.BOOLEAN), new SqoopOptions()},
    +        new Object[]{"INT", Schema.create(Schema.Type.INT), new SqoopOptions()},
    +        new Object[]{"BIGINT", Schema.create(Schema.Type.LONG), new SqoopOptions()},
    +        new Object[]{"FLOAT", Schema.create(Schema.Type.FLOAT), new SqoopOptions()},
    +        new Object[]{"DOUBLE", Schema.create(Schema.Type.DOUBLE), new SqoopOptions()},
    +        new Object[]{"STRING", Schema.createEnum("ENUM", "doc", "namespce", new ArrayList<>()), new SqoopOptions()}, // Schema.Type.ENUM
    +        new Object[]{"STRING", Schema.create(Schema.Type.STRING), new SqoopOptions()},
    +        new Object[]{"BINARY", Schema.create(Schema.Type.BYTES), new SqoopOptions()},
    --- End diff --
    
    Yeah, make sense


---

[GitHub] sqoop pull request #60: SQOOP-3396: Add parquet numeric support for Parquet ...

Posted by szvasas <gi...@git.apache.org>.
Github user szvasas commented on a diff in the pull request:

    https://github.com/apache/sqoop/pull/60#discussion_r238317375
  
    --- Diff: src/test/org/apache/sqoop/hive/TestHiveTypesForAvroTypeMapping.java ---
    @@ -38,29 +40,30 @@
     public class TestHiveTypesForAvroTypeMapping {
    --- End diff --
    
    Can we add more parameters to this test to cover the changes in org.apache.sqoop.hive.HiveTypes#mapToDecimalOrBinary?


---

[GitHub] sqoop pull request #60: SQOOP-3396: Add parquet numeric support for Parquet ...

Posted by asfgit <gi...@git.apache.org>.
Github user asfgit closed the pull request at:

    https://github.com/apache/sqoop/pull/60


---

[GitHub] sqoop pull request #60: SQOOP-3396: Add parquet numeric support for Parquet ...

Posted by szvasas <gi...@git.apache.org>.
Github user szvasas commented on a diff in the pull request:

    https://github.com/apache/sqoop/pull/60#discussion_r238326484
  
    --- Diff: src/java/org/apache/sqoop/hive/HiveTypes.java ---
    @@ -83,27 +89,58 @@ public static String toHiveType(int sqlType) {
           }
       }
     
    -  public static String toHiveType(Schema.Type avroType) {
    -      switch (avroType) {
    -        case BOOLEAN:
    -          return HIVE_TYPE_BOOLEAN;
    -        case INT:
    -          return HIVE_TYPE_INT;
    -        case LONG:
    -          return HIVE_TYPE_BIGINT;
    -        case FLOAT:
    -          return HIVE_TYPE_FLOAT;
    -        case DOUBLE:
    -          return HIVE_TYPE_DOUBLE;
    -        case STRING:
    -        case ENUM:
    -          return HIVE_TYPE_STRING;
    -        case BYTES:
    -        case FIXED:
    -          return HIVE_TYPE_BINARY;
    -        default:
    -          return null;
    +  public static String toHiveType(Schema schema, SqoopOptions options) {
    +    if (schema.getType() == Schema.Type.UNION) {
    +      for (Schema subSchema : schema.getTypes()) {
    +        if (subSchema.getType() != Schema.Type.NULL) {
    +          return toHiveType(subSchema, options);
    +        }
    +      }
    +    }
    +
    +    Schema.Type avroType = schema.getType();
    +    switch (avroType) {
    +      case BOOLEAN:
    +        return HIVE_TYPE_BOOLEAN;
    +      case INT:
    +        return HIVE_TYPE_INT;
    +      case LONG:
    +        return HIVE_TYPE_BIGINT;
    +      case FLOAT:
    +        return HIVE_TYPE_FLOAT;
    +      case DOUBLE:
    +        return HIVE_TYPE_DOUBLE;
    +      case STRING:
    +      case ENUM:
    +        return HIVE_TYPE_STRING;
    +      case BYTES:
    +        return mapToDecimalOrBinary(schema, options);
    +      case FIXED:
    +        return HIVE_TYPE_BINARY;
    +      default:
    +        throw new RuntimeException(String.format("There is no Hive type mapping defined for the Avro type of: %s ", avroType.getName()));
    +    }
    +  }
    +
    +  private static String mapToDecimalOrBinary(Schema schema, SqoopOptions options) {
    +    boolean logicalTypesEnabled = options.getConf().getBoolean(ConfigurationConstants.PROP_ENABLE_PARQUET_LOGICAL_TYPE_DECIMAL, false);
    +    if (logicalTypesEnabled && schema.getLogicalType() != null && schema.getLogicalType() instanceof Decimal) {
    +      Decimal decimal = (Decimal) schema.getLogicalType();
    +
    +      // trimming precision and scale to Hive's maximum values.
    +      int precision = Math.min(HiveDecimal.MAX_PRECISION, decimal.getPrecision());
    +      if (precision < decimal.getPrecision()) {
    +        LOG.warn("Warning! Precision in the Hive table definition will be smaller than the actual precision of the column on storage! Hive may not be able to read data from this column.");
    --- End diff --
    
    I think we mention this potential problem in the documentation somewhere.


---

[GitHub] sqoop pull request #60: SQOOP-3396: Add parquet numeric support for Parquet ...

Posted by szvasas <gi...@git.apache.org>.
Github user szvasas commented on a diff in the pull request:

    https://github.com/apache/sqoop/pull/60#discussion_r238984816
  
    --- Diff: src/test/org/apache/sqoop/hive/TestHiveTypesForAvroTypeMapping.java ---
    @@ -41,30 +44,49 @@
     
       private final String hiveType;
       private final Schema schema;
    +  private final SqoopOptions options;
     
    -  @Parameters(name = "hiveType = {0}, schema = {1}")
    +  @Parameters(name = "hiveType = {0}, schema = {1}, options = {2}")
       public static Iterable<? extends Object> parameters() {
         return Arrays.asList(
    -        new Object[]{"BOOLEAN", Schema.create(Schema.Type.BOOLEAN)},
    -        new Object[]{"INT", Schema.create(Schema.Type.INT)},
    -        new Object[]{"BIGINT", Schema.create(Schema.Type.LONG)},
    -        new Object[]{"FLOAT", Schema.create(Schema.Type.FLOAT)},
    -        new Object[]{"DOUBLE", Schema.create(Schema.Type.DOUBLE)},
    -        new Object[]{"STRING", Schema.createEnum("ENUM", "doc", "namespce", new ArrayList<>())}, // Schema.Type.ENUM
    -        new Object[]{"STRING", Schema.create(Schema.Type.STRING)},
    -        new Object[]{"BINARY", Schema.create(Schema.Type.BYTES)},
    -        new Object[]{"BINARY", Schema.createFixed("Fixed", "doc", "space", 1) }
    -        //, new Object[]{"DECIMAL", Schema.create(Schema.Type.UNION).}
    +        new Object[]{"BOOLEAN", Schema.create(Schema.Type.BOOLEAN), new SqoopOptions()},
    +        new Object[]{"INT", Schema.create(Schema.Type.INT), new SqoopOptions()},
    +        new Object[]{"BIGINT", Schema.create(Schema.Type.LONG), new SqoopOptions()},
    +        new Object[]{"FLOAT", Schema.create(Schema.Type.FLOAT), new SqoopOptions()},
    +        new Object[]{"DOUBLE", Schema.create(Schema.Type.DOUBLE), new SqoopOptions()},
    +        new Object[]{"STRING", Schema.createEnum("ENUM", "doc", "namespce", new ArrayList<>()), new SqoopOptions()}, // Schema.Type.ENUM
    +        new Object[]{"STRING", Schema.create(Schema.Type.STRING), new SqoopOptions()},
    +        new Object[]{"BINARY", Schema.create(Schema.Type.BYTES), new SqoopOptions()},
    --- End diff --
    
    I would add one more test case here:
    new Object[]{"BINARY", create(Schema.Type.BYTES), createSqoopOptionsWithLogicalTypesEnabled()},
    
    to make sure that we test the if condition org/apache/sqoop/hive/HiveTypes.java:127 fully



---

[GitHub] sqoop pull request #60: SQOOP-3396: Add parquet numeric support for Parquet ...

Posted by szvasas <gi...@git.apache.org>.
Github user szvasas commented on a diff in the pull request:

    https://github.com/apache/sqoop/pull/60#discussion_r238331788
  
    --- Diff: src/test/org/apache/sqoop/importjob/numerictypes/NumericTypesAvroImportTestBase.java ---
    @@ -0,0 +1,59 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + * <p>
    + * http://www.apache.org/licenses/LICENSE-2.0
    + * <p>
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.sqoop.importjob.numerictypes;
    +
    +import org.apache.commons.logging.Log;
    +import org.apache.commons.logging.LogFactory;
    +import org.apache.hadoop.fs.Path;
    +import org.apache.sqoop.importjob.configuration.AvroTestConfiguration;
    +import org.apache.sqoop.testutil.ArgumentArrayBuilder;
    +import org.apache.sqoop.testutil.AvroTestUtils;
    +import org.apache.sqoop.testutil.NumericTypesTestUtils;
    +import org.junit.Before;
    +
    +public abstract class NumericTypesAvroImportTestBase<T extends AvroTestConfiguration> extends NumericTypesImportTestBase<T>  {
    +
    +  public static final Log LOG = LogFactory.getLog(NumericTypesAvroImportTestBase.class.getName());
    +
    +  public NumericTypesAvroImportTestBase(T configuration, boolean failWithoutExtraArgs, boolean failWithPaddingOnly) {
    +    super(configuration, failWithoutExtraArgs, failWithPaddingOnly);
    +  }
    +
    +  @Before
    +  public void setUp() {
    +    super.setUp();
    +    tableDirPath = new Path(getWarehouseDir() + "/" + getTableName());
    --- End diff --
    
    tableDirPath seems to be initialized in super.setUp, do we need this here?


---

[GitHub] sqoop pull request #60: SQOOP-3396: Add parquet numeric support for Parquet ...

Posted by szvasas <gi...@git.apache.org>.
Github user szvasas commented on a diff in the pull request:

    https://github.com/apache/sqoop/pull/60#discussion_r238984324
  
    --- Diff: src/test/org/apache/sqoop/hive/TestHiveTypesForAvroTypeMapping.java ---
    @@ -41,30 +44,49 @@
     
       private final String hiveType;
       private final Schema schema;
    +  private final SqoopOptions options;
     
    -  @Parameters(name = "hiveType = {0}, schema = {1}")
    +  @Parameters(name = "hiveType = {0}, schema = {1}, options = {2}")
       public static Iterable<? extends Object> parameters() {
         return Arrays.asList(
    -        new Object[]{"BOOLEAN", Schema.create(Schema.Type.BOOLEAN)},
    -        new Object[]{"INT", Schema.create(Schema.Type.INT)},
    -        new Object[]{"BIGINT", Schema.create(Schema.Type.LONG)},
    -        new Object[]{"FLOAT", Schema.create(Schema.Type.FLOAT)},
    -        new Object[]{"DOUBLE", Schema.create(Schema.Type.DOUBLE)},
    -        new Object[]{"STRING", Schema.createEnum("ENUM", "doc", "namespce", new ArrayList<>())}, // Schema.Type.ENUM
    -        new Object[]{"STRING", Schema.create(Schema.Type.STRING)},
    -        new Object[]{"BINARY", Schema.create(Schema.Type.BYTES)},
    -        new Object[]{"BINARY", Schema.createFixed("Fixed", "doc", "space", 1) }
    -        //, new Object[]{"DECIMAL", Schema.create(Schema.Type.UNION).}
    +        new Object[]{"BOOLEAN", Schema.create(Schema.Type.BOOLEAN), new SqoopOptions()},
    --- End diff --
    
    We could static import the create methods from Schema so that would make these lines shorter.


---

[GitHub] sqoop pull request #60: SQOOP-3396: Add parquet numeric support for Parquet ...

Posted by szvasas <gi...@git.apache.org>.
Github user szvasas commented on a diff in the pull request:

    https://github.com/apache/sqoop/pull/60#discussion_r238331922
  
    --- Diff: src/test/org/apache/sqoop/importjob/numerictypes/NumericTypesImportTestBase.java ---
    @@ -65,240 +46,79 @@
      * 2. Decimal padding during avro or parquet import
      * In case of Oracle and Postgres, Sqoop has to pad the values with 0s to avoid errors.
      */
    -public abstract class NumericTypesImportTestBase<T extends AvroTestConfiguration & ParquetTestConfiguration> extends ImportJobTestCase implements DatabaseAdapterFactory {
    +public abstract class NumericTypesImportTestBase<T extends ImportJobTestConfiguration> extends ThirdPartyTestBase<T>  {
     
       public static final Log LOG = LogFactory.getLog(NumericTypesImportTestBase.class.getName());
     
    -  private Configuration conf = new Configuration();
    -
    -  private final T configuration;
    -  private final DatabaseAdapter adapter;
       private final boolean failWithoutExtraArgs;
       private final boolean failWithPadding;
     
    -  // Constants for the basic test case, that doesn't use extra arguments
    -  // that are required to avoid errors, i.e. padding and default precision and scale.
    -  protected final static boolean SUCCEED_WITHOUT_EXTRA_ARGS = false;
    -  protected final static boolean FAIL_WITHOUT_EXTRA_ARGS = true;
    -
    -  // Constants for the test case that has padding specified but not default precision and scale.
    -  protected final static boolean SUCCEED_WITH_PADDING_ONLY = false;
    -  protected final static boolean FAIL_WITH_PADDING_ONLY = true;
    -
    -  private Path tableDirPath;
    -
       public NumericTypesImportTestBase(T configuration, boolean failWithoutExtraArgs, boolean failWithPaddingOnly) {
    -    this.adapter = createAdapter();
    -    this.configuration = configuration;
    +    super(configuration);
         this.failWithoutExtraArgs = failWithoutExtraArgs;
         this.failWithPadding = failWithPaddingOnly;
       }
     
    -  @Rule
    -  public ExpectedException thrown = ExpectedException.none();
    -
    -  @Override
    -  protected Configuration getConf() {
    -    return conf;
    -  }
    -
    -  @Override
    -  protected boolean useHsqldbTestServer() {
    -    return false;
    -  }
    -
    -  @Override
    -  protected String getConnectString() {
    -    return adapter.getConnectionString();
    -  }
    -
    -  @Override
    -  protected SqoopOptions getSqoopOptions(Configuration conf) {
    -    SqoopOptions opts = new SqoopOptions(conf);
    -    adapter.injectConnectionParameters(opts);
    -    return opts;
    -  }
    -
    -  @Override
    -  protected void dropTableIfExists(String table) throws SQLException {
    -    adapter.dropTableIfExists(table, getManager());
    -  }
    -
       @Before
       public void setUp() {
         super.setUp();
    -    String[] names = configuration.getNames();
    -    String[] types = configuration.getTypes();
    -    createTableWithColTypesAndNames(names, types, new String[0]);
    -    List<String[]> inputData = configuration.getSampleData();
    -    for (String[] input  : inputData) {
    -      insertIntoTable(names, types, input);
    -    }
         tableDirPath = new Path(getWarehouseDir() + "/" + getTableName());
       }
     
    -  @After
    -  public void tearDown() {
    -    try {
    -      dropTableIfExists(getTableName());
    -    } catch (SQLException e) {
    -      LOG.warn("Error trying to drop table on tearDown: " + e);
    -    }
    -    super.tearDown();
    -  }
    +  public Path tableDirPath;
    --- End diff --
    
    This field could be protected.


---

[GitHub] sqoop pull request #60: SQOOP-3396: Add parquet numeric support for Parquet ...

Posted by fszabo2 <gi...@git.apache.org>.
Github user fszabo2 commented on a diff in the pull request:

    https://github.com/apache/sqoop/pull/60#discussion_r238706982
  
    --- Diff: src/test/org/apache/sqoop/importjob/numerictypes/NumericTypesImportTestBase.java ---
    @@ -65,240 +46,79 @@
      * 2. Decimal padding during avro or parquet import
      * In case of Oracle and Postgres, Sqoop has to pad the values with 0s to avoid errors.
      */
    -public abstract class NumericTypesImportTestBase<T extends AvroTestConfiguration & ParquetTestConfiguration> extends ImportJobTestCase implements DatabaseAdapterFactory {
    +public abstract class NumericTypesImportTestBase<T extends ImportJobTestConfiguration> extends ThirdPartyTestBase<T>  {
     
       public static final Log LOG = LogFactory.getLog(NumericTypesImportTestBase.class.getName());
     
    -  private Configuration conf = new Configuration();
    -
    -  private final T configuration;
    -  private final DatabaseAdapter adapter;
       private final boolean failWithoutExtraArgs;
       private final boolean failWithPadding;
     
    -  // Constants for the basic test case, that doesn't use extra arguments
    -  // that are required to avoid errors, i.e. padding and default precision and scale.
    -  protected final static boolean SUCCEED_WITHOUT_EXTRA_ARGS = false;
    -  protected final static boolean FAIL_WITHOUT_EXTRA_ARGS = true;
    -
    -  // Constants for the test case that has padding specified but not default precision and scale.
    -  protected final static boolean SUCCEED_WITH_PADDING_ONLY = false;
    -  protected final static boolean FAIL_WITH_PADDING_ONLY = true;
    -
    -  private Path tableDirPath;
    -
       public NumericTypesImportTestBase(T configuration, boolean failWithoutExtraArgs, boolean failWithPaddingOnly) {
    -    this.adapter = createAdapter();
    -    this.configuration = configuration;
    +    super(configuration);
         this.failWithoutExtraArgs = failWithoutExtraArgs;
         this.failWithPadding = failWithPaddingOnly;
       }
     
    -  @Rule
    -  public ExpectedException thrown = ExpectedException.none();
    -
    -  @Override
    -  protected Configuration getConf() {
    -    return conf;
    -  }
    -
    -  @Override
    -  protected boolean useHsqldbTestServer() {
    -    return false;
    -  }
    -
    -  @Override
    -  protected String getConnectString() {
    -    return adapter.getConnectionString();
    -  }
    -
    -  @Override
    -  protected SqoopOptions getSqoopOptions(Configuration conf) {
    -    SqoopOptions opts = new SqoopOptions(conf);
    -    adapter.injectConnectionParameters(opts);
    -    return opts;
    -  }
    -
    -  @Override
    -  protected void dropTableIfExists(String table) throws SQLException {
    -    adapter.dropTableIfExists(table, getManager());
    -  }
    -
       @Before
       public void setUp() {
         super.setUp();
    -    String[] names = configuration.getNames();
    -    String[] types = configuration.getTypes();
    -    createTableWithColTypesAndNames(names, types, new String[0]);
    -    List<String[]> inputData = configuration.getSampleData();
    -    for (String[] input  : inputData) {
    -      insertIntoTable(names, types, input);
    -    }
         tableDirPath = new Path(getWarehouseDir() + "/" + getTableName());
       }
     
    -  @After
    -  public void tearDown() {
    -    try {
    -      dropTableIfExists(getTableName());
    -    } catch (SQLException e) {
    -      LOG.warn("Error trying to drop table on tearDown: " + e);
    -    }
    -    super.tearDown();
    -  }
    +  public Path tableDirPath;
     
    -  private ArgumentArrayBuilder getArgsBuilder(SqoopOptions.FileLayout fileLayout) {
    -    ArgumentArrayBuilder builder = new ArgumentArrayBuilder();
    -    if (AvroDataFile.equals(fileLayout)) {
    -      builder.withOption("as-avrodatafile");
    -    }
    -    else if (ParquetFile.equals(fileLayout)) {
    -      builder.withOption("as-parquetfile");
    -    }
    +  @Rule
    +  public ExpectedException thrown = ExpectedException.none();
    +
    +  abstract public ArgumentArrayBuilder getArgsBuilder();
    +  abstract public void verify();
     
    +  public ArgumentArrayBuilder includeCommonOptions(ArgumentArrayBuilder builder) {
         return builder.withCommonHadoopFlags(true)
             .withOption("warehouse-dir", getWarehouseDir())
             .withOption("num-mappers", "1")
             .withOption("table", getTableName())
             .withOption("connect", getConnectString());
       }
     
    -  /**
    -   * Adds properties to the given arg builder for decimal precision and scale.
    -   * @param builder
    -   */
    -  private void addPrecisionAndScale(ArgumentArrayBuilder builder) {
    -    builder.withProperty("sqoop.avro.logical_types.decimal.default.precision", "38");
    -    builder.withProperty("sqoop.avro.logical_types.decimal.default.scale", "3");
    -  }
    -
    -  /**
    -   * Enables padding for decimals in avro and parquet import.
    -   * @param builder
    -   */
    -  private void addPadding(ArgumentArrayBuilder builder) {
    -    builder.withProperty("sqoop.avro.decimal_padding.enable", "true");
    -  }
    -
    -  private void addEnableAvroDecimal(ArgumentArrayBuilder builder) {
    -    builder.withProperty("sqoop.avro.logical_types.decimal.enable", "true");
    -  }
    -
    -  private void addEnableParquetDecimal(ArgumentArrayBuilder builder) {
    -    builder.withProperty("sqoop.parquet.logical_types.decimal.enable", "true");
    -  }
     
    -  private void configureJunitToExpectFailure(boolean failWithPadding) {
    -    if (failWithPadding) {
    +  @Test
    +  public void testImportWithoutPadding() throws IOException {
    +    if(failWithoutExtraArgs){
           thrown.expect(IOException.class);
           thrown.expectMessage("Failure during job; return status 1");
    +      NumericTypesTestUtils.configureJunitToExpectFailure(thrown);
         }
    -  }
    -
    -  @Test
    -  public void testAvroImportWithoutPadding() throws IOException {
    -    configureJunitToExpectFailure(failWithoutExtraArgs);
    -    ArgumentArrayBuilder builder = getArgsBuilder(AvroDataFile);
    -    addEnableAvroDecimal(builder);
    +    ArgumentArrayBuilder builder = getArgsBuilder();
         String[] args = builder.build();
         runImport(args);
         if (!failWithoutExtraArgs) {
    -      verify(AvroDataFile);
    -    }
    -  }
    -
    -  @Test
    -  public void testAvroImportWithPadding() throws IOException {
    -    configureJunitToExpectFailure(failWithPadding);
    -    ArgumentArrayBuilder builder = getArgsBuilder(AvroDataFile);
    -    addEnableAvroDecimal(builder);
    -    addPadding(builder);
    -    runImport(builder.build());
    -    if (!failWithPadding) {
    -      verify(AvroDataFile);
    +      verify();
         }
       }
     
       @Test
    -  public void testAvroImportWithDefaultPrecisionAndScale() throws  IOException {
    -    ArgumentArrayBuilder builder = getArgsBuilder(AvroDataFile);
    -    addEnableAvroDecimal(builder);
    -    addPadding(builder);
    -    addPrecisionAndScale(builder);
    -    runImport(builder.build());
    -    verify(AvroDataFile);
    -  }
    -
    -  @Test
    -  public void testParquetImportWithoutPadding() throws IOException {
    -    configureJunitToExpectFailure(failWithoutExtraArgs);
    -    ArgumentArrayBuilder builder = getArgsBuilder(ParquetFile);
    -    addEnableParquetDecimal(builder);
    -    String[] args = builder.build();
    -    runImport(args);
    -    if (!failWithoutExtraArgs) {
    -      verify(ParquetFile);
    +  public void testImportWithPadding() throws IOException {
    +    if(failWithPadding){
    +      thrown.expect(IOException.class);
    +      thrown.expectMessage("Failure during job; return status 1");
    +      NumericTypesTestUtils.configureJunitToExpectFailure(thrown);
    --- End diff --
    
    Indeed!


---

[GitHub] sqoop pull request #60: SQOOP-3396: Add parquet numeric support for Parquet ...

Posted by maugly24 <gi...@git.apache.org>.
Github user maugly24 commented on a diff in the pull request:

    https://github.com/apache/sqoop/pull/60#discussion_r237699087
  
    --- Diff: src/test/org/apache/sqoop/importjob/configuration/OracleImportJobTestConfigurationForNumber.java ---
    @@ -68,4 +68,14 @@
       public String toString() {
         return getClass().getSimpleName();
       }
    +
    +  @Override
    +  public Object[] getExpectedResultsForHive() {
    --- End diff --
    
    Same goes here for the duplication. Extract this part please!


---

[GitHub] sqoop pull request #60: SQOOP-3396: Add parquet numeric support for Parquet ...

Posted by szvasas <gi...@git.apache.org>.
Github user szvasas commented on a diff in the pull request:

    https://github.com/apache/sqoop/pull/60#discussion_r238984411
  
    --- Diff: src/test/org/apache/sqoop/hive/TestHiveTypesForAvroTypeMapping.java ---
    @@ -41,30 +44,49 @@
     
       private final String hiveType;
       private final Schema schema;
    +  private final SqoopOptions options;
     
    -  @Parameters(name = "hiveType = {0}, schema = {1}")
    +  @Parameters(name = "hiveType = {0}, schema = {1}, options = {2}")
       public static Iterable<? extends Object> parameters() {
         return Arrays.asList(
    -        new Object[]{"BOOLEAN", Schema.create(Schema.Type.BOOLEAN)},
    -        new Object[]{"INT", Schema.create(Schema.Type.INT)},
    -        new Object[]{"BIGINT", Schema.create(Schema.Type.LONG)},
    -        new Object[]{"FLOAT", Schema.create(Schema.Type.FLOAT)},
    -        new Object[]{"DOUBLE", Schema.create(Schema.Type.DOUBLE)},
    -        new Object[]{"STRING", Schema.createEnum("ENUM", "doc", "namespce", new ArrayList<>())}, // Schema.Type.ENUM
    -        new Object[]{"STRING", Schema.create(Schema.Type.STRING)},
    -        new Object[]{"BINARY", Schema.create(Schema.Type.BYTES)},
    -        new Object[]{"BINARY", Schema.createFixed("Fixed", "doc", "space", 1) }
    -        //, new Object[]{"DECIMAL", Schema.create(Schema.Type.UNION).}
    +        new Object[]{"BOOLEAN", Schema.create(Schema.Type.BOOLEAN), new SqoopOptions()},
    +        new Object[]{"INT", Schema.create(Schema.Type.INT), new SqoopOptions()},
    +        new Object[]{"BIGINT", Schema.create(Schema.Type.LONG), new SqoopOptions()},
    +        new Object[]{"FLOAT", Schema.create(Schema.Type.FLOAT), new SqoopOptions()},
    +        new Object[]{"DOUBLE", Schema.create(Schema.Type.DOUBLE), new SqoopOptions()},
    +        new Object[]{"STRING", Schema.createEnum("ENUM", "doc", "namespce", new ArrayList<>()), new SqoopOptions()}, // Schema.Type.ENUM
    --- End diff --
    
    Do we need the comment at the end of the line?


---

[GitHub] sqoop pull request #60: SQOOP-3396: Add parquet numeric support for Parquet ...

Posted by fszabo2 <gi...@git.apache.org>.
Github user fszabo2 commented on a diff in the pull request:

    https://github.com/apache/sqoop/pull/60#discussion_r238251323
  
    --- Diff: src/test/org/apache/sqoop/importjob/configuration/MysqlImportJobTestConfiguration.java ---
    @@ -65,4 +66,21 @@
       public String toString() {
         return getClass().getSimpleName();
       }
    +
    +  @Override
    +  public Object[] getExpectedResultsForHive() {
    --- End diff --
    
    Fair point!
    
    I added a couple of comments, hope it clarifies the magic numbers.


---

[GitHub] sqoop pull request #60: SQOOP-3396: Add parquet numeric support for Parquet ...

Posted by szvasas <gi...@git.apache.org>.
Github user szvasas commented on a diff in the pull request:

    https://github.com/apache/sqoop/pull/60#discussion_r238984033
  
    --- Diff: src/test/org/apache/sqoop/hive/TestHiveTypesForAvroTypeMapping.java ---
    @@ -41,30 +44,49 @@
     
       private final String hiveType;
       private final Schema schema;
    +  private final SqoopOptions options;
     
    -  @Parameters(name = "hiveType = {0}, schema = {1}")
    +  @Parameters(name = "hiveType = {0}, schema = {1}, options = {2}")
    --- End diff --
    
    I would not add options here, because SqoopOptions does not have a nice toString() method so we won't see anything meaningful in the test output.


---

[GitHub] sqoop pull request #60: SQOOP-3396: Add parquet numeric support for Parquet ...

Posted by szvasas <gi...@git.apache.org>.
Github user szvasas commented on a diff in the pull request:

    https://github.com/apache/sqoop/pull/60#discussion_r238318482
  
    --- Diff: src/java/org/apache/sqoop/hive/HiveTypes.java ---
    @@ -83,27 +89,58 @@ public static String toHiveType(int sqlType) {
           }
       }
     
    -  public static String toHiveType(Schema.Type avroType) {
    -      switch (avroType) {
    -        case BOOLEAN:
    -          return HIVE_TYPE_BOOLEAN;
    -        case INT:
    -          return HIVE_TYPE_INT;
    -        case LONG:
    -          return HIVE_TYPE_BIGINT;
    -        case FLOAT:
    -          return HIVE_TYPE_FLOAT;
    -        case DOUBLE:
    -          return HIVE_TYPE_DOUBLE;
    -        case STRING:
    -        case ENUM:
    -          return HIVE_TYPE_STRING;
    -        case BYTES:
    -        case FIXED:
    -          return HIVE_TYPE_BINARY;
    -        default:
    -          return null;
    +  public static String toHiveType(Schema schema, SqoopOptions options) {
    +    if (schema.getType() == Schema.Type.UNION) {
    +      for (Schema subSchema : schema.getTypes()) {
    +        if (subSchema.getType() != Schema.Type.NULL) {
    +          return toHiveType(subSchema, options);
    +        }
    +      }
    +    }
    +
    +    Schema.Type avroType = schema.getType();
    +    switch (avroType) {
    +      case BOOLEAN:
    +        return HIVE_TYPE_BOOLEAN;
    +      case INT:
    +        return HIVE_TYPE_INT;
    +      case LONG:
    +        return HIVE_TYPE_BIGINT;
    +      case FLOAT:
    +        return HIVE_TYPE_FLOAT;
    +      case DOUBLE:
    +        return HIVE_TYPE_DOUBLE;
    +      case STRING:
    +      case ENUM:
    +        return HIVE_TYPE_STRING;
    +      case BYTES:
    +        return mapToDecimalOrBinary(schema, options);
    +      case FIXED:
    +        return HIVE_TYPE_BINARY;
    +      default:
    +        throw new RuntimeException(String.format("There is no Hive type mapping defined for the Avro type of: %s ", avroType.getName()));
    +    }
    +  }
    +
    +  private static String mapToDecimalOrBinary(Schema schema, SqoopOptions options) {
    +    boolean logicalTypesEnabled = options.getConf().getBoolean(ConfigurationConstants.PROP_ENABLE_PARQUET_LOGICAL_TYPE_DECIMAL, false);
    +    if (logicalTypesEnabled && schema.getLogicalType() != null && schema.getLogicalType() instanceof Decimal) {
    --- End diff --
    
    The null check is redundant here.


---

[GitHub] sqoop pull request #60: SQOOP-3396: Add parquet numeric support for Parquet ...

Posted by maugly24 <gi...@git.apache.org>.
Github user maugly24 commented on a diff in the pull request:

    https://github.com/apache/sqoop/pull/60#discussion_r237698797
  
    --- Diff: src/test/org/apache/sqoop/hive/numerictypes/NumericTypesHiveImportTest.java ---
    @@ -0,0 +1,202 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + * <p>
    + * http://www.apache.org/licenses/LICENSE-2.0
    + * <p>
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.sqoop.hive.numerictypes;
    +
    +import org.apache.sqoop.hive.minicluster.HiveMiniCluster;
    +import org.apache.sqoop.hive.minicluster.NoAuthenticationConfiguration;
    +import org.apache.sqoop.importjob.configuration.HiveTestConfiguration;
    +import org.apache.sqoop.importjob.configuration.MysqlImportJobTestConfiguration;
    +import org.apache.sqoop.importjob.configuration.OracleImportJobTestConfiguration;
    +import org.apache.sqoop.importjob.configuration.OracleImportJobTestConfigurationForNumber;
    +import org.apache.sqoop.importjob.configuration.PostgresqlImportJobTestConfigurationForNumeric;
    +import org.apache.sqoop.importjob.configuration.PostgresqlImportJobTestConfigurationPaddingShouldSucceed;
    +import org.apache.sqoop.importjob.configuration.SqlServerImportJobTestConfiguration;
    +import org.apache.sqoop.testcategories.sqooptest.IntegrationTest;
    +import org.apache.sqoop.testcategories.thirdpartytest.MysqlTest;
    +import org.apache.sqoop.testcategories.thirdpartytest.OracleTest;
    +import org.apache.sqoop.testcategories.thirdpartytest.PostgresqlTest;
    +import org.apache.sqoop.testcategories.thirdpartytest.SqlServerTest;
    +import org.apache.sqoop.testutil.HiveServer2TestUtil;
    +import org.apache.sqoop.testutil.NumericTypesTestUtils;
    +import org.apache.sqoop.testutil.adapter.DatabaseAdapter;
    +import org.apache.sqoop.testutil.adapter.MysqlDatabaseAdapter;
    +import org.apache.sqoop.testutil.adapter.OracleDatabaseAdapter;
    +import org.apache.sqoop.testutil.adapter.PostgresDatabaseAdapter;
    +import org.apache.sqoop.testutil.adapter.SqlServerDatabaseAdapter;
    +import org.apache.sqoop.util.BlockJUnit4ClassRunnerWithParametersFactory;
    +import org.junit.AfterClass;
    +import org.junit.BeforeClass;
    +import org.junit.Rule;
    +import org.junit.experimental.categories.Category;
    +import org.junit.experimental.runners.Enclosed;
    +import org.junit.rules.ExpectedException;
    +import org.junit.runner.RunWith;
    +import org.junit.runners.Parameterized;
    +
    +import java.util.Arrays;
    +
    +import static org.apache.sqoop.testutil.NumericTypesTestUtils.FAIL_WITHOUT_EXTRA_ARGS;
    +import static org.apache.sqoop.testutil.NumericTypesTestUtils.FAIL_WITH_PADDING_ONLY;
    +import static org.apache.sqoop.testutil.NumericTypesTestUtils.SUCCEED_WITHOUT_EXTRA_ARGS;
    +import static org.apache.sqoop.testutil.NumericTypesTestUtils.SUCCEED_WITH_PADDING_ONLY;
    +
    +@RunWith(Enclosed.class)
    +@Category(IntegrationTest.class)
    +public class NumericTypesHiveImportTest {
    +
    +  @Rule
    +  public ExpectedException expectedException = ExpectedException.none();
    +
    +  private static HiveMiniCluster hiveMiniCluster;
    +
    +  private static HiveServer2TestUtil hiveServer2TestUtil;
    +
    +  @BeforeClass
    +  public static void beforeClass() {
    +    startHiveMiniCluster();
    +  }
    +
    +  @AfterClass
    +  public static void afterClass() {
    +    stopHiveMiniCluster();
    +  }
    +
    +  public static void startHiveMiniCluster() {
    +    hiveMiniCluster = new HiveMiniCluster(new NoAuthenticationConfiguration());
    +    hiveMiniCluster.start();
    +    hiveServer2TestUtil = new HiveServer2TestUtil(hiveMiniCluster.getUrl());
    +  }
    +
    +  public static void stopHiveMiniCluster() {
    +    hiveMiniCluster.stop();
    +  }
    +
    +  @Category(MysqlTest.class)
    +  public static class MysqlNumericTypesHiveImportTest extends NumericTypesHiveImportTestBase {
    +
    +    public MysqlNumericTypesHiveImportTest() {
    +      super(new MysqlImportJobTestConfiguration(), NumericTypesTestUtils.SUCCEED_WITHOUT_EXTRA_ARGS, NumericTypesTestUtils.SUCCEED_WITH_PADDING_ONLY);
    +    }
    +
    +    @Override
    +    public DatabaseAdapter createAdapter() {
    +      return new MysqlDatabaseAdapter();
    +    }
    +
    +    @Override
    +    protected HiveMiniCluster getHiveMiniCluster() {
    --- End diff --
    
    I wold move getHiveMiniCluster and getHiveServer2TestUtil to the base class, with the concrete implementation, and just would pass the references in the super constructor call. Otherwise you'll have to keep this duplication on all 4 underlying implementation (MySQL, Postgres, MsSQL, Oracle), which sounds quite bad.


---

[GitHub] sqoop pull request #60: SQOOP-3396: Add parquet numeric support for Parquet ...

Posted by fszabo2 <gi...@git.apache.org>.
Github user fszabo2 commented on a diff in the pull request:

    https://github.com/apache/sqoop/pull/60#discussion_r237890806
  
    --- Diff: src/java/org/apache/sqoop/hive/HiveTypes.java ---
    @@ -37,16 +42,28 @@
       private static final String HIVE_TYPE_STRING = "STRING";
       private static final String HIVE_TYPE_BOOLEAN = "BOOLEAN";
       private static final String HIVE_TYPE_BINARY = "BINARY";
    +  private static final String HIVE_TYPE_DECIMAL = "DECIMAL";
     
       public static final Log LOG = LogFactory.getLog(HiveTypes.class.getName());
     
       private HiveTypes() { }
     
    +
    +  public static String toHiveType(int sqlType, SqoopOptions options) {
    +
    +    if (options.getConf().getBoolean(ConfigurationConstants.PROP_ENABLE_PARQUET_LOGICAL_TYPE_DECIMAL, false)
    --- End diff --
    
    I refactored this file.


---

[GitHub] sqoop pull request #60: SQOOP-3396: Add parquet numeric support for Parquet ...

Posted by szvasas <gi...@git.apache.org>.
Github user szvasas commented on a diff in the pull request:

    https://github.com/apache/sqoop/pull/60#discussion_r238731504
  
    --- Diff: src/java/org/apache/sqoop/hive/HiveTypes.java ---
    @@ -83,27 +89,58 @@ public static String toHiveType(int sqlType) {
           }
       }
     
    -  public static String toHiveType(Schema.Type avroType) {
    -      switch (avroType) {
    -        case BOOLEAN:
    -          return HIVE_TYPE_BOOLEAN;
    -        case INT:
    -          return HIVE_TYPE_INT;
    -        case LONG:
    -          return HIVE_TYPE_BIGINT;
    -        case FLOAT:
    -          return HIVE_TYPE_FLOAT;
    -        case DOUBLE:
    -          return HIVE_TYPE_DOUBLE;
    -        case STRING:
    -        case ENUM:
    -          return HIVE_TYPE_STRING;
    -        case BYTES:
    -        case FIXED:
    -          return HIVE_TYPE_BINARY;
    -        default:
    -          return null;
    +  public static String toHiveType(Schema schema, SqoopOptions options) {
    +    if (schema.getType() == Schema.Type.UNION) {
    +      for (Schema subSchema : schema.getTypes()) {
    +        if (subSchema.getType() != Schema.Type.NULL) {
    +          return toHiveType(subSchema, options);
    +        }
    +      }
    +    }
    +
    +    Schema.Type avroType = schema.getType();
    +    switch (avroType) {
    +      case BOOLEAN:
    +        return HIVE_TYPE_BOOLEAN;
    +      case INT:
    +        return HIVE_TYPE_INT;
    +      case LONG:
    +        return HIVE_TYPE_BIGINT;
    +      case FLOAT:
    +        return HIVE_TYPE_FLOAT;
    +      case DOUBLE:
    +        return HIVE_TYPE_DOUBLE;
    +      case STRING:
    +      case ENUM:
    +        return HIVE_TYPE_STRING;
    +      case BYTES:
    +        return mapToDecimalOrBinary(schema, options);
    +      case FIXED:
    +        return HIVE_TYPE_BINARY;
    +      default:
    +        throw new RuntimeException(String.format("There is no Hive type mapping defined for the Avro type of: %s ", avroType.getName()));
    +    }
    +  }
    +
    +  private static String mapToDecimalOrBinary(Schema schema, SqoopOptions options) {
    +    boolean logicalTypesEnabled = options.getConf().getBoolean(ConfigurationConstants.PROP_ENABLE_PARQUET_LOGICAL_TYPE_DECIMAL, false);
    +    if (logicalTypesEnabled && schema.getLogicalType() != null && schema.getLogicalType() instanceof Decimal) {
    +      Decimal decimal = (Decimal) schema.getLogicalType();
    +
    +      // trimming precision and scale to Hive's maximum values.
    +      int precision = Math.min(HiveDecimal.MAX_PRECISION, decimal.getPrecision());
    +      if (precision < decimal.getPrecision()) {
    +        LOG.warn("Warning! Precision in the Hive table definition will be smaller than the actual precision of the column on storage! Hive may not be able to read data from this column.");
    --- End diff --
    
    Sorry, I meant that apart from the warning messages here we should mention it in the documentation too.


---

[GitHub] sqoop pull request #60: SQOOP-3396: Add parquet numeric support for Parquet ...

Posted by fszabo2 <gi...@git.apache.org>.
Github user fszabo2 commented on a diff in the pull request:

    https://github.com/apache/sqoop/pull/60#discussion_r238694607
  
    --- Diff: src/test/org/apache/sqoop/importjob/numerictypes/NumericTypesImportTestBase.java ---
    @@ -65,240 +46,79 @@
      * 2. Decimal padding during avro or parquet import
      * In case of Oracle and Postgres, Sqoop has to pad the values with 0s to avoid errors.
      */
    -public abstract class NumericTypesImportTestBase<T extends AvroTestConfiguration & ParquetTestConfiguration> extends ImportJobTestCase implements DatabaseAdapterFactory {
    +public abstract class NumericTypesImportTestBase<T extends ImportJobTestConfiguration> extends ThirdPartyTestBase<T>  {
     
       public static final Log LOG = LogFactory.getLog(NumericTypesImportTestBase.class.getName());
     
    -  private Configuration conf = new Configuration();
    -
    -  private final T configuration;
    -  private final DatabaseAdapter adapter;
       private final boolean failWithoutExtraArgs;
       private final boolean failWithPadding;
     
    -  // Constants for the basic test case, that doesn't use extra arguments
    -  // that are required to avoid errors, i.e. padding and default precision and scale.
    -  protected final static boolean SUCCEED_WITHOUT_EXTRA_ARGS = false;
    -  protected final static boolean FAIL_WITHOUT_EXTRA_ARGS = true;
    -
    -  // Constants for the test case that has padding specified but not default precision and scale.
    -  protected final static boolean SUCCEED_WITH_PADDING_ONLY = false;
    -  protected final static boolean FAIL_WITH_PADDING_ONLY = true;
    -
    -  private Path tableDirPath;
    -
       public NumericTypesImportTestBase(T configuration, boolean failWithoutExtraArgs, boolean failWithPaddingOnly) {
    -    this.adapter = createAdapter();
    -    this.configuration = configuration;
    +    super(configuration);
         this.failWithoutExtraArgs = failWithoutExtraArgs;
         this.failWithPadding = failWithPaddingOnly;
       }
     
    -  @Rule
    -  public ExpectedException thrown = ExpectedException.none();
    -
    -  @Override
    -  protected Configuration getConf() {
    -    return conf;
    -  }
    -
    -  @Override
    -  protected boolean useHsqldbTestServer() {
    -    return false;
    -  }
    -
    -  @Override
    -  protected String getConnectString() {
    -    return adapter.getConnectionString();
    -  }
    -
    -  @Override
    -  protected SqoopOptions getSqoopOptions(Configuration conf) {
    -    SqoopOptions opts = new SqoopOptions(conf);
    -    adapter.injectConnectionParameters(opts);
    -    return opts;
    -  }
    -
    -  @Override
    -  protected void dropTableIfExists(String table) throws SQLException {
    -    adapter.dropTableIfExists(table, getManager());
    -  }
    -
       @Before
       public void setUp() {
         super.setUp();
    -    String[] names = configuration.getNames();
    -    String[] types = configuration.getTypes();
    -    createTableWithColTypesAndNames(names, types, new String[0]);
    -    List<String[]> inputData = configuration.getSampleData();
    -    for (String[] input  : inputData) {
    -      insertIntoTable(names, types, input);
    -    }
         tableDirPath = new Path(getWarehouseDir() + "/" + getTableName());
       }
     
    -  @After
    -  public void tearDown() {
    -    try {
    -      dropTableIfExists(getTableName());
    -    } catch (SQLException e) {
    -      LOG.warn("Error trying to drop table on tearDown: " + e);
    -    }
    -    super.tearDown();
    -  }
    +  public Path tableDirPath;
    --- End diff --
    
    Makes sense


---

[GitHub] sqoop pull request #60: SQOOP-3396: Add parquet numeric support for Parquet ...

Posted by maugly24 <gi...@git.apache.org>.
Github user maugly24 commented on a diff in the pull request:

    https://github.com/apache/sqoop/pull/60#discussion_r237698979
  
    --- Diff: src/test/org/apache/sqoop/importjob/configuration/MysqlImportJobTestConfiguration.java ---
    @@ -65,4 +66,21 @@
       public String toString() {
         return getClass().getSimpleName();
       }
    +
    +  @Override
    +  public Object[] getExpectedResultsForHive() {
    --- End diff --
    
    Source of code duplication again!
    I would move these expected values into a helper class or a super class (choice is up to you).


---

[GitHub] sqoop pull request #60: SQOOP-3396: Add parquet numeric support for Parquet ...

Posted by szvasas <gi...@git.apache.org>.
Github user szvasas commented on a diff in the pull request:

    https://github.com/apache/sqoop/pull/60#discussion_r238332023
  
    --- Diff: src/test/org/apache/sqoop/importjob/numerictypes/NumericTypesParquetImportTestBase.java ---
    @@ -0,0 +1,89 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + * <p>
    + * http://www.apache.org/licenses/LICENSE-2.0
    + * <p>
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.sqoop.importjob.numerictypes;
    +
    +import org.apache.commons.logging.Log;
    +import org.apache.commons.logging.LogFactory;
    +import org.apache.hadoop.fs.Path;
    +import org.apache.parquet.schema.MessageType;
    +import org.apache.parquet.schema.OriginalType;
    +import org.apache.sqoop.importjob.configuration.ParquetTestConfiguration;
    +import org.apache.sqoop.testutil.ArgumentArrayBuilder;
    +import org.apache.sqoop.testutil.NumericTypesTestUtils;
    +import org.apache.sqoop.util.ParquetReader;
    +import org.junit.Before;
    +
    +import java.util.Arrays;
    +
    +import static org.junit.Assert.assertEquals;
    +
    +public abstract class NumericTypesParquetImportTestBase<T extends ParquetTestConfiguration> extends NumericTypesImportTestBase<T>  {
    +
    +  public static final Log LOG = LogFactory.getLog(NumericTypesParquetImportTestBase.class.getName());
    +
    +  public NumericTypesParquetImportTestBase(T configuration, boolean failWithoutExtraArgs, boolean failWithPaddingOnly) {
    +    super(configuration, failWithoutExtraArgs, failWithPaddingOnly);
    +  }
    +
    +  @Before
    +  public void setUp() {
    +    super.setUp();
    +    tableDirPath = new Path(getWarehouseDir() + "/" + getTableName());
    --- End diff --
    
    tableDirPath seems to be initialized in super.setUp, do we need this here?


---

[GitHub] sqoop pull request #60: SQOOP-3396: Add parquet numeric support for Parquet ...

Posted by szvasas <gi...@git.apache.org>.
Github user szvasas commented on a diff in the pull request:

    https://github.com/apache/sqoop/pull/60#discussion_r237795529
  
    --- Diff: src/java/org/apache/sqoop/hive/HiveTypes.java ---
    @@ -37,16 +42,28 @@
       private static final String HIVE_TYPE_STRING = "STRING";
       private static final String HIVE_TYPE_BOOLEAN = "BOOLEAN";
       private static final String HIVE_TYPE_BINARY = "BINARY";
    +  private static final String HIVE_TYPE_DECIMAL = "DECIMAL";
     
       public static final Log LOG = LogFactory.getLog(HiveTypes.class.getName());
     
       private HiveTypes() { }
     
    +
    +  public static String toHiveType(int sqlType, SqoopOptions options) {
    +
    +    if (options.getConf().getBoolean(ConfigurationConstants.PROP_ENABLE_PARQUET_LOGICAL_TYPE_DECIMAL, false)
    --- End diff --
    
    I think this check could go to the original toHiveType to the NUMERIC/DECIMAL switch branch in a separate method.


---

[GitHub] sqoop pull request #60: SQOOP-3396: Add parquet numeric support for Parquet ...

Posted by fszabo2 <gi...@git.apache.org>.
Github user fszabo2 commented on a diff in the pull request:

    https://github.com/apache/sqoop/pull/60#discussion_r238694488
  
    --- Diff: src/test/org/apache/sqoop/importjob/numerictypes/NumericTypesAvroImportTestBase.java ---
    @@ -0,0 +1,59 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + * <p>
    + * http://www.apache.org/licenses/LICENSE-2.0
    + * <p>
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.sqoop.importjob.numerictypes;
    +
    +import org.apache.commons.logging.Log;
    +import org.apache.commons.logging.LogFactory;
    +import org.apache.hadoop.fs.Path;
    +import org.apache.sqoop.importjob.configuration.AvroTestConfiguration;
    +import org.apache.sqoop.testutil.ArgumentArrayBuilder;
    +import org.apache.sqoop.testutil.AvroTestUtils;
    +import org.apache.sqoop.testutil.NumericTypesTestUtils;
    +import org.junit.Before;
    +
    +public abstract class NumericTypesAvroImportTestBase<T extends AvroTestConfiguration> extends NumericTypesImportTestBase<T>  {
    +
    +  public static final Log LOG = LogFactory.getLog(NumericTypesAvroImportTestBase.class.getName());
    +
    +  public NumericTypesAvroImportTestBase(T configuration, boolean failWithoutExtraArgs, boolean failWithPaddingOnly) {
    +    super(configuration, failWithoutExtraArgs, failWithPaddingOnly);
    +  }
    +
    +  @Before
    +  public void setUp() {
    +    super.setUp();
    +    tableDirPath = new Path(getWarehouseDir() + "/" + getTableName());
    --- End diff --
    
    Nope.


---

[GitHub] sqoop pull request #60: SQOOP-3396: Add parquet numeric support for Parquet ...

Posted by szvasas <gi...@git.apache.org>.
Github user szvasas commented on a diff in the pull request:

    https://github.com/apache/sqoop/pull/60#discussion_r239108068
  
    --- Diff: src/test/org/apache/sqoop/importjob/numerictypes/NumericTypesParquetImportTestBase.java ---
    @@ -0,0 +1,83 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + * <p>
    + * http://www.apache.org/licenses/LICENSE-2.0
    + * <p>
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.sqoop.importjob.numerictypes;
    +
    +import org.apache.commons.logging.Log;
    +import org.apache.commons.logging.LogFactory;
    +import org.apache.hadoop.fs.Path;
    --- End diff --
    
    Unused import


---

[GitHub] sqoop pull request #60: SQOOP-3396: Add parquet numeric support for Parquet ...

Posted by fszabo2 <gi...@git.apache.org>.
Github user fszabo2 commented on a diff in the pull request:

    https://github.com/apache/sqoop/pull/60#discussion_r238707092
  
    --- Diff: src/test/org/apache/sqoop/importjob/numerictypes/NumericTypesImportTestBase.java ---
    @@ -65,240 +46,79 @@
      * 2. Decimal padding during avro or parquet import
      * In case of Oracle and Postgres, Sqoop has to pad the values with 0s to avoid errors.
      */
    -public abstract class NumericTypesImportTestBase<T extends AvroTestConfiguration & ParquetTestConfiguration> extends ImportJobTestCase implements DatabaseAdapterFactory {
    +public abstract class NumericTypesImportTestBase<T extends ImportJobTestConfiguration> extends ThirdPartyTestBase<T>  {
     
       public static final Log LOG = LogFactory.getLog(NumericTypesImportTestBase.class.getName());
     
    -  private Configuration conf = new Configuration();
    -
    -  private final T configuration;
    -  private final DatabaseAdapter adapter;
       private final boolean failWithoutExtraArgs;
       private final boolean failWithPadding;
     
    -  // Constants for the basic test case, that doesn't use extra arguments
    -  // that are required to avoid errors, i.e. padding and default precision and scale.
    -  protected final static boolean SUCCEED_WITHOUT_EXTRA_ARGS = false;
    -  protected final static boolean FAIL_WITHOUT_EXTRA_ARGS = true;
    -
    -  // Constants for the test case that has padding specified but not default precision and scale.
    -  protected final static boolean SUCCEED_WITH_PADDING_ONLY = false;
    -  protected final static boolean FAIL_WITH_PADDING_ONLY = true;
    -
    -  private Path tableDirPath;
    -
       public NumericTypesImportTestBase(T configuration, boolean failWithoutExtraArgs, boolean failWithPaddingOnly) {
    -    this.adapter = createAdapter();
    -    this.configuration = configuration;
    +    super(configuration);
         this.failWithoutExtraArgs = failWithoutExtraArgs;
         this.failWithPadding = failWithPaddingOnly;
       }
     
    -  @Rule
    -  public ExpectedException thrown = ExpectedException.none();
    -
    -  @Override
    -  protected Configuration getConf() {
    -    return conf;
    -  }
    -
    -  @Override
    -  protected boolean useHsqldbTestServer() {
    -    return false;
    -  }
    -
    -  @Override
    -  protected String getConnectString() {
    -    return adapter.getConnectionString();
    -  }
    -
    -  @Override
    -  protected SqoopOptions getSqoopOptions(Configuration conf) {
    -    SqoopOptions opts = new SqoopOptions(conf);
    -    adapter.injectConnectionParameters(opts);
    -    return opts;
    -  }
    -
    -  @Override
    -  protected void dropTableIfExists(String table) throws SQLException {
    -    adapter.dropTableIfExists(table, getManager());
    -  }
    -
       @Before
       public void setUp() {
         super.setUp();
    -    String[] names = configuration.getNames();
    -    String[] types = configuration.getTypes();
    -    createTableWithColTypesAndNames(names, types, new String[0]);
    -    List<String[]> inputData = configuration.getSampleData();
    -    for (String[] input  : inputData) {
    -      insertIntoTable(names, types, input);
    -    }
         tableDirPath = new Path(getWarehouseDir() + "/" + getTableName());
       }
     
    -  @After
    -  public void tearDown() {
    -    try {
    -      dropTableIfExists(getTableName());
    -    } catch (SQLException e) {
    -      LOG.warn("Error trying to drop table on tearDown: " + e);
    -    }
    -    super.tearDown();
    -  }
    +  public Path tableDirPath;
     
    -  private ArgumentArrayBuilder getArgsBuilder(SqoopOptions.FileLayout fileLayout) {
    -    ArgumentArrayBuilder builder = new ArgumentArrayBuilder();
    -    if (AvroDataFile.equals(fileLayout)) {
    -      builder.withOption("as-avrodatafile");
    -    }
    -    else if (ParquetFile.equals(fileLayout)) {
    -      builder.withOption("as-parquetfile");
    -    }
    +  @Rule
    +  public ExpectedException thrown = ExpectedException.none();
    +
    +  abstract public ArgumentArrayBuilder getArgsBuilder();
    +  abstract public void verify();
     
    +  public ArgumentArrayBuilder includeCommonOptions(ArgumentArrayBuilder builder) {
         return builder.withCommonHadoopFlags(true)
             .withOption("warehouse-dir", getWarehouseDir())
             .withOption("num-mappers", "1")
             .withOption("table", getTableName())
             .withOption("connect", getConnectString());
       }
     
    -  /**
    -   * Adds properties to the given arg builder for decimal precision and scale.
    -   * @param builder
    -   */
    -  private void addPrecisionAndScale(ArgumentArrayBuilder builder) {
    -    builder.withProperty("sqoop.avro.logical_types.decimal.default.precision", "38");
    -    builder.withProperty("sqoop.avro.logical_types.decimal.default.scale", "3");
    -  }
    -
    -  /**
    -   * Enables padding for decimals in avro and parquet import.
    -   * @param builder
    -   */
    -  private void addPadding(ArgumentArrayBuilder builder) {
    -    builder.withProperty("sqoop.avro.decimal_padding.enable", "true");
    -  }
    -
    -  private void addEnableAvroDecimal(ArgumentArrayBuilder builder) {
    -    builder.withProperty("sqoop.avro.logical_types.decimal.enable", "true");
    -  }
    -
    -  private void addEnableParquetDecimal(ArgumentArrayBuilder builder) {
    -    builder.withProperty("sqoop.parquet.logical_types.decimal.enable", "true");
    -  }
     
    -  private void configureJunitToExpectFailure(boolean failWithPadding) {
    -    if (failWithPadding) {
    +  @Test
    +  public void testImportWithoutPadding() throws IOException {
    +    if(failWithoutExtraArgs){
           thrown.expect(IOException.class);
           thrown.expectMessage("Failure during job; return status 1");
    +      NumericTypesTestUtils.configureJunitToExpectFailure(thrown);
    --- End diff --
    
    Indeed! Removed lines above.


---

[GitHub] sqoop pull request #60: SQOOP-3396: Add parquet numeric support for Parquet ...

Posted by fszabo2 <gi...@git.apache.org>.
Github user fszabo2 commented on a diff in the pull request:

    https://github.com/apache/sqoop/pull/60#discussion_r238251774
  
    --- Diff: src/java/org/apache/sqoop/hive/HiveTypes.java ---
    @@ -37,16 +42,28 @@
       private static final String HIVE_TYPE_STRING = "STRING";
       private static final String HIVE_TYPE_BOOLEAN = "BOOLEAN";
       private static final String HIVE_TYPE_BINARY = "BINARY";
    +  private static final String HIVE_TYPE_DECIMAL = "DECIMAL";
     
       public static final Log LOG = LogFactory.getLog(HiveTypes.class.getName());
     
       private HiveTypes() { }
     
    +
    +  public static String toHiveType(int sqlType, SqoopOptions options) {
    +
    +    if (options.getConf().getBoolean(ConfigurationConstants.PROP_ENABLE_PARQUET_LOGICAL_TYPE_DECIMAL, false)
    +        && (sqlType == Types.NUMERIC || sqlType == Types.DECIMAL)){
    +      return HIVE_TYPE_DECIMAL;
    +    }
    +    return toHiveType(sqlType);
    +  }
    +
    +
       /**
        * Given JDBC SQL types coming from another database, what is the best
        * mapping to a Hive-specific type?
        */
    -  public static String toHiveType(int sqlType) {
    +  private static String toHiveType(int sqlType) {
    --- End diff --
    
    After taking another look at this file, it turns out that this method is only called textfile import. Since my only intended use case is parquetfile, I reverted this part of the change.
    
    The implementation will be throwing a RuntimeException (for mapping error) and log warnings in case of a parquet files, as you've suggested.


---

[GitHub] sqoop pull request #60: SQOOP-3396: Add parquet numeric support for Parquet ...

Posted by fszabo2 <gi...@git.apache.org>.
Github user fszabo2 commented on a diff in the pull request:

    https://github.com/apache/sqoop/pull/60#discussion_r239008539
  
    --- Diff: src/test/org/apache/sqoop/hive/TestHiveTypesForAvroTypeMapping.java ---
    @@ -41,30 +44,49 @@
     
       private final String hiveType;
       private final Schema schema;
    +  private final SqoopOptions options;
     
    -  @Parameters(name = "hiveType = {0}, schema = {1}")
    +  @Parameters(name = "hiveType = {0}, schema = {1}, options = {2}")
    --- End diff --
    
    removed


---

[GitHub] sqoop pull request #60: SQOOP-3396: Add parquet numeric support for Parquet ...

Posted by fszabo2 <gi...@git.apache.org>.
Github user fszabo2 commented on a diff in the pull request:

    https://github.com/apache/sqoop/pull/60#discussion_r239008265
  
    --- Diff: src/java/org/apache/sqoop/hive/HiveTypes.java ---
    @@ -83,27 +89,58 @@ public static String toHiveType(int sqlType) {
           }
       }
     
    -  public static String toHiveType(Schema.Type avroType) {
    -      switch (avroType) {
    -        case BOOLEAN:
    -          return HIVE_TYPE_BOOLEAN;
    -        case INT:
    -          return HIVE_TYPE_INT;
    -        case LONG:
    -          return HIVE_TYPE_BIGINT;
    -        case FLOAT:
    -          return HIVE_TYPE_FLOAT;
    -        case DOUBLE:
    -          return HIVE_TYPE_DOUBLE;
    -        case STRING:
    -        case ENUM:
    -          return HIVE_TYPE_STRING;
    -        case BYTES:
    -        case FIXED:
    -          return HIVE_TYPE_BINARY;
    -        default:
    -          return null;
    +  public static String toHiveType(Schema schema, SqoopOptions options) {
    +    if (schema.getType() == Schema.Type.UNION) {
    +      for (Schema subSchema : schema.getTypes()) {
    +        if (subSchema.getType() != Schema.Type.NULL) {
    +          return toHiveType(subSchema, options);
    +        }
    +      }
    +    }
    +
    +    Schema.Type avroType = schema.getType();
    +    switch (avroType) {
    +      case BOOLEAN:
    +        return HIVE_TYPE_BOOLEAN;
    +      case INT:
    +        return HIVE_TYPE_INT;
    +      case LONG:
    +        return HIVE_TYPE_BIGINT;
    +      case FLOAT:
    +        return HIVE_TYPE_FLOAT;
    +      case DOUBLE:
    +        return HIVE_TYPE_DOUBLE;
    +      case STRING:
    +      case ENUM:
    +        return HIVE_TYPE_STRING;
    +      case BYTES:
    +        return mapToDecimalOrBinary(schema, options);
    +      case FIXED:
    +        return HIVE_TYPE_BINARY;
    +      default:
    +        throw new RuntimeException(String.format("There is no Hive type mapping defined for the Avro type of: %s ", avroType.getName()));
    +    }
    +  }
    +
    +  private static String mapToDecimalOrBinary(Schema schema, SqoopOptions options) {
    +    boolean logicalTypesEnabled = options.getConf().getBoolean(ConfigurationConstants.PROP_ENABLE_PARQUET_LOGICAL_TYPE_DECIMAL, false);
    +    if (logicalTypesEnabled && schema.getLogicalType() != null && schema.getLogicalType() instanceof Decimal) {
    +      Decimal decimal = (Decimal) schema.getLogicalType();
    +
    +      // trimming precision and scale to Hive's maximum values.
    +      int precision = Math.min(HiveDecimal.MAX_PRECISION, decimal.getPrecision());
    +      if (precision < decimal.getPrecision()) {
    +        LOG.warn("Warning! Precision in the Hive table definition will be smaller than the actual precision of the column on storage! Hive may not be able to read data from this column.");
    --- End diff --
    
    I created SQOOP-3418 to cover the documentation part.


---

[GitHub] sqoop pull request #60: SQOOP-3396: Add parquet numeric support for Parquet ...

Posted by fszabo2 <gi...@git.apache.org>.
Github user fszabo2 commented on a diff in the pull request:

    https://github.com/apache/sqoop/pull/60#discussion_r238693735
  
    --- Diff: src/java/org/apache/sqoop/hive/HiveTypes.java ---
    @@ -83,27 +89,58 @@ public static String toHiveType(int sqlType) {
           }
       }
     
    -  public static String toHiveType(Schema.Type avroType) {
    -      switch (avroType) {
    -        case BOOLEAN:
    -          return HIVE_TYPE_BOOLEAN;
    -        case INT:
    -          return HIVE_TYPE_INT;
    -        case LONG:
    -          return HIVE_TYPE_BIGINT;
    -        case FLOAT:
    -          return HIVE_TYPE_FLOAT;
    -        case DOUBLE:
    -          return HIVE_TYPE_DOUBLE;
    -        case STRING:
    -        case ENUM:
    -          return HIVE_TYPE_STRING;
    -        case BYTES:
    -        case FIXED:
    -          return HIVE_TYPE_BINARY;
    -        default:
    -          return null;
    +  public static String toHiveType(Schema schema, SqoopOptions options) {
    +    if (schema.getType() == Schema.Type.UNION) {
    +      for (Schema subSchema : schema.getTypes()) {
    +        if (subSchema.getType() != Schema.Type.NULL) {
    +          return toHiveType(subSchema, options);
    +        }
    +      }
    +    }
    +
    +    Schema.Type avroType = schema.getType();
    +    switch (avroType) {
    +      case BOOLEAN:
    +        return HIVE_TYPE_BOOLEAN;
    +      case INT:
    +        return HIVE_TYPE_INT;
    +      case LONG:
    +        return HIVE_TYPE_BIGINT;
    +      case FLOAT:
    +        return HIVE_TYPE_FLOAT;
    +      case DOUBLE:
    +        return HIVE_TYPE_DOUBLE;
    +      case STRING:
    +      case ENUM:
    +        return HIVE_TYPE_STRING;
    +      case BYTES:
    +        return mapToDecimalOrBinary(schema, options);
    +      case FIXED:
    +        return HIVE_TYPE_BINARY;
    +      default:
    +        throw new RuntimeException(String.format("There is no Hive type mapping defined for the Avro type of: %s ", avroType.getName()));
    +    }
    +  }
    +
    +  private static String mapToDecimalOrBinary(Schema schema, SqoopOptions options) {
    +    boolean logicalTypesEnabled = options.getConf().getBoolean(ConfigurationConstants.PROP_ENABLE_PARQUET_LOGICAL_TYPE_DECIMAL, false);
    +    if (logicalTypesEnabled && schema.getLogicalType() != null && schema.getLogicalType() instanceof Decimal) {
    +      Decimal decimal = (Decimal) schema.getLogicalType();
    +
    +      // trimming precision and scale to Hive's maximum values.
    +      int precision = Math.min(HiveDecimal.MAX_PRECISION, decimal.getPrecision());
    +      if (precision < decimal.getPrecision()) {
    +        LOG.warn("Warning! Precision in the Hive table definition will be smaller than the actual precision of the column on storage! Hive may not be able to read data from this column.");
    --- End diff --
    
    Do you think we should remove this warning? (I think, even if it's redundant, it's useful to write this out.)


---

[GitHub] sqoop pull request #60: SQOOP-3396: Add parquet numeric support for Parquet ...

Posted by szvasas <gi...@git.apache.org>.
Github user szvasas commented on a diff in the pull request:

    https://github.com/apache/sqoop/pull/60#discussion_r237907910
  
    --- Diff: src/java/org/apache/sqoop/hive/HiveTypes.java ---
    @@ -79,8 +85,42 @@ public static String toHiveType(int sqlType) {
               default:
             // TODO(aaron): Support BINARY, VARBINARY, LONGVARBINARY, DISTINCT,
             // BLOB, ARRAY, STRUCT, REF, JAVA_OBJECT.
    -        return null;
    +            return null;
    +      }
    +  }
    +
    +  private static String mapDecimalsToHiveType(int sqlType, SqoopOptions options) {
    +    if (options.getConf().getBoolean(ConfigurationConstants.PROP_ENABLE_PARQUET_LOGICAL_TYPE_DECIMAL, false)
    +        && (sqlType == Types.NUMERIC || sqlType == Types.DECIMAL)){
    +      return HIVE_TYPE_DECIMAL;
    +    }
    +    return HIVE_TYPE_DOUBLE;
    +  }
    +
    +
    +  public static String toHiveType(Schema schema) {
    +    if (schema.getType() == Schema.Type.UNION) {
    +      for (Schema subSchema : schema.getTypes()) {
    +        if (subSchema.getLogicalType() != null && subSchema.getLogicalType() instanceof Decimal) {
    --- End diff --
    
    Redundant null check: if subSchema.getLogicalType() is null then subSchema.getLogicalType() instanceof Decimal will be false


---

[GitHub] sqoop pull request #60: SQOOP-3396: Add parquet numeric support for Parquet ...

Posted by fszabo2 <gi...@git.apache.org>.
Github user fszabo2 commented on a diff in the pull request:

    https://github.com/apache/sqoop/pull/60#discussion_r237894381
  
    --- Diff: src/java/org/apache/sqoop/hive/HiveTypes.java ---
    @@ -37,16 +42,28 @@
       private static final String HIVE_TYPE_STRING = "STRING";
       private static final String HIVE_TYPE_BOOLEAN = "BOOLEAN";
       private static final String HIVE_TYPE_BINARY = "BINARY";
    +  private static final String HIVE_TYPE_DECIMAL = "DECIMAL";
     
       public static final Log LOG = LogFactory.getLog(HiveTypes.class.getName());
     
       private HiveTypes() { }
     
    +
    +  public static String toHiveType(int sqlType, SqoopOptions options) {
    +
    +    if (options.getConf().getBoolean(ConfigurationConstants.PROP_ENABLE_PARQUET_LOGICAL_TYPE_DECIMAL, false)
    +        && (sqlType == Types.NUMERIC || sqlType == Types.DECIMAL)){
    +      return HIVE_TYPE_DECIMAL;
    +    }
    +    return toHiveType(sqlType);
    +  }
    +
    +
       /**
        * Given JDBC SQL types coming from another database, what is the best
        * mapping to a Hive-specific type?
        */
    -  public static String toHiveType(int sqlType) {
    +  private static String toHiveType(int sqlType) {
    --- End diff --
    
    Actually, there is a method that defines extra mappings for hive types in OraOop:
    org.apache.sqoop.manager.oracle.OraOopConnManager#toHiveType 
    
    So we need to keep the "null" return value here, in order to allow for that to work. 
    
    I think it still makes sense to log a warning though.
    
    On the long run: it might make sense to refactor the ora oop connector, but I think it's out of the scope of this change.


---

[GitHub] sqoop pull request #60: SQOOP-3396: Add parquet numeric support for Parquet ...

Posted by fszabo2 <gi...@git.apache.org>.
Github user fszabo2 commented on a diff in the pull request:

    https://github.com/apache/sqoop/pull/60#discussion_r238694723
  
    --- Diff: src/test/org/apache/sqoop/importjob/numerictypes/NumericTypesParquetImportTestBase.java ---
    @@ -0,0 +1,89 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + * <p>
    + * http://www.apache.org/licenses/LICENSE-2.0
    + * <p>
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.sqoop.importjob.numerictypes;
    +
    +import org.apache.commons.logging.Log;
    +import org.apache.commons.logging.LogFactory;
    +import org.apache.hadoop.fs.Path;
    +import org.apache.parquet.schema.MessageType;
    +import org.apache.parquet.schema.OriginalType;
    +import org.apache.sqoop.importjob.configuration.ParquetTestConfiguration;
    +import org.apache.sqoop.testutil.ArgumentArrayBuilder;
    +import org.apache.sqoop.testutil.NumericTypesTestUtils;
    +import org.apache.sqoop.util.ParquetReader;
    +import org.junit.Before;
    +
    +import java.util.Arrays;
    +
    +import static org.junit.Assert.assertEquals;
    +
    +public abstract class NumericTypesParquetImportTestBase<T extends ParquetTestConfiguration> extends NumericTypesImportTestBase<T>  {
    +
    +  public static final Log LOG = LogFactory.getLog(NumericTypesParquetImportTestBase.class.getName());
    +
    +  public NumericTypesParquetImportTestBase(T configuration, boolean failWithoutExtraArgs, boolean failWithPaddingOnly) {
    +    super(configuration, failWithoutExtraArgs, failWithPaddingOnly);
    +  }
    +
    +  @Before
    +  public void setUp() {
    +    super.setUp();
    +    tableDirPath = new Path(getWarehouseDir() + "/" + getTableName());
    --- End diff --
    
    nope.


---

[GitHub] sqoop pull request #60: SQOOP-3396: Add parquet numeric support for Parquet ...

Posted by maugly24 <gi...@git.apache.org>.
Github user maugly24 commented on a diff in the pull request:

    https://github.com/apache/sqoop/pull/60#discussion_r237701227
  
    --- Diff: src/java/org/apache/sqoop/hive/HiveTypes.java ---
    @@ -37,16 +42,28 @@
       private static final String HIVE_TYPE_STRING = "STRING";
       private static final String HIVE_TYPE_BOOLEAN = "BOOLEAN";
       private static final String HIVE_TYPE_BINARY = "BINARY";
    +  private static final String HIVE_TYPE_DECIMAL = "DECIMAL";
     
       public static final Log LOG = LogFactory.getLog(HiveTypes.class.getName());
     
       private HiveTypes() { }
     
    +
    +  public static String toHiveType(int sqlType, SqoopOptions options) {
    +
    +    if (options.getConf().getBoolean(ConfigurationConstants.PROP_ENABLE_PARQUET_LOGICAL_TYPE_DECIMAL, false)
    +        && (sqlType == Types.NUMERIC || sqlType == Types.DECIMAL)){
    +      return HIVE_TYPE_DECIMAL;
    +    }
    +    return toHiveType(sqlType);
    +  }
    +
    +
       /**
        * Given JDBC SQL types coming from another database, what is the best
        * mapping to a Hive-specific type?
        */
    -  public static String toHiveType(int sqlType) {
    +  private static String toHiveType(int sqlType) {
    --- End diff --
    
    If you touch this class+method anyway would you mind modifying the underlying design in a way not to return null value?
    
    It would be quite error prone e.g. in case of TableDefWriter if the FileLayout is Parquet and by any mistake the type of a given column is not mapped then the execution path would not end up in "not supported" exception, but TableDefWriter would try to create the table with "NULL" datatype.
    
    IMHO this would be a great improvement here.
    
    I would also standardize it in a way not to throw IOException (e.g. getHiveColumnTypeForTextTable) and RuntimeException for the same problem, but go only with RuntimeException.


---

[GitHub] sqoop pull request #60: SQOOP-3396: Add parquet numeric support for Parquet ...

Posted by szvasas <gi...@git.apache.org>.
Github user szvasas commented on a diff in the pull request:

    https://github.com/apache/sqoop/pull/60#discussion_r239107983
  
    --- Diff: src/test/org/apache/sqoop/importjob/numerictypes/NumericTypesParquetImportTestBase.java ---
    @@ -0,0 +1,83 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + * <p>
    + * http://www.apache.org/licenses/LICENSE-2.0
    + * <p>
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.sqoop.importjob.numerictypes;
    +
    +import org.apache.commons.logging.Log;
    +import org.apache.commons.logging.LogFactory;
    +import org.apache.hadoop.fs.Path;
    +import org.apache.parquet.schema.MessageType;
    +import org.apache.parquet.schema.OriginalType;
    +import org.apache.sqoop.importjob.configuration.ParquetTestConfiguration;
    +import org.apache.sqoop.testutil.ArgumentArrayBuilder;
    +import org.apache.sqoop.testutil.NumericTypesTestUtils;
    +import org.apache.sqoop.util.ParquetReader;
    +import org.junit.Before;
    --- End diff --
    
    Unused import


---

[GitHub] sqoop pull request #60: SQOOP-3396: Add parquet numeric support for Parquet ...

Posted by fszabo2 <gi...@git.apache.org>.
Github user fszabo2 commented on a diff in the pull request:

    https://github.com/apache/sqoop/pull/60#discussion_r238693021
  
    --- Diff: src/java/org/apache/sqoop/hive/HiveTypes.java ---
    @@ -79,8 +85,42 @@ public static String toHiveType(int sqlType) {
               default:
             // TODO(aaron): Support BINARY, VARBINARY, LONGVARBINARY, DISTINCT,
             // BLOB, ARRAY, STRUCT, REF, JAVA_OBJECT.
    -        return null;
    +            return null;
    +      }
    +  }
    +
    +  private static String mapDecimalsToHiveType(int sqlType, SqoopOptions options) {
    +    if (options.getConf().getBoolean(ConfigurationConstants.PROP_ENABLE_PARQUET_LOGICAL_TYPE_DECIMAL, false)
    +        && (sqlType == Types.NUMERIC || sqlType == Types.DECIMAL)){
    +      return HIVE_TYPE_DECIMAL;
    +    }
    +    return HIVE_TYPE_DOUBLE;
    +  }
    +
    +
    +  public static String toHiveType(Schema schema) {
    +    if (schema.getType() == Schema.Type.UNION) {
    +      for (Schema subSchema : schema.getTypes()) {
    +        if (subSchema.getLogicalType() != null && subSchema.getLogicalType() instanceof Decimal) {
    --- End diff --
    
    I learn something new every day :), removed.


---

[GitHub] sqoop pull request #60: SQOOP-3396: Add parquet numeric support for Parquet ...

Posted by szvasas <gi...@git.apache.org>.
Github user szvasas commented on a diff in the pull request:

    https://github.com/apache/sqoop/pull/60#discussion_r237907664
  
    --- Diff: src/java/org/apache/sqoop/hive/HiveTypes.java ---
    @@ -79,8 +85,42 @@ public static String toHiveType(int sqlType) {
               default:
             // TODO(aaron): Support BINARY, VARBINARY, LONGVARBINARY, DISTINCT,
             // BLOB, ARRAY, STRUCT, REF, JAVA_OBJECT.
    -        return null;
    +            return null;
    +      }
    +  }
    +
    +  private static String mapDecimalsToHiveType(int sqlType, SqoopOptions options) {
    +    if (options.getConf().getBoolean(ConfigurationConstants.PROP_ENABLE_PARQUET_LOGICAL_TYPE_DECIMAL, false)
    +        && (sqlType == Types.NUMERIC || sqlType == Types.DECIMAL)){
    --- End diff --
    
    Do we need to test for NUMERIC and DECIMAL types again here?


---

[GitHub] sqoop pull request #60: SQOOP-3396: Add parquet numeric support for Parquet ...

Posted by maugly24 <gi...@git.apache.org>.
Github user maugly24 commented on a diff in the pull request:

    https://github.com/apache/sqoop/pull/60#discussion_r238013139
  
    --- Diff: src/test/org/apache/sqoop/importjob/configuration/MysqlImportJobTestConfiguration.java ---
    @@ -65,4 +66,21 @@
       public String toString() {
         return getClass().getSimpleName();
       }
    +
    +  @Override
    +  public Object[] getExpectedResultsForHive() {
    --- End diff --
    
    Hi @fszabo2 ,
    
    If that's the case:
    Could you please at least add some information about the intention behind the values (A.K.A. explanatory comments by Uncle Bob), b/c right now for example if this test would fail I would not have much clue about what values do we have here, and why.
    
    I'm thinking about something like:
    this value is an int, XXX precision, so it should be YYYYY in Parquet/Hive
    
    or something like
    this value represents the biggest the longest possible values stored as type ZZZ, thus it contains N digits, and should be KKKK in Hive.
    
    Could we add these information, thus put these "Magic numbers" into a context?


---

[GitHub] sqoop pull request #60: SQOOP-3396: Add parquet numeric support for Parquet ...

Posted by fszabo2 <gi...@git.apache.org>.
Github user fszabo2 commented on a diff in the pull request:

    https://github.com/apache/sqoop/pull/60#discussion_r237881838
  
    --- Diff: src/test/org/apache/sqoop/importjob/configuration/MysqlImportJobTestConfiguration.java ---
    @@ -65,4 +66,21 @@
       public String toString() {
         return getClass().getSimpleName();
       }
    +
    +  @Override
    +  public Object[] getExpectedResultsForHive() {
    --- End diff --
    
    Hi Attila, 
    
    I'd like to leave these as is.
    
    I'd argue that these don't actually belong together, so it's not really code duplication but rather an accidental match.
    
    I added a new column to the sample data of the mysql configuration to cover an edge case. The expected result in Hive changed for this configuration, and, on the other hand this edge case does not make sense to be added to the SQL server tests.
    



---

[GitHub] sqoop pull request #60: SQOOP-3396: Add parquet numeric support for Parquet ...

Posted by fszabo2 <gi...@git.apache.org>.
Github user fszabo2 commented on a diff in the pull request:

    https://github.com/apache/sqoop/pull/60#discussion_r239057124
  
    --- Diff: src/test/org/apache/sqoop/hive/TestHiveTypesForAvroTypeMapping.java ---
    @@ -41,30 +44,49 @@
     
       private final String hiveType;
       private final Schema schema;
    +  private final SqoopOptions options;
     
    -  @Parameters(name = "hiveType = {0}, schema = {1}")
    +  @Parameters(name = "hiveType = {0}, schema = {1}, options = {2}")
       public static Iterable<? extends Object> parameters() {
         return Arrays.asList(
    -        new Object[]{"BOOLEAN", Schema.create(Schema.Type.BOOLEAN)},
    -        new Object[]{"INT", Schema.create(Schema.Type.INT)},
    -        new Object[]{"BIGINT", Schema.create(Schema.Type.LONG)},
    -        new Object[]{"FLOAT", Schema.create(Schema.Type.FLOAT)},
    -        new Object[]{"DOUBLE", Schema.create(Schema.Type.DOUBLE)},
    -        new Object[]{"STRING", Schema.createEnum("ENUM", "doc", "namespce", new ArrayList<>())}, // Schema.Type.ENUM
    -        new Object[]{"STRING", Schema.create(Schema.Type.STRING)},
    -        new Object[]{"BINARY", Schema.create(Schema.Type.BYTES)},
    -        new Object[]{"BINARY", Schema.createFixed("Fixed", "doc", "space", 1) }
    -        //, new Object[]{"DECIMAL", Schema.create(Schema.Type.UNION).}
    +        new Object[]{"BOOLEAN", Schema.create(Schema.Type.BOOLEAN), new SqoopOptions()},
    +        new Object[]{"INT", Schema.create(Schema.Type.INT), new SqoopOptions()},
    +        new Object[]{"BIGINT", Schema.create(Schema.Type.LONG), new SqoopOptions()},
    +        new Object[]{"FLOAT", Schema.create(Schema.Type.FLOAT), new SqoopOptions()},
    +        new Object[]{"DOUBLE", Schema.create(Schema.Type.DOUBLE), new SqoopOptions()},
    +        new Object[]{"STRING", Schema.createEnum("ENUM", "doc", "namespce", new ArrayList<>()), new SqoopOptions()}, // Schema.Type.ENUM
    --- End diff --
    
    removed


---

[GitHub] sqoop pull request #60: SQOOP-3396: Add parquet numeric support for Parquet ...

Posted by szvasas <gi...@git.apache.org>.
Github user szvasas commented on a diff in the pull request:

    https://github.com/apache/sqoop/pull/60#discussion_r238334580
  
    --- Diff: src/test/org/apache/sqoop/importjob/numerictypes/NumericTypesImportTestBase.java ---
    @@ -65,240 +46,79 @@
      * 2. Decimal padding during avro or parquet import
      * In case of Oracle and Postgres, Sqoop has to pad the values with 0s to avoid errors.
      */
    -public abstract class NumericTypesImportTestBase<T extends AvroTestConfiguration & ParquetTestConfiguration> extends ImportJobTestCase implements DatabaseAdapterFactory {
    +public abstract class NumericTypesImportTestBase<T extends ImportJobTestConfiguration> extends ThirdPartyTestBase<T>  {
     
       public static final Log LOG = LogFactory.getLog(NumericTypesImportTestBase.class.getName());
     
    -  private Configuration conf = new Configuration();
    -
    -  private final T configuration;
    -  private final DatabaseAdapter adapter;
       private final boolean failWithoutExtraArgs;
       private final boolean failWithPadding;
     
    -  // Constants for the basic test case, that doesn't use extra arguments
    -  // that are required to avoid errors, i.e. padding and default precision and scale.
    -  protected final static boolean SUCCEED_WITHOUT_EXTRA_ARGS = false;
    -  protected final static boolean FAIL_WITHOUT_EXTRA_ARGS = true;
    -
    -  // Constants for the test case that has padding specified but not default precision and scale.
    -  protected final static boolean SUCCEED_WITH_PADDING_ONLY = false;
    -  protected final static boolean FAIL_WITH_PADDING_ONLY = true;
    -
    -  private Path tableDirPath;
    -
       public NumericTypesImportTestBase(T configuration, boolean failWithoutExtraArgs, boolean failWithPaddingOnly) {
    -    this.adapter = createAdapter();
    -    this.configuration = configuration;
    +    super(configuration);
         this.failWithoutExtraArgs = failWithoutExtraArgs;
         this.failWithPadding = failWithPaddingOnly;
       }
     
    -  @Rule
    -  public ExpectedException thrown = ExpectedException.none();
    -
    -  @Override
    -  protected Configuration getConf() {
    -    return conf;
    -  }
    -
    -  @Override
    -  protected boolean useHsqldbTestServer() {
    -    return false;
    -  }
    -
    -  @Override
    -  protected String getConnectString() {
    -    return adapter.getConnectionString();
    -  }
    -
    -  @Override
    -  protected SqoopOptions getSqoopOptions(Configuration conf) {
    -    SqoopOptions opts = new SqoopOptions(conf);
    -    adapter.injectConnectionParameters(opts);
    -    return opts;
    -  }
    -
    -  @Override
    -  protected void dropTableIfExists(String table) throws SQLException {
    -    adapter.dropTableIfExists(table, getManager());
    -  }
    -
       @Before
       public void setUp() {
         super.setUp();
    -    String[] names = configuration.getNames();
    -    String[] types = configuration.getTypes();
    -    createTableWithColTypesAndNames(names, types, new String[0]);
    -    List<String[]> inputData = configuration.getSampleData();
    -    for (String[] input  : inputData) {
    -      insertIntoTable(names, types, input);
    -    }
         tableDirPath = new Path(getWarehouseDir() + "/" + getTableName());
       }
     
    -  @After
    -  public void tearDown() {
    -    try {
    -      dropTableIfExists(getTableName());
    -    } catch (SQLException e) {
    -      LOG.warn("Error trying to drop table on tearDown: " + e);
    -    }
    -    super.tearDown();
    -  }
    +  public Path tableDirPath;
     
    -  private ArgumentArrayBuilder getArgsBuilder(SqoopOptions.FileLayout fileLayout) {
    -    ArgumentArrayBuilder builder = new ArgumentArrayBuilder();
    -    if (AvroDataFile.equals(fileLayout)) {
    -      builder.withOption("as-avrodatafile");
    -    }
    -    else if (ParquetFile.equals(fileLayout)) {
    -      builder.withOption("as-parquetfile");
    -    }
    +  @Rule
    +  public ExpectedException thrown = ExpectedException.none();
    +
    +  abstract public ArgumentArrayBuilder getArgsBuilder();
    +  abstract public void verify();
     
    +  public ArgumentArrayBuilder includeCommonOptions(ArgumentArrayBuilder builder) {
         return builder.withCommonHadoopFlags(true)
             .withOption("warehouse-dir", getWarehouseDir())
             .withOption("num-mappers", "1")
             .withOption("table", getTableName())
             .withOption("connect", getConnectString());
       }
     
    -  /**
    -   * Adds properties to the given arg builder for decimal precision and scale.
    -   * @param builder
    -   */
    -  private void addPrecisionAndScale(ArgumentArrayBuilder builder) {
    -    builder.withProperty("sqoop.avro.logical_types.decimal.default.precision", "38");
    -    builder.withProperty("sqoop.avro.logical_types.decimal.default.scale", "3");
    -  }
    -
    -  /**
    -   * Enables padding for decimals in avro and parquet import.
    -   * @param builder
    -   */
    -  private void addPadding(ArgumentArrayBuilder builder) {
    -    builder.withProperty("sqoop.avro.decimal_padding.enable", "true");
    -  }
    -
    -  private void addEnableAvroDecimal(ArgumentArrayBuilder builder) {
    -    builder.withProperty("sqoop.avro.logical_types.decimal.enable", "true");
    -  }
    -
    -  private void addEnableParquetDecimal(ArgumentArrayBuilder builder) {
    -    builder.withProperty("sqoop.parquet.logical_types.decimal.enable", "true");
    -  }
     
    -  private void configureJunitToExpectFailure(boolean failWithPadding) {
    -    if (failWithPadding) {
    +  @Test
    +  public void testImportWithoutPadding() throws IOException {
    +    if(failWithoutExtraArgs){
           thrown.expect(IOException.class);
           thrown.expectMessage("Failure during job; return status 1");
    +      NumericTypesTestUtils.configureJunitToExpectFailure(thrown);
    --- End diff --
    
    This method seems to be doing the same thing as the previous line.


---

[GitHub] sqoop pull request #60: SQOOP-3396: Add parquet numeric support for Parquet ...

Posted by fszabo2 <gi...@git.apache.org>.
Github user fszabo2 commented on a diff in the pull request:

    https://github.com/apache/sqoop/pull/60#discussion_r238691269
  
    --- Diff: src/java/org/apache/sqoop/hive/HiveTypes.java ---
    @@ -79,8 +85,42 @@ public static String toHiveType(int sqlType) {
               default:
             // TODO(aaron): Support BINARY, VARBINARY, LONGVARBINARY, DISTINCT,
             // BLOB, ARRAY, STRUCT, REF, JAVA_OBJECT.
    -        return null;
    +            return null;
    +      }
    +  }
    +
    +  private static String mapDecimalsToHiveType(int sqlType, SqoopOptions options) {
    +    if (options.getConf().getBoolean(ConfigurationConstants.PROP_ENABLE_PARQUET_LOGICAL_TYPE_DECIMAL, false)
    +        && (sqlType == Types.NUMERIC || sqlType == Types.DECIMAL)){
    --- End diff --
    
    This piece of code was reverted. 


---

[GitHub] sqoop pull request #60: SQOOP-3396: Add parquet numeric support for Parquet ...

Posted by fszabo2 <gi...@git.apache.org>.
Github user fszabo2 commented on a diff in the pull request:

    https://github.com/apache/sqoop/pull/60#discussion_r239057294
  
    --- Diff: src/test/org/apache/sqoop/hive/TestHiveTypesForAvroTypeMapping.java ---
    @@ -41,30 +44,49 @@
     
       private final String hiveType;
       private final Schema schema;
    +  private final SqoopOptions options;
     
    -  @Parameters(name = "hiveType = {0}, schema = {1}")
    +  @Parameters(name = "hiveType = {0}, schema = {1}, options = {2}")
       public static Iterable<? extends Object> parameters() {
         return Arrays.asList(
    -        new Object[]{"BOOLEAN", Schema.create(Schema.Type.BOOLEAN)},
    -        new Object[]{"INT", Schema.create(Schema.Type.INT)},
    -        new Object[]{"BIGINT", Schema.create(Schema.Type.LONG)},
    -        new Object[]{"FLOAT", Schema.create(Schema.Type.FLOAT)},
    -        new Object[]{"DOUBLE", Schema.create(Schema.Type.DOUBLE)},
    -        new Object[]{"STRING", Schema.createEnum("ENUM", "doc", "namespce", new ArrayList<>())}, // Schema.Type.ENUM
    -        new Object[]{"STRING", Schema.create(Schema.Type.STRING)},
    -        new Object[]{"BINARY", Schema.create(Schema.Type.BYTES)},
    -        new Object[]{"BINARY", Schema.createFixed("Fixed", "doc", "space", 1) }
    -        //, new Object[]{"DECIMAL", Schema.create(Schema.Type.UNION).}
    +        new Object[]{"BOOLEAN", Schema.create(Schema.Type.BOOLEAN), new SqoopOptions()},
    --- End diff --
    
    I've added a bunch of static imports (also for Schema.Type) so now the whole thing is a lot more readable.


---

[GitHub] sqoop pull request #60: SQOOP-3396: Add parquet numeric support for Parquet ...

Posted by fszabo2 <gi...@git.apache.org>.
Github user fszabo2 commented on a diff in the pull request:

    https://github.com/apache/sqoop/pull/60#discussion_r237817782
  
    --- Diff: src/test/org/apache/sqoop/hive/numerictypes/NumericTypesHiveImportTest.java ---
    @@ -0,0 +1,202 @@
    +/**
    + * Licensed to the Apache Software Foundation (ASF) under one
    + * or more contributor license agreements.  See the NOTICE file
    + * distributed with this work for additional information
    + * regarding copyright ownership.  The ASF licenses this file
    + * to you under the Apache License, Version 2.0 (the
    + * "License"); you may not use this file except in compliance
    + * with the License.  You may obtain a copy of the License at
    + * <p>
    + * http://www.apache.org/licenses/LICENSE-2.0
    + * <p>
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.sqoop.hive.numerictypes;
    +
    +import org.apache.sqoop.hive.minicluster.HiveMiniCluster;
    +import org.apache.sqoop.hive.minicluster.NoAuthenticationConfiguration;
    +import org.apache.sqoop.importjob.configuration.HiveTestConfiguration;
    +import org.apache.sqoop.importjob.configuration.MysqlImportJobTestConfiguration;
    +import org.apache.sqoop.importjob.configuration.OracleImportJobTestConfiguration;
    +import org.apache.sqoop.importjob.configuration.OracleImportJobTestConfigurationForNumber;
    +import org.apache.sqoop.importjob.configuration.PostgresqlImportJobTestConfigurationForNumeric;
    +import org.apache.sqoop.importjob.configuration.PostgresqlImportJobTestConfigurationPaddingShouldSucceed;
    +import org.apache.sqoop.importjob.configuration.SqlServerImportJobTestConfiguration;
    +import org.apache.sqoop.testcategories.sqooptest.IntegrationTest;
    +import org.apache.sqoop.testcategories.thirdpartytest.MysqlTest;
    +import org.apache.sqoop.testcategories.thirdpartytest.OracleTest;
    +import org.apache.sqoop.testcategories.thirdpartytest.PostgresqlTest;
    +import org.apache.sqoop.testcategories.thirdpartytest.SqlServerTest;
    +import org.apache.sqoop.testutil.HiveServer2TestUtil;
    +import org.apache.sqoop.testutil.NumericTypesTestUtils;
    +import org.apache.sqoop.testutil.adapter.DatabaseAdapter;
    +import org.apache.sqoop.testutil.adapter.MysqlDatabaseAdapter;
    +import org.apache.sqoop.testutil.adapter.OracleDatabaseAdapter;
    +import org.apache.sqoop.testutil.adapter.PostgresDatabaseAdapter;
    +import org.apache.sqoop.testutil.adapter.SqlServerDatabaseAdapter;
    +import org.apache.sqoop.util.BlockJUnit4ClassRunnerWithParametersFactory;
    +import org.junit.AfterClass;
    +import org.junit.BeforeClass;
    +import org.junit.Rule;
    +import org.junit.experimental.categories.Category;
    +import org.junit.experimental.runners.Enclosed;
    +import org.junit.rules.ExpectedException;
    +import org.junit.runner.RunWith;
    +import org.junit.runners.Parameterized;
    +
    +import java.util.Arrays;
    +
    +import static org.apache.sqoop.testutil.NumericTypesTestUtils.FAIL_WITHOUT_EXTRA_ARGS;
    +import static org.apache.sqoop.testutil.NumericTypesTestUtils.FAIL_WITH_PADDING_ONLY;
    +import static org.apache.sqoop.testutil.NumericTypesTestUtils.SUCCEED_WITHOUT_EXTRA_ARGS;
    +import static org.apache.sqoop.testutil.NumericTypesTestUtils.SUCCEED_WITH_PADDING_ONLY;
    +
    +@RunWith(Enclosed.class)
    +@Category(IntegrationTest.class)
    +public class NumericTypesHiveImportTest {
    +
    +  @Rule
    +  public ExpectedException expectedException = ExpectedException.none();
    +
    +  private static HiveMiniCluster hiveMiniCluster;
    +
    +  private static HiveServer2TestUtil hiveServer2TestUtil;
    +
    +  @BeforeClass
    +  public static void beforeClass() {
    +    startHiveMiniCluster();
    +  }
    +
    +  @AfterClass
    +  public static void afterClass() {
    +    stopHiveMiniCluster();
    +  }
    +
    +  public static void startHiveMiniCluster() {
    +    hiveMiniCluster = new HiveMiniCluster(new NoAuthenticationConfiguration());
    +    hiveMiniCluster.start();
    +    hiveServer2TestUtil = new HiveServer2TestUtil(hiveMiniCluster.getUrl());
    +  }
    +
    +  public static void stopHiveMiniCluster() {
    +    hiveMiniCluster.stop();
    +  }
    +
    +  @Category(MysqlTest.class)
    +  public static class MysqlNumericTypesHiveImportTest extends NumericTypesHiveImportTestBase {
    +
    +    public MysqlNumericTypesHiveImportTest() {
    +      super(new MysqlImportJobTestConfiguration(), NumericTypesTestUtils.SUCCEED_WITHOUT_EXTRA_ARGS, NumericTypesTestUtils.SUCCEED_WITH_PADDING_ONLY);
    +    }
    +
    +    @Override
    +    public DatabaseAdapter createAdapter() {
    +      return new MysqlDatabaseAdapter();
    +    }
    +
    +    @Override
    +    protected HiveMiniCluster getHiveMiniCluster() {
    --- End diff --
    
    Makes sense!


---