Posted to reviews@spark.apache.org by jaceklaskowski <gi...@git.apache.org> on 2018/11/16 06:12:38 UTC

[GitHub] spark pull request #21838: [SPARK-24811][SQL]Avro: add new function from_avr...
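
The pull request under review adds a `from_avro` function (with a companion `to_avro`) for converting a column of Avro binary data to Catalyst values and back. A minimal sketch of that user-facing API, assuming the Spark 2.4-era placement in the `org.apache.spark.sql.avro` package object (later releases moved the functions to `org.apache.spark.sql.avro.functions`) and the external spark-avro module on the classpath:

    import org.apache.spark.sql.SparkSession
    import org.apache.spark.sql.avro.{from_avro, to_avro}

    object AvroRoundTripExample {
      def main(args: Array[String]): Unit = {
        val spark = SparkSession.builder()
          .master("local[*]")
          .appName("avro-demo")
          .getOrCreate()
        import spark.implicits._

        // Avro reader schema for the serialized payload, given as a JSON string.
        val jsonFormatSchema = """{"type": "int"}"""

        // Round trip: Catalyst value -> Avro binary -> Catalyst value.
        Seq(1, 2, 3).toDF("value")
          .select(to_avro($"value").as("avro"))
          .select(from_avro($"avro", jsonFormatSchema).as("value"))
          .show()

        spark.stop()
      }
    }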

Github user jaceklaskowski commented on a diff in the pull request:

    https://github.com/apache/spark/pull/21838#discussion_r234099158
  
    --- Diff: external/avro/src/test/scala/org/apache/spark/sql/avro/AvroCatalystDataConversionSuite.scala ---
    @@ -0,0 +1,175 @@
    +/*
    + * Licensed to the Apache Software Foundation (ASF) under one or more
    + * contributor license agreements.  See the NOTICE file distributed with
    + * this work for additional information regarding copyright ownership.
    + * The ASF licenses this file to You under the Apache License, Version 2.0
    + * (the "License"); you may not use this file except in compliance with
    + * the License.  You may obtain a copy of the License at
    + *
    + *    http://www.apache.org/licenses/LICENSE-2.0
    + *
    + * Unless required by applicable law or agreed to in writing, software
    + * distributed under the License is distributed on an "AS IS" BASIS,
    + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    + * See the License for the specific language governing permissions and
    + * limitations under the License.
    + */
    +
    +package org.apache.spark.sql.avro
    +
    +import org.apache.avro.Schema
    +
    +import org.apache.spark.SparkFunSuite
    +import org.apache.spark.sql.{AvroDataToCatalyst, CatalystDataToAvro, RandomDataGenerator}
    +import org.apache.spark.sql.catalyst.{CatalystTypeConverters, InternalRow}
    +import org.apache.spark.sql.catalyst.expressions.{ExpressionEvalHelper, GenericInternalRow, Literal}
    +import org.apache.spark.sql.catalyst.util.{ArrayBasedMapData, GenericArrayData, MapData}
    +import org.apache.spark.sql.types._
    +import org.apache.spark.unsafe.types.UTF8String
    +
    +class AvroCatalystDataConversionSuite extends SparkFunSuite with ExpressionEvalHelper {
    +
    +  private def roundTripTest(data: Literal): Unit = {
    +    val avroType = SchemaConverters.toAvroType(data.dataType, data.nullable)
    +    checkResult(data, avroType.toString, data.eval())
    +  }
    +
    +  private def checkResult(data: Literal, schema: String, expected: Any): Unit = {
    +    checkEvaluation(
    +      AvroDataToCatalyst(CatalystDataToAvro(data), schema),
    +      prepareExpectedResult(expected))
    +  }
    +
    +  private def assertFail(data: Literal, schema: String): Unit = {
    +    intercept[java.io.EOFException] {
    +      AvroDataToCatalyst(CatalystDataToAvro(data), schema).eval()
    +    }
    +  }
    +
    +  private val testingTypes = Seq(
    +    BooleanType,
    +    ByteType,
    +    ShortType,
    +    IntegerType,
    +    LongType,
    +    FloatType,
    +    DoubleType,
    +    DecimalType(8, 0),   // 32 bits decimal without fraction
    +    DecimalType(8, 4),   // 32 bits decimal
    +    DecimalType(16, 0),  // 64 bits decimal without fraction
    +    DecimalType(16, 11), // 64 bits decimal
    +    DecimalType(38, 0),
    +    DecimalType(38, 38),
    +    StringType,
    +    BinaryType)
    +
    +  protected def prepareExpectedResult(expected: Any): Any = expected match {
    +    // Spark decimal is converted to avro string
    +    case d: Decimal => UTF8String.fromString(d.toString)
    +    // Spark byte and short both map to avro int
    +    case b: Byte => b.toInt
    +    case s: Short => s.toInt
    +    case row: GenericInternalRow => InternalRow.fromSeq(row.values.map(prepareExpectedResult))
    +    case array: GenericArrayData => new GenericArrayData(array.array.map(prepareExpectedResult))
    +    case map: MapData =>
    +      val keys = new GenericArrayData(
    +        map.keyArray().asInstanceOf[GenericArrayData].array.map(prepareExpectedResult))
    +      val values = new GenericArrayData(
    +        map.valueArray().asInstanceOf[GenericArrayData].array.map(prepareExpectedResult))
    +      new ArrayBasedMapData(keys, values)
    +    case other => other
    +  }
    +
    +  testingTypes.foreach { dt =>
    +    val seed = scala.util.Random.nextLong()
    +    test(s"single $dt with seed $seed") {
    +      val rand = new scala.util.Random(seed)
    +      val data = RandomDataGenerator.forType(dt, rand = rand).get.apply()
    +      val converter = CatalystTypeConverters.createToCatalystConverter(dt)
    +      val input = Literal.create(converter(data), dt)
    +      roundTripTest(input)
    +    }
    +  }
    +
    +  for (_ <- 1 to 5) {
    --- End diff --
    
    Why not `(1 to 5).foreach`?
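    
    Both spellings compile to the same code: a Scala `for` comprehension without `yield` is desugared by the compiler into a `foreach` call, so the choice is purely stylistic. A minimal runnable sketch of the two forms side by side (the `println` body is a placeholder for the truncated loop body above):
    
        object ForeachStyle {
          def main(args: Array[String]): Unit = {
            // Style used in the diff: a for comprehension over a Range.
            for (_ <- 1 to 5) {
              println("round-trip test iteration")
            }
            // Suggested style: the foreach call the comprehension desugars to.
            (1 to 5).foreach { _ =>
              println("round-trip test iteration")
            }
          }
        }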


---
