Posted to dev@phoenix.apache.org by "Dawid Wysakowicz (JIRA)" <ji...@apache.org> on 2015/12/01 09:39:10 UTC

[jira] [Updated] (PHOENIX-2469) Problem with ARRAYs in Phoenix-spark

     [ https://issues.apache.org/jira/browse/PHOENIX-2469?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Dawid Wysakowicz updated PHOENIX-2469:
--------------------------------------
    Description: 
I've recently run into some behaviour that looks buggy when working with phoenix-spark and arrays.

Take a look at these unit tests:

{code:java}
  test("Can save arrays from custom dataframes back to phoenix") {
    val dataSet = List(Row(2L, Array("String1", "String2", "String3")))

    val sqlContext = new SQLContext(sc)

    val schema = StructType(
        Seq(StructField("ID", LongType, nullable = false),
            StructField("VCARRAY", ArrayType(StringType))))

    val rowRDD = sc.parallelize(dataSet)

    // Apply the schema to the RDD.
    val df = sqlContext.createDataFrame(rowRDD, schema)

    df.write
      .format("org.apache.phoenix.spark")
      .options(Map("table" -> "ARRAY_TEST_TABLE", "zkUrl" -> quorumAddress))
      .mode(SaveMode.Overwrite)
      .save()
  }
{code}
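For context, this test assumes ARRAY_TEST_TABLE already exists. A minimal sketch of the DDL I would expect, reconstructed from the column names and types used in the test (my assumption, not copied from the test suite), run over the same JDBC connection (conn) as in the second test:

{code:java}
  // Assumed schema for ARRAY_TEST_TABLE (reconstruction, adjust to your setup):
  conn.createStatement().execute(
    """CREATE TABLE IF NOT EXISTS ARRAY_TEST_TABLE (
      |  ID BIGINT NOT NULL PRIMARY KEY,
      |  VCARRAY VARCHAR[]
      |)""".stripMargin)
{code}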

{code:java}
  test("Can save arrays of AnyVal type back to phoenix") {
    val dataSet = List((2L, Array(1, 2, 3), Array(1L, 2L, 3L)))

    sc
      .parallelize(dataSet)
      .saveToPhoenix(
        "ARRAY_ANYVAL_TEST_TABLE",
        Seq("ID", "INTARRAY", "BIGINTARRAY"),
        zkUrl = Some(quorumAddress)
      )

    // Load the results back
    val stmt = conn.createStatement()
    val rs = stmt.executeQuery("SELECT INTARRAY, BIGINTARRAY FROM ARRAY_ANYVAL_TEST_TABLE WHERE ID = 2")
    rs.next()
    val intArray = rs.getArray(1).getArray().asInstanceOf[Array[Int]]
    val longArray = rs.getArray(2).getArray().asInstanceOf[Array[Long]]

    // Verify the arrays are equal
    intArray shouldEqual dataSet(0)._2
    longArray shouldEqual dataSet(0)._3
  }
{code}
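Similarly, the second test assumes ARRAY_ANYVAL_TEST_TABLE; a sketch of the DDL I'd expect, again reconstructed from the column names and element types (my assumption, not taken from the test suite):

{code:java}
  // Assumed schema for ARRAY_ANYVAL_TEST_TABLE (reconstruction, adjust to your setup):
  conn.createStatement().execute(
    """CREATE TABLE IF NOT EXISTS ARRAY_ANYVAL_TEST_TABLE (
      |  ID BIGINT NOT NULL PRIMARY KEY,
      |  INTARRAY INTEGER[],
      |  BIGINTARRAY BIGINT[]
      |)""".stripMargin)
{code}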

Both tests fail with a ClassCastException.
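For comparison, here is a sketch (not part of the original tests) of upserting the same row through plain JDBC with conn.createArrayOf; if this path works while the Spark save fails, that would point at the phoenix-spark write path rather than Phoenix's array support itself:

{code:java}
  // Hypothetical cross-check, assuming ARRAY_ANYVAL_TEST_TABLE exists as above:
  val upsert = conn.prepareStatement(
    "UPSERT INTO ARRAY_ANYVAL_TEST_TABLE (ID, INTARRAY, BIGINTARRAY) VALUES (?, ?, ?)")
  upsert.setLong(1, 2L)
  // Arrays are bound via java.sql.Array, so the Scala primitives are boxed here.
  upsert.setArray(2, conn.createArrayOf("INTEGER", Array(1, 2, 3).map(i => Int.box(i): AnyRef)))
  upsert.setArray(3, conn.createArrayOf("BIGINT", Array(1L, 2L, 3L).map(l => Long.box(l): AnyRef)))
  upsert.executeUpdate()
  conn.commit()
{code}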


> Problem with ARRAYs in Phoenix-spark
> ------------------------------------
>
>                 Key: PHOENIX-2469
>                 URL: https://issues.apache.org/jira/browse/PHOENIX-2469
>             Project: Phoenix
>          Issue Type: Bug
>            Reporter: Dawid Wysakowicz
>



--
This message was sent by Atlassian JIRA
(v6.3.4#6332)