You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@phoenix.apache.org by "Dawid Wysakowicz (JIRA)" <ji...@apache.org> on 2015/12/01 09:39:10 UTC
[jira] [Updated] (PHOENIX-2469) Problem with ARRAYs in
Phoenix-spark
[ https://issues.apache.org/jira/browse/PHOENIX-2469?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Dawid Wysakowicz updated PHOENIX-2469:
--------------------------------------
Description:
I've recently encountered some behaviour that seems buggy when working with phoenix-spark and arrays.
Take a look at these unit tests:
{code:java}
test("Can save arrays from custom dataframes back to phoenix") {
val dataSet = List(Row(2L, Array("String1", "String2", "String3")))
val sqlContext = new SQLContext(sc)
val schema = StructType(
Seq(StructField("ID", LongType, nullable = false),
StructField("VCARRAY", ArrayType(StringType))))
val rowRDD = sc.parallelize(dataSet)
// Apply the schema to the RDD.
val df = sqlContext.createDataFrame(rowRDD, schema)
df.write
.format("org.apache.phoenix.spark")
.options(Map("table" -> "ARRAY_TEST_TABLE", "zkUrl" -> quorumAddress))
.mode(SaveMode.Overwrite)
.save()
}
{code}
{code:java}
test("Can save arrays of AnyVal type back to phoenix") {
val dataSet = List((2L, Array(1, 2, 3), Array(1L, 2L, 3L)))
sc
.parallelize(dataSet)
.saveToPhoenix(
"ARRAY_ANYVAL_TEST_TABLE",
Seq("ID", "INTARRAY", "BIGINTARRAY"),
zkUrl = Some(quorumAddress)
)
// Load the results back
val stmt = conn.createStatement()
val rs = stmt.executeQuery("SELECT INTARRAY, BIGINTARRAY FROM ARRAY_ANYVAL_TEST_TABLE WHERE ID = 2")
rs.next()
val intArray = rs.getArray(1).getArray().asInstanceOf[Array[Int]]
val longArray = rs.getArray(2).getArray().asInstanceOf[Array[Long]]
// Verify the arrays are equal
intArray shouldEqual dataSet(0)._2
longArray shouldEqual dataSet(0)._3
}
{code}
Both tests fail with ClassCastExceptions.
was:
I've recently found some behaviour that I found buggy when working with phoenix-spark and arrays.
Take a look at those unit tests:
test("Can save arrays from custom dataframes back to phoenix") {
val dataSet = List(Row(2L, Array("String1", "String2", "String3")))
val sqlContext = new SQLContext(sc)
val schema = StructType(
Seq(StructField("ID", LongType, nullable = false),
StructField("VCARRAY", ArrayType(StringType))))
val rowRDD = sc.parallelize(dataSet)
// Apply the schema to the RDD.
val df = sqlContext.createDataFrame(rowRDD, schema)
df.write
.format("org.apache.phoenix.spark")
.options(Map("table" -> "ARRAY_TEST_TABLE", "zkUrl" -> quorumAddress))
.mode(SaveMode.Overwrite)
.save()
}
test("Can save arrays of AnyVal type back to phoenix") {
val dataSet = List((2L, Array(1, 2, 3), Array(1L, 2L, 3L)))
sc
.parallelize(dataSet)
.saveToPhoenix(
"ARRAY_ANYVAL_TEST_TABLE",
Seq("ID", "INTARRAY", "BIGINTARRAY"),
zkUrl = Some(quorumAddress)
)
// Load the results back
val stmt = conn.createStatement()
val rs = stmt.executeQuery("SELECT INTARRAY, BIGINTARRAY FROM ARRAY_ANYVAL_TEST_TABLE WHERE ID = 2")
rs.next()
val intArray = rs.getArray(1).getArray().asInstanceOf[Array[Int]]
val longArray = rs.getArray(2).getArray().asInstanceOf[Array[Long]]
// Verify the arrays are equal
intArray shouldEqual dataSet(0)._2
longArray shouldEqual dataSet(0)._3
}
Both fail with some ClassCastExceptions.
> Problem with ARRAYs in Phoenix-spark
> ------------------------------------
>
> Key: PHOENIX-2469
> URL: https://issues.apache.org/jira/browse/PHOENIX-2469
> Project: Phoenix
> Issue Type: Bug
> Reporter: Dawid Wysakowicz
>
> I've recently found some behaviour that I found buggy when working with phoenix-spark and arrays.
> Take a look at those unit tests:
> {code:java}
> test("Can save arrays from custom dataframes back to phoenix") {
> val dataSet = List(Row(2L, Array("String1", "String2", "String3")))
> val sqlContext = new SQLContext(sc)
> val schema = StructType(
> Seq(StructField("ID", LongType, nullable = false),
> StructField("VCARRAY", ArrayType(StringType))))
> val rowRDD = sc.parallelize(dataSet)
> // Apply the schema to the RDD.
> val df = sqlContext.createDataFrame(rowRDD, schema)
> df.write
> .format("org.apache.phoenix.spark")
> .options(Map("table" -> "ARRAY_TEST_TABLE", "zkUrl" -> quorumAddress))
> .mode(SaveMode.Overwrite)
> .save()
> }
> {code}
> {code:java}
> test("Can save arrays of AnyVal type back to phoenix") {
> val dataSet = List((2L, Array(1, 2, 3), Array(1L, 2L, 3L)))
> sc
> .parallelize(dataSet)
> .saveToPhoenix(
> "ARRAY_ANYVAL_TEST_TABLE",
> Seq("ID", "INTARRAY", "BIGINTARRAY"),
> zkUrl = Some(quorumAddress)
> )
> // Load the results back
> val stmt = conn.createStatement()
> val rs = stmt.executeQuery("SELECT INTARRAY, BIGINTARRAY FROM ARRAY_ANYVAL_TEST_TABLE WHERE ID = 2")
> rs.next()
> val intArray = rs.getArray(1).getArray().asInstanceOf[Array[Int]]
> val longArray = rs.getArray(2).getArray().asInstanceOf[Array[Long]]
> // Verify the arrays are equal
> intArray shouldEqual dataSet(0)._2
> longArray shouldEqual dataSet(0)._3
> }
> {code}
> Both fail with some ClassCastExceptions.
--
This message was sent by Atlassian JIRA
(v6.3.4#6332)