Posted to commits@phoenix.apache.org by ra...@apache.org on 2015/04/07 09:20:07 UTC
phoenix git commit: PHOENIX-1818 - Move cluster-required tests to src/it
Repository: phoenix
Updated Branches:
refs/heads/master 9ddb484aa -> f666baa27
PHOENIX-1818 - Move cluster-required tests to src/it
Project: http://git-wip-us.apache.org/repos/asf/phoenix/repo
Commit: http://git-wip-us.apache.org/repos/asf/phoenix/commit/f666baa2
Tree: http://git-wip-us.apache.org/repos/asf/phoenix/tree/f666baa2
Diff: http://git-wip-us.apache.org/repos/asf/phoenix/diff/f666baa2
Branch: refs/heads/master
Commit: f666baa27ed97cb08ba964c53df74907a53ce001
Parents: 9ddb484
Author: ravimagham <ra...@apache.org>
Authored: Tue Apr 7 00:19:21 2015 -0700
Committer: ravimagham <ra...@apache.org>
Committed: Tue Apr 7 00:19:21 2015 -0700
----------------------------------------------------------------------
phoenix-spark/src/it/resources/log4j.xml | 41 +++
phoenix-spark/src/it/resources/setup.sql | 18 +
.../apache/phoenix/spark/PhoenixRDDTest.scala | 333 +++++++++++++++++++
phoenix-spark/src/test/resources/log4j.xml | 41 ---
phoenix-spark/src/test/resources/setup.sql | 18 -
.../apache/phoenix/spark/PhoenixRDDTest.scala | 333 -------------------
6 files changed, 392 insertions(+), 392 deletions(-)
----------------------------------------------------------------------
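The move from src/test to src/it follows the usual Maven convention for integration tests: these suites spin up a full HBase mini-cluster via HBaseTestingUtility, so they belong with cluster-required integration tests rather than with fast unit tests. The file contents themselves are unchanged; only their location moves, as the matching 392 insertions and 392 deletions in the diffstat above show.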
http://git-wip-us.apache.org/repos/asf/phoenix/blob/f666baa2/phoenix-spark/src/it/resources/log4j.xml
----------------------------------------------------------------------
diff --git a/phoenix-spark/src/it/resources/log4j.xml b/phoenix-spark/src/it/resources/log4j.xml
new file mode 100644
index 0000000..d4799da
--- /dev/null
+++ b/phoenix-spark/src/it/resources/log4j.xml
@@ -0,0 +1,41 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE log4j:configuration SYSTEM "log4j.dtd">
+
+<log4j:configuration xmlns:log4j="http://jakarta.apache.org/log4j/">
+ <appender name="console" class="org.apache.log4j.ConsoleAppender">
+ <param name="Target" value="System.out"/>
+
+ <layout class="org.apache.log4j.PatternLayout">
+ <param name="ConversionPattern" value="%-4r [%t] %-5p %c %x - %m%n"/>
+ </layout>
+ </appender>
+
+ <logger name="org.eclipse">
+ <level value="ERROR"/>
+ </logger>
+
+ <logger name="org.apache">
+ <level value="ERROR"/>
+ </logger>
+
+  <logger name="org.apache.phoenix.mapreduce">
+ <level value="FATAL"/>
+ </logger>
+
+ <logger name="org.mortbay">
+ <level value="ERROR"/>
+ </logger>
+
+ <logger name="BlockStateChange">
+ <level value="ERROR"/>
+ </logger>
+
+ <logger name="io.netty">
+ <level value="ERROR"/>
+ </logger>
+
+ <root>
+ <priority value="INFO"/>
+ <appender-ref ref="console"/>
+ </root>
+</log4j:configuration>
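For reference, a minimal programmatic sketch of what this log4j.xml sets up, written against the log4j 1.x API that the XML targets (the logger names and conversion pattern are taken from the config above; everything else is illustrative):

    import org.apache.log4j.{ConsoleAppender, Level, Logger, PatternLayout}

    // Root logger at INFO, writing to System.out with the XML's conversion pattern.
    val console = new ConsoleAppender(
      new PatternLayout("%-4r [%t] %-5p %c %x - %m%n"), ConsoleAppender.SYSTEM_OUT)
    Logger.getRootLogger.setLevel(Level.INFO)
    Logger.getRootLogger.addAppender(console)

    // Quiet the mini-cluster's noisy dependencies down to ERROR,
    // and the Phoenix MapReduce layer down to FATAL.
    Seq("org.eclipse", "org.apache", "org.mortbay", "BlockStateChange", "io.netty")
      .foreach(name => Logger.getLogger(name).setLevel(Level.ERROR))
    Logger.getLogger("org.apache.phoenix.mapreduce").setLevel(Level.FATAL)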
http://git-wip-us.apache.org/repos/asf/phoenix/blob/f666baa2/phoenix-spark/src/it/resources/setup.sql
----------------------------------------------------------------------
diff --git a/phoenix-spark/src/it/resources/setup.sql b/phoenix-spark/src/it/resources/setup.sql
new file mode 100644
index 0000000..14a7e7e
--- /dev/null
+++ b/phoenix-spark/src/it/resources/setup.sql
@@ -0,0 +1,18 @@
+CREATE TABLE table1 (id BIGINT NOT NULL PRIMARY KEY, col1 VARCHAR)
+CREATE TABLE table2 (id BIGINT NOT NULL PRIMARY KEY, table1_id BIGINT, "t2col1" VARCHAR)
+UPSERT INTO table1 (id, col1) VALUES (1, 'test_row_1')
+UPSERT INTO table2 (id, table1_id, "t2col1") VALUES (1, 1, 'test_child_1')
+UPSERT INTO table2 (id, table1_id, "t2col1") VALUES (2, 1, 'test_child_2')
+UPSERT INTO table1 (id, col1) VALUES (2, 'test_row_2')
+UPSERT INTO table2 (id, table1_id, "t2col1") VALUES (3, 2, 'test_child_1')
+UPSERT INTO table2 (id, table1_id, "t2col1") VALUES (4, 2, 'test_child_2')
+UPSERT INTO table2 (id, table1_id, "t2col1") VALUES (5, 2, 'test_child_3')
+UPSERT INTO table2 (id, table1_id, "t2col1") VALUES (6, 2, 'test_child_4')
+CREATE TABLE "table3" ("id" BIGINT NOT NULL PRIMARY KEY, "col1" VARCHAR)
+UPSERT INTO "table3" ("id", "col1") VALUES (1, 'foo')
+UPSERT INTO "table3" ("id", "col1") VALUES (2, 'bar')
+CREATE TABLE ARRAY_TEST_TABLE (ID BIGINT NOT NULL PRIMARY KEY, VCARRAY VARCHAR[])
+UPSERT INTO ARRAY_TEST_TABLE (ID, VCARRAY) VALUES (1, ARRAY['String1', 'String2', 'String3'])
+CREATE TABLE DATE_PREDICATE_TEST_TABLE (ID BIGINT NOT NULL, TIMESERIES_KEY TIMESTAMP NOT NULL CONSTRAINT pk PRIMARY KEY (ID, TIMESERIES_KEY))
+UPSERT INTO DATE_PREDICATE_TEST_TABLE (ID, TIMESERIES_KEY) VALUES (1, CAST(CURRENT_TIME() AS TIMESTAMP))
+CREATE TABLE OUTPUT_TEST_TABLE (id BIGINT NOT NULL PRIMARY KEY, col1 VARCHAR, col2 INTEGER, col3 DATE)
\ No newline at end of file
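Every statement above deliberately sits on one line: the test suite (below) reads setup.sql and executes it over a Phoenix JDBC connection, one statement per line. A sketch of that loader, with a hypothetical connection string (the real one is derived from the mini-cluster at runtime):

    import java.sql.DriverManager
    import scala.io.Source

    // "quorum:clientPort:znodeParent" - hypothetical values for illustration.
    val conn = DriverManager.getConnection("jdbc:phoenix:localhost:2181:/hbase")
    conn.setAutoCommit(true)
    for (sql <- Source.fromFile("setup.sql").getLines()) {
      val stmt = conn.createStatement()
      stmt.execute(sql) // no multi-line statements, hence the single-line requirement
      stmt.close()
    }
    conn.close()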
http://git-wip-us.apache.org/repos/asf/phoenix/blob/f666baa2/phoenix-spark/src/it/scala/org/apache/phoenix/spark/PhoenixRDDTest.scala
----------------------------------------------------------------------
diff --git a/phoenix-spark/src/it/scala/org/apache/phoenix/spark/PhoenixRDDTest.scala b/phoenix-spark/src/it/scala/org/apache/phoenix/spark/PhoenixRDDTest.scala
new file mode 100644
index 0000000..63cb6e4
--- /dev/null
+++ b/phoenix-spark/src/it/scala/org/apache/phoenix/spark/PhoenixRDDTest.scala
@@ -0,0 +1,333 @@
+/*
+ Licensed under the Apache License, Version 2.0 (the "License");
+ you may not use this file except in compliance with the License.
+ You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ */
+package org.apache.phoenix.spark
+
+import java.sql.{Connection, DriverManager}
+import java.util.Date
+
+import org.apache.hadoop.conf.Configuration
+import org.apache.hadoop.hbase.{HConstants, HBaseTestingUtility}
+import org.apache.phoenix.schema.ColumnNotFoundException
+import org.apache.phoenix.schema.types.PVarchar
+import org.apache.phoenix.util.ColumnInfo
+import org.apache.spark.sql.SQLContext
+import org.apache.spark.sql.types.{StringType, StructField}
+import org.apache.spark.{SparkConf, SparkContext}
+import org.joda.time.DateTime
+import org.scalatest.{BeforeAndAfterAll, FunSuite, Matchers}
+import org.apache.phoenix.spark._
+
+import scala.collection.mutable.ListBuffer
+
+class PhoenixRDDTest extends FunSuite with Matchers with BeforeAndAfterAll {
+ lazy val hbaseTestingUtility = {
+ new HBaseTestingUtility()
+ }
+
+ lazy val hbaseConfiguration = {
+ val conf = hbaseTestingUtility.getConfiguration
+
+ val quorum = conf.get("hbase.zookeeper.quorum")
+ val clientPort = conf.get("hbase.zookeeper.property.clientPort")
+ val znodeParent = conf.get("zookeeper.znode.parent")
+
+    // This is an odd one: the ZooKeeper quorum entry in this config is misleading - it
+    // just reports localhost, so rebuild the full connection string from its parts.
+ conf.set(org.apache.hadoop.hbase.HConstants.ZOOKEEPER_QUORUM, s"$quorum:$clientPort:$znodeParent")
+
+ conf
+ }
+
+ lazy val quorumAddress = {
+ hbaseConfiguration.get("hbase.zookeeper.quorum")
+ }
+
+ lazy val zookeeperClientPort = {
+ hbaseConfiguration.get("hbase.zookeeper.property.clientPort")
+ }
+
+ lazy val zookeeperZnodeParent = {
+ hbaseConfiguration.get("zookeeper.znode.parent")
+ }
+
+ lazy val hbaseConnectionString = {
+ s"$quorumAddress:$zookeeperClientPort:$zookeeperZnodeParent"
+ }
+
+ var conn: Connection = _
+
+ override def beforeAll() {
+ hbaseTestingUtility.startMiniCluster()
+
+ conn = DriverManager.getConnection(s"jdbc:phoenix:$hbaseConnectionString")
+
+ conn.setAutoCommit(true)
+
+    // Each SQL statement used to set up Phoenix must be on a single line, because the
+    // file is executed line by line; that can produce some very long lines.
+ val setupSqlSource = getClass.getClassLoader.getResourceAsStream("setup.sql")
+
+ val setupSql = scala.io.Source.fromInputStream(setupSqlSource).getLines()
+
+ for (sql <- setupSql) {
+ val stmt = conn.createStatement()
+
+ stmt.execute(sql)
+
+ stmt.close()
+ }
+
+ conn.commit()
+ }
+
+ override def afterAll() {
+ conn.close()
+ hbaseTestingUtility.shutdownMiniCluster()
+ }
+
+ val conf = new SparkConf().set("spark.ui.showConsoleProgress", "false")
+
+ val sc = new SparkContext("local[1]", "PhoenixSparkTest", conf)
+
+ def buildSql(table: String, columns: Seq[String], predicate: Option[String]): String = {
+ val query = "SELECT %s FROM \"%s\"" format(columns.map(f => "\"" + f + "\"").mkString(", "), table)
+
+ query + (predicate match {
+ case Some(p: String) => " WHERE " + p
+ case _ => ""
+ })
+ }
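+
+  // For illustration (editor's example of the helper above):
+  //   buildSql("table3", Seq("id", "col1"), Some("\"id\" = 1"))
+  //   yields: SELECT "id", "col1" FROM "table3" WHERE "id" = 1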
+
+ test("Can create valid SQL") {
+ val rdd = new PhoenixRDD(sc, "MyTable", Array("Foo", "Bar"),
+ conf = hbaseConfiguration)
+
+ rdd.buildSql("MyTable", Array("Foo", "Bar"), None) should
+ equal("SELECT \"Foo\", \"Bar\" FROM \"MyTable\"")
+ }
+
+ test("Can convert Phoenix schema") {
+ val phoenixSchema = List(
+ new ColumnInfo("varcharColumn", PVarchar.INSTANCE.getSqlType)
+ )
+
+ val rdd = new PhoenixRDD(sc, "MyTable", Array("Foo", "Bar"),
+ conf = hbaseConfiguration)
+
+ val catalystSchema = rdd.phoenixSchemaToCatalystSchema(phoenixSchema)
+
+ val expected = List(StructField("varcharColumn", StringType, nullable = true))
+
+ catalystSchema shouldEqual expected
+ }
+
+ test("Can create schema RDD and execute query") {
+ val sqlContext = new SQLContext(sc)
+
+ val df1 = sqlContext.phoenixTableAsDataFrame("TABLE1", Array("ID", "COL1"), conf = hbaseConfiguration)
+
+ df1.registerTempTable("sql_table_1")
+
+ val df2 = sqlContext.phoenixTableAsDataFrame("TABLE2", Array("ID", "TABLE1_ID"),
+ conf = hbaseConfiguration)
+
+ df2.registerTempTable("sql_table_2")
+
+ val sqlRdd = sqlContext.sql("SELECT t1.ID, t1.COL1, t2.ID, t2.TABLE1_ID FROM sql_table_1 AS t1 INNER JOIN sql_table_2 AS t2 ON (t2.TABLE1_ID = t1.ID)")
+
+ val count = sqlRdd.count()
+
+ count shouldEqual 6L
+ }
+
+ test("Can create schema RDD and execute query on case sensitive table (no config)") {
+ val sqlContext = new SQLContext(sc)
+
+ val df1 = sqlContext.phoenixTableAsDataFrame("table3", Array("id", "col1"), zkUrl = Some(hbaseConnectionString))
+
+ df1.registerTempTable("table3")
+
+ val sqlRdd = sqlContext.sql("SELECT * FROM table3")
+
+ val count = sqlRdd.count()
+
+ count shouldEqual 2L
+ }
+
+ test("Can create schema RDD and execute constrained query") {
+ val sqlContext = new SQLContext(sc)
+
+ val df1 = sqlContext.phoenixTableAsDataFrame("TABLE1", Array("ID", "COL1"), conf = hbaseConfiguration)
+
+ df1.registerTempTable("sql_table_1")
+
+ val df2 = sqlContext.phoenixTableAsDataFrame("TABLE2", Array("ID", "TABLE1_ID"),
+ predicate = Some("\"ID\" = 1"),
+ conf = hbaseConfiguration)
+
+ df2.registerTempTable("sql_table_2")
+
+ val sqlRdd = sqlContext.sql("SELECT t1.ID, t1.COL1, t2.ID, t2.TABLE1_ID FROM sql_table_1 AS t1 INNER JOIN sql_table_2 AS t2 ON (t2.TABLE1_ID = t1.ID)")
+
+ val count = sqlRdd.count()
+
+ count shouldEqual 1L
+ }
+
+ test("Using a predicate referring to a non-existent column should fail") {
+ intercept[RuntimeException] {
+ val sqlContext = new SQLContext(sc)
+
+ val df1 = sqlContext.phoenixTableAsDataFrame("table3", Array("id", "col1"),
+ predicate = Some("foo = bar"),
+ conf = hbaseConfiguration)
+
+ df1.registerTempTable("table3")
+
+ val sqlRdd = sqlContext.sql("SELECT * FROM table3")
+
+ // we have to execute an action before the predicate failure can occur
+ val count = sqlRdd.count()
+ }.getCause shouldBe a [ColumnNotFoundException]
+ }
+
+ test("Can create schema RDD with predicate that will never match") {
+ val sqlContext = new SQLContext(sc)
+
+ val df1 = sqlContext.phoenixTableAsDataFrame("table3", Array("id", "col1"),
+ predicate = Some("\"id\" = -1"),
+ conf = hbaseConfiguration)
+
+ df1.registerTempTable("table3")
+
+ val sqlRdd = sqlContext.sql("SELECT * FROM table3")
+
+ val count = sqlRdd.count()
+
+ count shouldEqual 0L
+ }
+
+ test("Can create schema RDD with complex predicate") {
+ val sqlContext = new SQLContext(sc)
+
+ val df1 = sqlContext.phoenixTableAsDataFrame("DATE_PREDICATE_TEST_TABLE", Array("ID", "TIMESERIES_KEY"),
+ predicate = Some("ID > 0 AND TIMESERIES_KEY BETWEEN CAST(TO_DATE('1990-01-01 00:00:01', 'yyyy-MM-dd HH:mm:ss') AS TIMESTAMP) AND CAST(TO_DATE('1990-01-30 00:00:01', 'yyyy-MM-dd HH:mm:ss') AS TIMESTAMP)"),
+ conf = hbaseConfiguration)
+
+ df1.registerTempTable("date_predicate_test_table")
+
+ val sqlRdd = df1.sqlContext.sql("SELECT * FROM date_predicate_test_table")
+
+ val count = sqlRdd.count()
+
+ count shouldEqual 0L
+ }
+
+ test("Can query an array table") {
+ val sqlContext = new SQLContext(sc)
+
+ val df1 = sqlContext.phoenixTableAsDataFrame("ARRAY_TEST_TABLE", Array("ID", "VCARRAY"),
+ conf = hbaseConfiguration)
+
+ df1.registerTempTable("ARRAY_TEST_TABLE")
+
+ val sqlRdd = sqlContext.sql("SELECT * FROM ARRAY_TEST_TABLE")
+
+ val count = sqlRdd.count()
+
+ // get row 0, column 1, which should be "VCARRAY"
+ val arrayValues = sqlRdd.collect().apply(0).apply(1)
+
+ arrayValues should equal(Array("String1", "String2", "String3"))
+
+ count shouldEqual 1L
+ }
+
+ test("Can read a table as an RDD") {
+ val rdd1 = sc.phoenixTableAsRDD("ARRAY_TEST_TABLE", Seq("ID", "VCARRAY"),
+ conf = hbaseConfiguration)
+
+ val count = rdd1.count()
+
+ val arrayValues = rdd1.take(1)(0)("VCARRAY")
+
+ arrayValues should equal(Array("String1", "String2", "String3"))
+
+ count shouldEqual 1L
+ }
+
+ test("Can save to phoenix table") {
+ val sqlContext = new SQLContext(sc)
+
+ val dataSet = List((1L, "1", 1), (2L, "2", 2), (3L, "3", 3))
+
+ sc
+ .parallelize(dataSet)
+ .saveToPhoenix(
+ "OUTPUT_TEST_TABLE",
+ Seq("ID","COL1","COL2"),
+ hbaseConfiguration
+ )
+
+ // Load the results back
+ val stmt = conn.createStatement()
+ val rs = stmt.executeQuery("SELECT ID, COL1, COL2 FROM OUTPUT_TEST_TABLE")
+ val results = ListBuffer[(Long, String, Int)]()
+ while(rs.next()) {
+ results.append((rs.getLong(1), rs.getString(2), rs.getInt(3)))
+ }
+ stmt.close()
+
+ // Verify they match
+ (0 to results.size - 1).foreach { i =>
+ dataSet(i) shouldEqual results(i)
+ }
+ }
+
+ test("Can save Java and Joda dates to Phoenix (no config)") {
+ val dt = new DateTime()
+ val date = new Date()
+
+ val dataSet = List((1L, "1", 1, dt), (2L, "2", 2, date))
+ sc
+ .parallelize(dataSet)
+ .saveToPhoenix(
+ "OUTPUT_TEST_TABLE",
+ Seq("ID","COL1","COL2","COL3"),
+ zkUrl = Some(hbaseConnectionString)
+ )
+
+ // Load the results back
+ val stmt = conn.createStatement()
+ val rs = stmt.executeQuery("SELECT COL3 FROM OUTPUT_TEST_TABLE WHERE ID = 1 OR ID = 2 ORDER BY ID ASC")
+ val results = ListBuffer[java.sql.Date]()
+ while(rs.next()) {
+ results.append(rs.getDate(1))
+ }
+ stmt.close()
+
+ // Verify the epochs are equal
+ results(0).getTime shouldEqual dt.getMillis
+ results(1).getTime shouldEqual date.getTime
+ }
+
+ test("Not specifying a zkUrl or a config quorum URL should fail") {
+ intercept[UnsupportedOperationException] {
+ val sqlContext = new SQLContext(sc)
+ val badConf = new Configuration(hbaseConfiguration)
+ badConf.unset(HConstants.ZOOKEEPER_QUORUM)
+ sqlContext.phoenixTableAsDataFrame("TABLE1", Array("ID", "COL1"), conf = badConf)
+ }
+ }
+}
\ No newline at end of file
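Taken together, the suite exercises the three public entry points of the phoenix-spark integration. A condensed usage sketch, assuming an active SparkContext sc, the implicits from org.apache.phoenix.spark._, and a hypothetical ZooKeeper URL in quorum:clientPort:znodeParent form (the predicate option seen in the constrained-query test can additionally push a WHERE clause down to Phoenix):

    import org.apache.hadoop.hbase.HBaseConfiguration
    import org.apache.spark.sql.SQLContext
    import org.apache.phoenix.spark._

    val zkUrl = "localhost:2181:/hbase" // hypothetical; the tests derive this from the mini-cluster

    // DataFrame read:
    val sqlContext = new SQLContext(sc)
    val df = sqlContext.phoenixTableAsDataFrame("table3", Array("id", "col1"), zkUrl = Some(zkUrl))

    // RDD read, configured via an HBase Configuration instead of a zkUrl
    // (assumed to carry a valid hbase.zookeeper.quorum):
    val configuration = HBaseConfiguration.create()
    val rdd = sc.phoenixTableAsRDD("ARRAY_TEST_TABLE", Seq("ID", "VCARRAY"), conf = configuration)

    // RDD write:
    sc.parallelize(List((10L, "ten", 10)))
      .saveToPhoenix("OUTPUT_TEST_TABLE", Seq("ID", "COL1", "COL2"), zkUrl = Some(zkUrl))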
http://git-wip-us.apache.org/repos/asf/phoenix/blob/f666baa2/phoenix-spark/src/test/resources/log4j.xml
----------------------------------------------------------------------
diff --git a/phoenix-spark/src/test/resources/log4j.xml b/phoenix-spark/src/test/resources/log4j.xml
deleted file mode 100644
index d4799da..0000000
--- a/phoenix-spark/src/test/resources/log4j.xml
+++ /dev/null
@@ -1,41 +0,0 @@
-<?xml version="1.0" encoding="UTF-8"?>
-<!DOCTYPE log4j:configuration SYSTEM "log4j.dtd">
-
-<log4j:configuration xmlns:log4j="http://jakarta.apache.org/log4j/">
- <appender name="console" class="org.apache.log4j.ConsoleAppender">
- <param name="Target" value="System.out"/>
-
- <layout class="org.apache.log4j.PatternLayout">
- <param name="ConversionPattern" value="%-4r [%t] %-5p %c %x - %m%n"/>
- </layout>
- </appender>
-
- <logger name="org.eclipse">
- <level value="ERROR"/>
- </logger>
-
- <logger name="org.apache">
- <level value="ERROR"/>
- </logger>
-
-  <logger name="org.apache.phoenix.mapreduce">
- <level value="FATAL"/>
- </logger>
-
- <logger name="org.mortbay">
- <level value="ERROR"/>
- </logger>
-
- <logger name="BlockStateChange">
- <level value="ERROR"/>
- </logger>
-
- <logger name="io.netty">
- <level value="ERROR"/>
- </logger>
-
- <root>
- <priority value="INFO"/>
- <appender-ref ref="console"/>
- </root>
-</log4j:configuration>
http://git-wip-us.apache.org/repos/asf/phoenix/blob/f666baa2/phoenix-spark/src/test/resources/setup.sql
----------------------------------------------------------------------
diff --git a/phoenix-spark/src/test/resources/setup.sql b/phoenix-spark/src/test/resources/setup.sql
deleted file mode 100644
index 14a7e7e..0000000
--- a/phoenix-spark/src/test/resources/setup.sql
+++ /dev/null
@@ -1,18 +0,0 @@
-CREATE TABLE table1 (id BIGINT NOT NULL PRIMARY KEY, col1 VARCHAR)
-CREATE TABLE table2 (id BIGINT NOT NULL PRIMARY KEY, table1_id BIGINT, "t2col1" VARCHAR)
-UPSERT INTO table1 (id, col1) VALUES (1, 'test_row_1')
-UPSERT INTO table2 (id, table1_id, "t2col1") VALUES (1, 1, 'test_child_1')
-UPSERT INTO table2 (id, table1_id, "t2col1") VALUES (2, 1, 'test_child_2')
-UPSERT INTO table1 (id, col1) VALUES (2, 'test_row_2')
-UPSERT INTO table2 (id, table1_id, "t2col1") VALUES (3, 2, 'test_child_1')
-UPSERT INTO table2 (id, table1_id, "t2col1") VALUES (4, 2, 'test_child_2')
-UPSERT INTO table2 (id, table1_id, "t2col1") VALUES (5, 2, 'test_child_3')
-UPSERT INTO table2 (id, table1_id, "t2col1") VALUES (6, 2, 'test_child_4')
-CREATE TABLE "table3" ("id" BIGINT NOT NULL PRIMARY KEY, "col1" VARCHAR)
-UPSERT INTO "table3" ("id", "col1") VALUES (1, 'foo')
-UPSERT INTO "table3" ("id", "col1") VALUES (2, 'bar')
-CREATE TABLE ARRAY_TEST_TABLE (ID BIGINT NOT NULL PRIMARY KEY, VCARRAY VARCHAR[])
-UPSERT INTO ARRAY_TEST_TABLE (ID, VCARRAY) VALUES (1, ARRAY['String1', 'String2', 'String3'])
-CREATE TABLE DATE_PREDICATE_TEST_TABLE (ID BIGINT NOT NULL, TIMESERIES_KEY TIMESTAMP NOT NULL CONSTRAINT pk PRIMARY KEY (ID, TIMESERIES_KEY))
-UPSERT INTO DATE_PREDICATE_TEST_TABLE (ID, TIMESERIES_KEY) VALUES (1, CAST(CURRENT_TIME() AS TIMESTAMP))
-CREATE TABLE OUTPUT_TEST_TABLE (id BIGINT NOT NULL PRIMARY KEY, col1 VARCHAR, col2 INTEGER, col3 DATE)
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/phoenix/blob/f666baa2/phoenix-spark/src/test/scala/org/apache/phoenix/spark/PhoenixRDDTest.scala
----------------------------------------------------------------------
diff --git a/phoenix-spark/src/test/scala/org/apache/phoenix/spark/PhoenixRDDTest.scala b/phoenix-spark/src/test/scala/org/apache/phoenix/spark/PhoenixRDDTest.scala
deleted file mode 100644
index 63cb6e4..0000000
--- a/phoenix-spark/src/test/scala/org/apache/phoenix/spark/PhoenixRDDTest.scala
+++ /dev/null
@@ -1,333 +0,0 @@
-/*
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- */
-package org.apache.phoenix.spark
-
-import java.sql.{Connection, DriverManager}
-import java.util.Date
-
-import org.apache.hadoop.conf.Configuration
-import org.apache.hadoop.hbase.{HConstants, HBaseTestingUtility}
-import org.apache.phoenix.schema.ColumnNotFoundException
-import org.apache.phoenix.schema.types.PVarchar
-import org.apache.phoenix.util.ColumnInfo
-import org.apache.spark.sql.SQLContext
-import org.apache.spark.sql.types.{StringType, StructField}
-import org.apache.spark.{SparkConf, SparkContext}
-import org.joda.time.DateTime
-import org.scalatest.{BeforeAndAfterAll, FunSuite, Matchers}
-import org.apache.phoenix.spark._
-
-import scala.collection.mutable.ListBuffer
-
-class PhoenixRDDTest extends FunSuite with Matchers with BeforeAndAfterAll {
- lazy val hbaseTestingUtility = {
- new HBaseTestingUtility()
- }
-
- lazy val hbaseConfiguration = {
- val conf = hbaseTestingUtility.getConfiguration
-
- val quorum = conf.get("hbase.zookeeper.quorum")
- val clientPort = conf.get("hbase.zookeeper.property.clientPort")
- val znodeParent = conf.get("zookeeper.znode.parent")
-
-    // This is an odd one: the ZooKeeper quorum entry in this config is misleading - it
-    // just reports localhost, so rebuild the full connection string from its parts.
- conf.set(org.apache.hadoop.hbase.HConstants.ZOOKEEPER_QUORUM, s"$quorum:$clientPort:$znodeParent")
-
- conf
- }
-
- lazy val quorumAddress = {
- hbaseConfiguration.get("hbase.zookeeper.quorum")
- }
-
- lazy val zookeeperClientPort = {
- hbaseConfiguration.get("hbase.zookeeper.property.clientPort")
- }
-
- lazy val zookeeperZnodeParent = {
- hbaseConfiguration.get("zookeeper.znode.parent")
- }
-
- lazy val hbaseConnectionString = {
- s"$quorumAddress:$zookeeperClientPort:$zookeeperZnodeParent"
- }
-
- var conn: Connection = _
-
- override def beforeAll() {
- hbaseTestingUtility.startMiniCluster()
-
- conn = DriverManager.getConnection(s"jdbc:phoenix:$hbaseConnectionString")
-
- conn.setAutoCommit(true)
-
-    // Each SQL statement used to set up Phoenix must be on a single line, because the
-    // file is executed line by line; that can produce some very long lines.
- val setupSqlSource = getClass.getClassLoader.getResourceAsStream("setup.sql")
-
- val setupSql = scala.io.Source.fromInputStream(setupSqlSource).getLines()
-
- for (sql <- setupSql) {
- val stmt = conn.createStatement()
-
- stmt.execute(sql)
-
- stmt.close()
- }
-
- conn.commit()
- }
-
- override def afterAll() {
- conn.close()
- hbaseTestingUtility.shutdownMiniCluster()
- }
-
- val conf = new SparkConf().set("spark.ui.showConsoleProgress", "false")
-
- val sc = new SparkContext("local[1]", "PhoenixSparkTest", conf)
-
- def buildSql(table: String, columns: Seq[String], predicate: Option[String]): String = {
- val query = "SELECT %s FROM \"%s\"" format(columns.map(f => "\"" + f + "\"").mkString(", "), table)
-
- query + (predicate match {
- case Some(p: String) => " WHERE " + p
- case _ => ""
- })
- }
-
- test("Can create valid SQL") {
- val rdd = new PhoenixRDD(sc, "MyTable", Array("Foo", "Bar"),
- conf = hbaseConfiguration)
-
- rdd.buildSql("MyTable", Array("Foo", "Bar"), None) should
- equal("SELECT \"Foo\", \"Bar\" FROM \"MyTable\"")
- }
-
- test("Can convert Phoenix schema") {
- val phoenixSchema = List(
- new ColumnInfo("varcharColumn", PVarchar.INSTANCE.getSqlType)
- )
-
- val rdd = new PhoenixRDD(sc, "MyTable", Array("Foo", "Bar"),
- conf = hbaseConfiguration)
-
- val catalystSchema = rdd.phoenixSchemaToCatalystSchema(phoenixSchema)
-
- val expected = List(StructField("varcharColumn", StringType, nullable = true))
-
- catalystSchema shouldEqual expected
- }
-
- test("Can create schema RDD and execute query") {
- val sqlContext = new SQLContext(sc)
-
- val df1 = sqlContext.phoenixTableAsDataFrame("TABLE1", Array("ID", "COL1"), conf = hbaseConfiguration)
-
- df1.registerTempTable("sql_table_1")
-
- val df2 = sqlContext.phoenixTableAsDataFrame("TABLE2", Array("ID", "TABLE1_ID"),
- conf = hbaseConfiguration)
-
- df2.registerTempTable("sql_table_2")
-
- val sqlRdd = sqlContext.sql("SELECT t1.ID, t1.COL1, t2.ID, t2.TABLE1_ID FROM sql_table_1 AS t1 INNER JOIN sql_table_2 AS t2 ON (t2.TABLE1_ID = t1.ID)")
-
- val count = sqlRdd.count()
-
- count shouldEqual 6L
- }
-
- test("Can create schema RDD and execute query on case sensitive table (no config)") {
- val sqlContext = new SQLContext(sc)
-
- val df1 = sqlContext.phoenixTableAsDataFrame("table3", Array("id", "col1"), zkUrl = Some(hbaseConnectionString))
-
- df1.registerTempTable("table3")
-
- val sqlRdd = sqlContext.sql("SELECT * FROM table3")
-
- val count = sqlRdd.count()
-
- count shouldEqual 2L
- }
-
- test("Can create schema RDD and execute constrained query") {
- val sqlContext = new SQLContext(sc)
-
- val df1 = sqlContext.phoenixTableAsDataFrame("TABLE1", Array("ID", "COL1"), conf = hbaseConfiguration)
-
- df1.registerTempTable("sql_table_1")
-
- val df2 = sqlContext.phoenixTableAsDataFrame("TABLE2", Array("ID", "TABLE1_ID"),
- predicate = Some("\"ID\" = 1"),
- conf = hbaseConfiguration)
-
- df2.registerTempTable("sql_table_2")
-
- val sqlRdd = sqlContext.sql("SELECT t1.ID, t1.COL1, t2.ID, t2.TABLE1_ID FROM sql_table_1 AS t1 INNER JOIN sql_table_2 AS t2 ON (t2.TABLE1_ID = t1.ID)")
-
- val count = sqlRdd.count()
-
- count shouldEqual 1L
- }
-
- test("Using a predicate referring to a non-existent column should fail") {
- intercept[RuntimeException] {
- val sqlContext = new SQLContext(sc)
-
- val df1 = sqlContext.phoenixTableAsDataFrame("table3", Array("id", "col1"),
- predicate = Some("foo = bar"),
- conf = hbaseConfiguration)
-
- df1.registerTempTable("table3")
-
- val sqlRdd = sqlContext.sql("SELECT * FROM table3")
-
- // we have to execute an action before the predicate failure can occur
- val count = sqlRdd.count()
- }.getCause shouldBe a [ColumnNotFoundException]
- }
-
- test("Can create schema RDD with predicate that will never match") {
- val sqlContext = new SQLContext(sc)
-
- val df1 = sqlContext.phoenixTableAsDataFrame("table3", Array("id", "col1"),
- predicate = Some("\"id\" = -1"),
- conf = hbaseConfiguration)
-
- df1.registerTempTable("table3")
-
- val sqlRdd = sqlContext.sql("SELECT * FROM table3")
-
- val count = sqlRdd.count()
-
- count shouldEqual 0L
- }
-
- test("Can create schema RDD with complex predicate") {
- val sqlContext = new SQLContext(sc)
-
- val df1 = sqlContext.phoenixTableAsDataFrame("DATE_PREDICATE_TEST_TABLE", Array("ID", "TIMESERIES_KEY"),
- predicate = Some("ID > 0 AND TIMESERIES_KEY BETWEEN CAST(TO_DATE('1990-01-01 00:00:01', 'yyyy-MM-dd HH:mm:ss') AS TIMESTAMP) AND CAST(TO_DATE('1990-01-30 00:00:01', 'yyyy-MM-dd HH:mm:ss') AS TIMESTAMP)"),
- conf = hbaseConfiguration)
-
- df1.registerTempTable("date_predicate_test_table")
-
- val sqlRdd = df1.sqlContext.sql("SELECT * FROM date_predicate_test_table")
-
- val count = sqlRdd.count()
-
- count shouldEqual 0L
- }
-
- test("Can query an array table") {
- val sqlContext = new SQLContext(sc)
-
- val df1 = sqlContext.phoenixTableAsDataFrame("ARRAY_TEST_TABLE", Array("ID", "VCARRAY"),
- conf = hbaseConfiguration)
-
- df1.registerTempTable("ARRAY_TEST_TABLE")
-
- val sqlRdd = sqlContext.sql("SELECT * FROM ARRAY_TEST_TABLE")
-
- val count = sqlRdd.count()
-
- // get row 0, column 1, which should be "VCARRAY"
- val arrayValues = sqlRdd.collect().apply(0).apply(1)
-
- arrayValues should equal(Array("String1", "String2", "String3"))
-
- count shouldEqual 1L
- }
-
- test("Can read a table as an RDD") {
- val rdd1 = sc.phoenixTableAsRDD("ARRAY_TEST_TABLE", Seq("ID", "VCARRAY"),
- conf = hbaseConfiguration)
-
- val count = rdd1.count()
-
- val arrayValues = rdd1.take(1)(0)("VCARRAY")
-
- arrayValues should equal(Array("String1", "String2", "String3"))
-
- count shouldEqual 1L
- }
-
- test("Can save to phoenix table") {
- val sqlContext = new SQLContext(sc)
-
- val dataSet = List((1L, "1", 1), (2L, "2", 2), (3L, "3", 3))
-
- sc
- .parallelize(dataSet)
- .saveToPhoenix(
- "OUTPUT_TEST_TABLE",
- Seq("ID","COL1","COL2"),
- hbaseConfiguration
- )
-
- // Load the results back
- val stmt = conn.createStatement()
- val rs = stmt.executeQuery("SELECT ID, COL1, COL2 FROM OUTPUT_TEST_TABLE")
- val results = ListBuffer[(Long, String, Int)]()
- while(rs.next()) {
- results.append((rs.getLong(1), rs.getString(2), rs.getInt(3)))
- }
- stmt.close()
-
- // Verify they match
- (0 to results.size - 1).foreach { i =>
- dataSet(i) shouldEqual results(i)
- }
- }
-
- test("Can save Java and Joda dates to Phoenix (no config)") {
- val dt = new DateTime()
- val date = new Date()
-
- val dataSet = List((1L, "1", 1, dt), (2L, "2", 2, date))
- sc
- .parallelize(dataSet)
- .saveToPhoenix(
- "OUTPUT_TEST_TABLE",
- Seq("ID","COL1","COL2","COL3"),
- zkUrl = Some(hbaseConnectionString)
- )
-
- // Load the results back
- val stmt = conn.createStatement()
- val rs = stmt.executeQuery("SELECT COL3 FROM OUTPUT_TEST_TABLE WHERE ID = 1 OR ID = 2 ORDER BY ID ASC")
- val results = ListBuffer[java.sql.Date]()
- while(rs.next()) {
- results.append(rs.getDate(1))
- }
- stmt.close()
-
- // Verify the epochs are equal
- results(0).getTime shouldEqual dt.getMillis
- results(1).getTime shouldEqual date.getTime
- }
-
- test("Not specifying a zkUrl or a config quorum URL should fail") {
- intercept[UnsupportedOperationException] {
- val sqlContext = new SQLContext(sc)
- val badConf = new Configuration(hbaseConfiguration)
- badConf.unset(HConstants.ZOOKEEPER_QUORUM)
- sqlContext.phoenixTableAsDataFrame("TABLE1", Array("ID", "COL1"), conf = badConf)
- }
- }
-}
\ No newline at end of file