Posted to commits@phoenix.apache.org by an...@apache.org on 2017/04/21 06:19:03 UTC
[1/3] phoenix git commit: PHOENIX-3751 spark 2.1 with Phoenix 4.10 load data as dataframe fail, NullPointerException
Repository: phoenix
Updated Branches:
refs/heads/master 679ff21b7 -> 92b951e53
PHOENIX-3751 spark 2.1 with Phoenix 4.10 load data as dataframe fail, NullPointerException
Project: http://git-wip-us.apache.org/repos/asf/phoenix/repo
Commit: http://git-wip-us.apache.org/repos/asf/phoenix/commit/28af89c4
Tree: http://git-wip-us.apache.org/repos/asf/phoenix/tree/28af89c4
Diff: http://git-wip-us.apache.org/repos/asf/phoenix/diff/28af89c4
Branch: refs/heads/master
Commit: 28af89c46fa54d7f60adc8be88fdf559cad811d2
Parents: 679ff21
Author: Ankit Singhal <an...@gmail.com>
Authored: Fri Apr 21 11:47:27 2017 +0530
Committer: Ankit Singhal <an...@gmail.com>
Committed: Fri Apr 21 11:47:27 2017 +0530
----------------------------------------------------------------------
phoenix-spark/src/it/resources/globalSetup.sql | 2 +-
.../src/main/scala/org/apache/phoenix/spark/PhoenixRDD.scala | 4 ++--
2 files changed, 3 insertions(+), 3 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/phoenix/blob/28af89c4/phoenix-spark/src/it/resources/globalSetup.sql
----------------------------------------------------------------------
diff --git a/phoenix-spark/src/it/resources/globalSetup.sql b/phoenix-spark/src/it/resources/globalSetup.sql
index 28eb0f7..dc24da7 100644
--- a/phoenix-spark/src/it/resources/globalSetup.sql
+++ b/phoenix-spark/src/it/resources/globalSetup.sql
@@ -60,4 +60,4 @@ UPSERT INTO "small" VALUES ('key3', 'xyz', 30000)
CREATE TABLE MULTITENANT_TEST_TABLE (TENANT_ID VARCHAR NOT NULL, ORGANIZATION_ID VARCHAR, GLOBAL_COL1 VARCHAR CONSTRAINT pk PRIMARY KEY (TENANT_ID, ORGANIZATION_ID)) MULTI_TENANT=true
CREATE TABLE IF NOT EXISTS GIGANTIC_TABLE (ID INTEGER PRIMARY KEY,unsig_id UNSIGNED_INT,big_id BIGINT,unsig_long_id UNSIGNED_LONG,tiny_id TINYINT,unsig_tiny_id UNSIGNED_TINYINT,small_id SMALLINT,unsig_small_id UNSIGNED_SMALLINT,float_id FLOAT,unsig_float_id UNSIGNED_FLOAT,double_id DOUBLE,unsig_double_id UNSIGNED_DOUBLE,decimal_id DECIMAL,boolean_id BOOLEAN,time_id TIME,date_id DATE,timestamp_id TIMESTAMP,unsig_time_id UNSIGNED_TIME,unsig_date_id UNSIGNED_DATE,unsig_timestamp_id UNSIGNED_TIMESTAMP,varchar_id VARCHAR (30),char_id CHAR (30),binary_id BINARY (100),varbinary_id VARBINARY (100))
CREATE TABLE IF NOT EXISTS OUTPUT_GIGANTIC_TABLE (ID INTEGER PRIMARY KEY,unsig_id UNSIGNED_INT,big_id BIGINT,unsig_long_id UNSIGNED_LONG,tiny_id TINYINT,unsig_tiny_id UNSIGNED_TINYINT,small_id SMALLINT,unsig_small_id UNSIGNED_SMALLINT,float_id FLOAT,unsig_float_id UNSIGNED_FLOAT,double_id DOUBLE,unsig_double_id UNSIGNED_DOUBLE,decimal_id DECIMAL,boolean_id BOOLEAN,time_id TIME,date_id DATE,timestamp_id TIMESTAMP,unsig_time_id UNSIGNED_TIME,unsig_date_id UNSIGNED_DATE,unsig_timestamp_id UNSIGNED_TIMESTAMP,varchar_id VARCHAR (30),char_id CHAR (30),binary_id BINARY (100),varbinary_id VARBINARY (100))
- upsert into GIGANTIC_TABLE values(0,2,3,4,-5,6,7,8,9.3,10.4,11.5,12.6,13.7,true,CURRENT_TIME(),CURRENT_DATE(),CURRENT_TIME(),CURRENT_TIME(),CURRENT_DATE(),CURRENT_TIME(),'This is random textA','a','a','a')
+ upsert into GIGANTIC_TABLE values(0,2,3,4,-5,6,7,8,9.3,10.4,11.5,12.6,13.7,true,null,null,CURRENT_TIME(),CURRENT_TIME(),CURRENT_DATE(),CURRENT_TIME(),'This is random textA','a','a','a')
http://git-wip-us.apache.org/repos/asf/phoenix/blob/28af89c4/phoenix-spark/src/main/scala/org/apache/phoenix/spark/PhoenixRDD.scala
----------------------------------------------------------------------
diff --git a/phoenix-spark/src/main/scala/org/apache/phoenix/spark/PhoenixRDD.scala b/phoenix-spark/src/main/scala/org/apache/phoenix/spark/PhoenixRDD.scala
index 63547d2..2c2c6e1 100644
--- a/phoenix-spark/src/main/scala/org/apache/phoenix/spark/PhoenixRDD.scala
+++ b/phoenix-spark/src/main/scala/org/apache/phoenix/spark/PhoenixRDD.scala
@@ -134,9 +134,9 @@ class PhoenixRDD(sc: SparkContext, table: String, columns: Seq[String],
val rowSeq = columns.map { case (name, sqlType) =>
val res = pr.resultMap(name)
// Special handling for data types
- if (dateAsTimestamp && (sqlType == 91 || sqlType == 19)) { // 91 is the defined type for Date and 19 for UNSIGNED_DATE
+ if (dateAsTimestamp && (sqlType == 91 || sqlType == 19) && res!=null) { // 91 is the defined type for Date and 19 for UNSIGNED_DATE
new java.sql.Timestamp(res.asInstanceOf[java.sql.Date].getTime)
- } else if (sqlType == 92 || sqlType == 18) { // 92 is the defined type for Time and 18 for UNSIGNED_TIME
+ } else if ((sqlType == 92 || sqlType == 18) && res!=null) { // 92 is the defined type for Time and 18 for UNSIGNED_TIME
new java.sql.Timestamp(res.asInstanceOf[java.sql.Time].getTime)
} else {
res
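For context, the read path this patch guards is the plain phoenix-spark DataFrame load. Below is a minimal sketch of the previously failing scenario, assuming the GIGANTIC_TABLE seeded above and a hypothetical ZooKeeper quorum of localhost:2181; before this change, a null value in a TIME or DATE column reached the unguarded asInstanceOf casts in PhoenixRDD and threw a NullPointerException:

import org.apache.spark.sql.SQLContext
import org.apache.phoenix.spark._

val sqlContext = new SQLContext(sc)
// TIME_ID and DATE_ID now hold nulls via the updated globalSetup.sql,
// exercising the new res != null guards in the row conversion above.
val df = sqlContext.phoenixTableAsDataFrame(
  "GIGANTIC_TABLE",
  Seq("ID", "TIME_ID", "DATE_ID"),
  zkUrl = Some("localhost:2181")) // hypothetical quorum address
df.show() // NPE here before this fix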
[2/3] phoenix git commit: PHOENIX-3792 Provide way to skip normalization of column names in phoenix-spark integration
Posted by an...@apache.org.
PHOENIX-3792 Provide way to skip normalization of column names in phoenix-spark integration
Project: http://git-wip-us.apache.org/repos/asf/phoenix/repo
Commit: http://git-wip-us.apache.org/repos/asf/phoenix/commit/90e32c01
Tree: http://git-wip-us.apache.org/repos/asf/phoenix/tree/90e32c01
Diff: http://git-wip-us.apache.org/repos/asf/phoenix/diff/90e32c01
Branch: refs/heads/master
Commit: 90e32c015207b39330ed7496db7a73dbc7b634f4
Parents: 28af89c
Author: Ankit Singhal <an...@gmail.com>
Authored: Fri Apr 21 11:48:16 2017 +0530
Committer: Ankit Singhal <an...@gmail.com>
Committed: Fri Apr 21 11:48:16 2017 +0530
----------------------------------------------------------------------
phoenix-spark/src/it/resources/globalSetup.sql | 1 +
.../apache/phoenix/spark/PhoenixSparkIT.scala | 27 ++++++++++++++++++--
.../phoenix/spark/DataFrameFunctions.scala | 19 +++++++++++---
.../apache/phoenix/spark/DefaultSource.scala | 2 +-
4 files changed, 42 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/phoenix/blob/90e32c01/phoenix-spark/src/it/resources/globalSetup.sql
----------------------------------------------------------------------
diff --git a/phoenix-spark/src/it/resources/globalSetup.sql b/phoenix-spark/src/it/resources/globalSetup.sql
index dc24da7..7ac0039 100644
--- a/phoenix-spark/src/it/resources/globalSetup.sql
+++ b/phoenix-spark/src/it/resources/globalSetup.sql
@@ -17,6 +17,7 @@
CREATE TABLE table1 (id BIGINT NOT NULL PRIMARY KEY, col1 VARCHAR)
CREATE TABLE table1_copy (id BIGINT NOT NULL PRIMARY KEY, col1 VARCHAR)
CREATE TABLE table2 (id BIGINT NOT NULL PRIMARY KEY, table1_id BIGINT, "t2col1" VARCHAR)
+CREATE TABLE table3 (id BIGINT NOT NULL PRIMARY KEY, table3_id BIGINT, "t2col1" VARCHAR)
UPSERT INTO table1 (id, col1) VALUES (1, 'test_row_1')
UPSERT INTO table2 (id, table1_id, "t2col1") VALUES (1, 1, 'test_child_1')
UPSERT INTO table2 (id, table1_id, "t2col1") VALUES (2, 1, 'test_child_2')
http://git-wip-us.apache.org/repos/asf/phoenix/blob/90e32c01/phoenix-spark/src/it/scala/org/apache/phoenix/spark/PhoenixSparkIT.scala
----------------------------------------------------------------------
diff --git a/phoenix-spark/src/it/scala/org/apache/phoenix/spark/PhoenixSparkIT.scala b/phoenix-spark/src/it/scala/org/apache/phoenix/spark/PhoenixSparkIT.scala
index d53b5ee..b8e44fe 100644
--- a/phoenix-spark/src/it/scala/org/apache/phoenix/spark/PhoenixSparkIT.scala
+++ b/phoenix-spark/src/it/scala/org/apache/phoenix/spark/PhoenixSparkIT.scala
@@ -20,15 +20,38 @@ import org.apache.phoenix.util.{ColumnInfo, SchemaUtil}
import org.apache.spark.sql.types._
import org.apache.spark.sql.{Row, SQLContext, SaveMode}
import org.joda.time.DateTime
-
+import org.apache.spark.{SparkConf, SparkContext}
import scala.collection.mutable.ListBuffer
-
+import org.apache.hadoop.conf.Configuration
/**
* Note: If running directly from an IDE, these are the recommended VM parameters:
* -Xmx1536m -XX:MaxPermSize=512m -XX:ReservedCodeCacheSize=512m
*/
class PhoenixSparkIT extends AbstractPhoenixSparkIT {
+ test("Can persist data with case senstive columns (like in avro schema) using 'DataFrame.saveToPhoenix'") {
+ val sqlContext = new SQLContext(sc)
+ val df = sqlContext.createDataFrame(
+ Seq(
+ (1, 1, "test_child_1"),
+ (2, 1, "test_child_2"))).toDF("ID", "TABLE3_ID", "t2col1")
+ df.saveToPhoenix("TABLE3", zkUrl = Some(quorumAddress),skipNormalizingIdentifier=true)
+
+ // Verify results
+ val stmt = conn.createStatement()
+ val rs = stmt.executeQuery("SELECT * FROM TABLE3")
+
+ val checkResults = List((1, 1, "test_child_1"), (2, 1, "test_child_2"))
+ val results = ListBuffer[(Long, Long, String)]()
+ while (rs.next()) {
+ results.append((rs.getLong(1), rs.getLong(2), rs.getString(3)))
+ }
+ stmt.close()
+
+ results.toList shouldEqual checkResults
+
+ }
+
test("Can convert Phoenix schema") {
val phoenixSchema = List(
new ColumnInfo("varcharColumn", PVarchar.INSTANCE.getSqlType)
http://git-wip-us.apache.org/repos/asf/phoenix/blob/90e32c01/phoenix-spark/src/main/scala/org/apache/phoenix/spark/DataFrameFunctions.scala
----------------------------------------------------------------------
diff --git a/phoenix-spark/src/main/scala/org/apache/phoenix/spark/DataFrameFunctions.scala b/phoenix-spark/src/main/scala/org/apache/phoenix/spark/DataFrameFunctions.scala
index ddf4fab..92f4c58 100644
--- a/phoenix-spark/src/main/scala/org/apache/phoenix/spark/DataFrameFunctions.scala
+++ b/phoenix-spark/src/main/scala/org/apache/phoenix/spark/DataFrameFunctions.scala
@@ -24,13 +24,16 @@ import scala.collection.JavaConversions._
class DataFrameFunctions(data: DataFrame) extends Serializable {
-
+ def saveToPhoenix(parameters: Map[String, String]): Unit = {
+ saveToPhoenix(parameters("table"), zkUrl = parameters.get("zkUrl"), tenantId = parameters.get("TenantId"),
+ skipNormalizingIdentifier=parameters.contains("skipNormalizingIdentifier"))
+ }
def saveToPhoenix(tableName: String, conf: Configuration = new Configuration,
- zkUrl: Option[String] = None, tenantId: Option[String] = None): Unit = {
-
+ zkUrl: Option[String] = None, tenantId: Option[String] = None, skipNormalizingIdentifier: Boolean = false): Unit = {
// Retrieve the schema field names and normalize to Phoenix, need to do this outside of mapPartitions
- val fieldArray = data.schema.fieldNames.map(x => SchemaUtil.normalizeIdentifier(x))
+ val fieldArray = getFieldArray(skipNormalizingIdentifier, data)
+
// Create a configuration object to use for saving
@transient val outConfig = ConfigurationUtil.getOutputConfiguration(tableName, fieldArray, zkUrl, tenantId, Some(conf))
@@ -61,4 +64,12 @@ class DataFrameFunctions(data: DataFrame) extends Serializable {
outConfig
)
}
+
+ def getFieldArray(skipNormalizingIdentifier: Boolean = false, data: DataFrame) = {
+ if (skipNormalizingIdentifier) {
+ data.schema.fieldNames.map(x => x)
+ } else {
+ data.schema.fieldNames.map(x => SchemaUtil.normalizeIdentifier(x))
+ }
+ }
}
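For illustration, here are both entry points side by side, assuming the implicit conversion brought in by import org.apache.phoenix.spark._ and a hypothetical quorum address. Note that the new Map-based overload keys the skip on the mere presence of the skipNormalizingIdentifier entry; its value is never read:

import org.apache.phoenix.spark._

// Direct call with the new flag:
df.saveToPhoenix("TABLE3", zkUrl = Some("localhost:2181"),
  skipNormalizingIdentifier = true)

// Map-based overload: the key's presence alone enables the skip.
df.saveToPhoenix(Map(
  "table" -> "TABLE3",
  "zkUrl" -> "localhost:2181",
  "skipNormalizingIdentifier" -> "true"))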
http://git-wip-us.apache.org/repos/asf/phoenix/blob/90e32c01/phoenix-spark/src/main/scala/org/apache/phoenix/spark/DefaultSource.scala
----------------------------------------------------------------------
diff --git a/phoenix-spark/src/main/scala/org/apache/phoenix/spark/DefaultSource.scala b/phoenix-spark/src/main/scala/org/apache/phoenix/spark/DefaultSource.scala
index 743d196..e000b74 100644
--- a/phoenix-spark/src/main/scala/org/apache/phoenix/spark/DefaultSource.scala
+++ b/phoenix-spark/src/main/scala/org/apache/phoenix/spark/DefaultSource.scala
@@ -44,7 +44,7 @@ class DefaultSource extends RelationProvider with CreatableRelationProvider {
verifyParameters(parameters)
// Save the DataFrame to Phoenix
- data.saveToPhoenix(parameters("table"), zkUrl = parameters.get("zkUrl"), tenantId = parameters.get("TenantId"))
+ data.saveToPhoenix(parameters)
// Return a relation of the saved data
createRelation(sqlContext, parameters)
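Because createRelation now forwards the full options map into saveToPhoenix, the flag can also ride through the generic DataFrame writer API. A sketch under the same assumptions (phoenix-spark accepts only SaveMode.Overwrite on this path):

import org.apache.spark.sql.SaveMode

df.write
  .format("org.apache.phoenix.spark")
  .mode(SaveMode.Overwrite)
  .option("table", "TABLE3")
  .option("zkUrl", "localhost:2181") // hypothetical quorum address
  .option("skipNormalizingIdentifier", "true")
  .save()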
[3/3] phoenix git commit: PHOENIX-3759 Dropping a local index causes NPE
Posted by an...@apache.org.
PHOENIX-3759 Dropping a local index causes NPE
Project: http://git-wip-us.apache.org/repos/asf/phoenix/repo
Commit: http://git-wip-us.apache.org/repos/asf/phoenix/commit/92b951e5
Tree: http://git-wip-us.apache.org/repos/asf/phoenix/tree/92b951e5
Diff: http://git-wip-us.apache.org/repos/asf/phoenix/diff/92b951e5
Branch: refs/heads/master
Commit: 92b951e5387768e084ed09729884a59160cd81d3
Parents: 90e32c0
Author: Ankit Singhal <an...@gmail.com>
Authored: Fri Apr 21 11:48:54 2017 +0530
Committer: Ankit Singhal <an...@gmail.com>
Committed: Fri Apr 21 11:48:54 2017 +0530
----------------------------------------------------------------------
.../apache/phoenix/end2end/index/LocalIndexIT.java | 15 ++++++++++++---
.../java/org/apache/phoenix/util/RepairUtil.java | 11 +++++++----
2 files changed, 19 insertions(+), 7 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/phoenix/blob/92b951e5/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/LocalIndexIT.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/LocalIndexIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/LocalIndexIT.java
index 8d3316b..ea4780b 100644
--- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/LocalIndexIT.java
+++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/LocalIndexIT.java
@@ -599,21 +599,30 @@ public class LocalIndexIT extends BaseLocalIndexIT {
admin.disableTable(tableName);
copyLocalIndexHFiles(config, tableRegions.get(0), tableRegions.get(1), false);
copyLocalIndexHFiles(config, tableRegions.get(3), tableRegions.get(0), false);
-
admin.enableTable(tableName);
int count=getCount(conn, tableName, "L#0");
assertTrue(count > 14);
- admin.majorCompact(tableName);
+ admin.majorCompact(TableName.valueOf(tableName));
int tryCount = 5;// need to wait for rebuilding of corrupted local index region
while (tryCount-- > 0 && count != 14) {
- Thread.sleep(30000);
+ Thread.sleep(15000);
count = getCount(conn, tableName, "L#0");
}
assertEquals(14, count);
rs = statement.executeQuery("SELECT COUNT(*) FROM " + indexName1);
assertTrue(rs.next());
assertEquals(7, rs.getLong(1));
+ statement.execute("DROP INDEX " + indexName1 + " ON " + tableName);
+ admin.majorCompact(TableName.valueOf(tableName));
+ statement.execute("DROP INDEX " + indexName + " ON " + tableName);
+ admin.majorCompact(TableName.valueOf(tableName));
+ Thread.sleep(15000);
+ admin.majorCompact(TableName.valueOf(tableName));
+ Thread.sleep(15000);
+ rs = statement.executeQuery("SELECT COUNT(*) FROM " + tableName);
+ assertTrue(rs.next());
+
}
}
http://git-wip-us.apache.org/repos/asf/phoenix/blob/92b951e5/phoenix-core/src/main/java/org/apache/phoenix/util/RepairUtil.java
----------------------------------------------------------------------
diff --git a/phoenix-core/src/main/java/org/apache/phoenix/util/RepairUtil.java b/phoenix-core/src/main/java/org/apache/phoenix/util/RepairUtil.java
index b9b7526..ea14715 100644
--- a/phoenix-core/src/main/java/org/apache/phoenix/util/RepairUtil.java
+++ b/phoenix-core/src/main/java/org/apache/phoenix/util/RepairUtil.java
@@ -29,10 +29,13 @@ public class RepairUtil {
byte[] endKey = environment.getRegion().getRegionInfo().getEndKey();
byte[] indexKeyEmbedded = startKey.length == 0 ? new byte[endKey.length] : startKey;
for (StoreFile file : store.getStorefiles()) {
- byte[] fileFirstRowKey = KeyValue.createKeyValueFromKey(file.getReader().getFirstKey()).getRow();;
- if ((fileFirstRowKey != null && Bytes.compareTo(file.getReader().getFirstKey(), 0, indexKeyEmbedded.length,
- indexKeyEmbedded, 0, indexKeyEmbedded.length) != 0)
- /*|| (endKey.length > 0 && Bytes.compareTo(file.getLastKey(), endKey) < 0)*/) { return false; }
+ if (file.getReader() != null && file.getReader().getFirstKey() != null) {
+ byte[] fileFirstRowKey = KeyValue.createKeyValueFromKey(file.getReader().getFirstKey()).getRow();
+ ;
+ if ((fileFirstRowKey != null && Bytes.compareTo(file.getReader().getFirstKey(), 0,
+ indexKeyEmbedded.length, indexKeyEmbedded, 0, indexKeyEmbedded.length) != 0)
+ /* || (endKey.length > 0 && Bytes.compareTo(file.getLastKey(), endKey) < 0) */) { return false; }
+ }
}
return true;
}
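The essence of the RepairUtil change is to touch a store file's reader and its first key only after both are confirmed non-null, since a major compaction that runs after a local index is dropped can encounter store files whose readers come back null. A minimal Scala sketch of the same guard pattern, with hypothetical stand-in types in place of the HBase StoreFile API:

// Hypothetical stand-ins for the HBase types RepairUtil consumes.
trait Reader { def getFirstKey: Array[Byte] } // may return null
trait StoreFile { def getReader: Reader } // may return null

// Guarded consistency check: a file with no reader or no first key is
// skipped rather than dereferenced; only a real first key that does not
// start with the embedded index key marks the store as inconsistent.
def firstKeysConsistent(files: Seq[StoreFile], indexKeyEmbedded: Array[Byte]): Boolean =
  files.forall { file =>
    val reader = file.getReader
    reader == null || reader.getFirstKey == null ||
      reader.getFirstKey.startsWith(indexKeyEmbedded)
  }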