You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@phoenix.apache.org by ja...@apache.org on 2016/09/12 17:07:41 UTC

svn commit: r1760409 [2/2] - in /phoenix/site: publish/ publish/language/ source/src/site/ source/src/site/markdown/

Modified: phoenix/site/publish/upgrading.html
URL: http://svn.apache.org/viewvc/phoenix/site/publish/upgrading.html?rev=1760409&r1=1760408&r2=1760409&view=diff
==============================================================================
--- phoenix/site/publish/upgrading.html (original)
+++ phoenix/site/publish/upgrading.html Mon Sep 12 17:07:40 2016
@@ -1,7 +1,7 @@
 
 <!DOCTYPE html>
 <!--
- Generated by Apache Maven Doxia at 2016-08-11
+ Generated by Apache Maven Doxia at 2016-09-12
  Rendered using Reflow Maven Skin 1.1.0 (http://andriusvelykis.github.io/reflow-maven-skin)
 -->
 <html  xml:lang="en" lang="en">
@@ -83,6 +83,7 @@
 									<li ><a href="pherf.html" title="Performance Testing">Performance Testing</a></li>
 									<li class="divider"/>
 									<li ><a href="phoenix_spark.html" title="Apache Spark Integration">Apache Spark Integration</a></li>
+									<li ><a href="hive_storage_handler.html" title="Apache Hive Storage Handler">Apache Hive Storage Handler</a></li>
 									<li ><a href="pig_integration.html" title="Apache Pig Integration">Apache Pig Integration</a></li>
 									<li ><a href="phoenix_mr.html" title="Map Reduce Integration">Map Reduce Integration</a></li>
 									<li ><a href="flume.html" title="Apache Flume Plugin">Apache Flume Plugin</a></li>
@@ -281,6 +282,9 @@
 							<a href="phoenix_spark.html" title="Apache Spark Integration">Apache Spark Integration</a>
 						</li>
 						<li >
+							<a href="hive_storage_handler.html" title="Apache Hive Storage Handler">Apache Hive Storage Handler</a>
+						</li>
+						<li >
 							<a href="pig_integration.html" title="Apache Pig Integration">Apache Pig Integration</a>
 						</li>
 						<li >

Modified: phoenix/site/publish/views.html
URL: http://svn.apache.org/viewvc/phoenix/site/publish/views.html?rev=1760409&r1=1760408&r2=1760409&view=diff
==============================================================================
--- phoenix/site/publish/views.html (original)
+++ phoenix/site/publish/views.html Mon Sep 12 17:07:40 2016
@@ -1,7 +1,7 @@
 
 <!DOCTYPE html>
 <!--
- Generated by Apache Maven Doxia at 2016-08-19
+ Generated by Apache Maven Doxia at 2016-09-12
  Rendered using Reflow Maven Skin 1.1.0 (http://andriusvelykis.github.io/reflow-maven-skin)
 -->
 <html  xml:lang="en" lang="en">
@@ -83,6 +83,7 @@
 									<li ><a href="pherf.html" title="Performance Testing">Performance Testing</a></li>
 									<li class="divider"/>
 									<li ><a href="phoenix_spark.html" title="Apache Spark Integration">Apache Spark Integration</a></li>
+									<li ><a href="hive_storage_handler.html" title="Apache Hive Storage Handler">Apache Hive Storage Handler</a></li>
 									<li ><a href="pig_integration.html" title="Apache Pig Integration">Apache Pig Integration</a></li>
 									<li ><a href="phoenix_mr.html" title="Map Reduce Integration">Map Reduce Integration</a></li>
 									<li ><a href="flume.html" title="Apache Flume Plugin">Apache Flume Plugin</a></li>
@@ -310,6 +311,9 @@ VALUES('John Doe', CURRENT_DATE(), NEXT
 							<a href="phoenix_spark.html" title="Apache Spark Integration">Apache Spark Integration</a>
 						</li>
 						<li >
+							<a href="hive_storage_handler.html" title="Apache Hive Storage Handler">Apache Hive Storage Handler</a>
+						</li>
+						<li >
 							<a href="pig_integration.html" title="Apache Pig Integration">Apache Pig Integration</a>
 						</li>
 						<li >

Modified: phoenix/site/publish/who_is_using.html
URL: http://svn.apache.org/viewvc/phoenix/site/publish/who_is_using.html?rev=1760409&r1=1760408&r2=1760409&view=diff
==============================================================================
--- phoenix/site/publish/who_is_using.html (original)
+++ phoenix/site/publish/who_is_using.html Mon Sep 12 17:07:40 2016
@@ -1,7 +1,7 @@
 
 <!DOCTYPE html>
 <!--
- Generated by Apache Maven Doxia at 2016-08-11
+ Generated by Apache Maven Doxia at 2016-09-12
  Rendered using Reflow Maven Skin 1.1.0 (http://andriusvelykis.github.io/reflow-maven-skin)
 -->
 <html  xml:lang="en" lang="en">
@@ -83,6 +83,7 @@
 									<li ><a href="pherf.html" title="Performance Testing">Performance Testing</a></li>
 									<li class="divider"/>
 									<li ><a href="phoenix_spark.html" title="Apache Spark Integration">Apache Spark Integration</a></li>
+									<li ><a href="hive_storage_handler.html" title="Apache Hive Storage Handler">Apache Hive Storage Handler</a></li>
 									<li ><a href="pig_integration.html" title="Apache Pig Integration">Apache Pig Integration</a></li>
 									<li ><a href="phoenix_mr.html" title="Map Reduce Integration">Map Reduce Integration</a></li>
 									<li ><a href="flume.html" title="Apache Flume Plugin">Apache Flume Plugin</a></li>
@@ -356,6 +357,9 @@
 							<a href="phoenix_spark.html" title="Apache Spark Integration">Apache Spark Integration</a>
 						</li>
 						<li >
+							<a href="hive_storage_handler.html" title="Apache Hive Storage Handler">Apache Hive Storage Handler</a>
+						</li>
+						<li >
 							<a href="pig_integration.html" title="Apache Pig Integration">Apache Pig Integration</a>
 						</li>
 						<li >

Added: phoenix/site/source/src/site/markdown/hive_storage_handler.md
URL: http://svn.apache.org/viewvc/phoenix/site/source/src/site/markdown/hive_storage_handler.md?rev=1760409&view=auto
==============================================================================
--- phoenix/site/source/src/site/markdown/hive_storage_handler.md (added)
+++ phoenix/site/source/src/site/markdown/hive_storage_handler.md Mon Sep 12 17:07:40 2016
@@ -0,0 +1,146 @@
+# Hive Storage Handler 
+
+Hive Storage Handler is an Apache Phoenix plugin that allows access to Phoenix tables from the Apache Hive CLI using HiveQL.
+
+## Prerequisites
+
+* Phoenix 4.8.0+
+* Hive 1.2.1+ 
+
+## Hive Setup
+
+Make phoenix-version-hive.jar available for Hive:
+
+1. Add to hive-env.sh:
+
+```
+HIVE_AUX_JARS_PATH=<path to jar>
+```
+
+2. Add property to hive-site.xml. That will allow Hive Map-Reduce jobs to use this jar:
+
+```
+<property> 
+  <name>hive.aux.jars.path</name> 
+  <value>file://<path></value>
+</property>
+```
+
+## Table creation and deletion
+Phoenix Storage Handler supports both INTERNAL and EXTERNAL Hive tables. 
+
+### Create INTERNAL table. 
+For internal tables, Hive manages the lifecycle of the table and data. When a Hive table is created, a corresponding Phoenix table will be created as well.
+Once the Hive table is dropped, the Phoenix table will be deleted too.
+
+```sql
+	create table phoenix_table (
+	  s1 string,
+	  i1 int,
+	  f1 float,
+	  d1 double
+	)
+	STORED BY 'org.apache.phoenix.hive.PhoenixStorageHandler'
+	TBLPROPERTIES (
+	  "phoenix.table.name" = "phoenix_table",
+	  "phoenix.zookeeper.quorum" = "localhost",
+	  "phoenix.zookeeper.znode.parent" = "/hbase",
+	  "phoenix.zookeeper.client.port" = "2181",
+	  "phoenix.rowkeys" = "s1, i1",
+	  "phoenix.column.mapping" = "s1:s1, i1:i1, f1:f1, d1:d1",
+	  "phoenix.table.options" = "SALT_BUCKETS=10, DATA_BLOCK_ENCODING='DIFF'"
+	);
+```
+
+### Create EXTERNAL table
+For external tables, Hive works with an existing Phoenix table and manages only Hive metadata. Deleting an external table from Hive only deletes the Hive metadata and keeps the Phoenix table.
+
+```sql
+create external table ext_table (
+  i1 int,
+  s1 string,
+  f1 float,
+  d1 decimal
+)
+STORED BY 'org.apache.phoenix.hive.PhoenixStorageHandler'
+TBLPROPERTIES (
+  "phoenix.table.name" = "ext_table",
+  "phoenix.zookeeper.quorum" = "localhost",
+  "phoenix.zookeeper.znode.parent" = "/hbase",
+  "phoenix.zookeeper.client.port" = "2181",
+  "phoenix.rowkeys" = "i1",
+  "phoenix.column.mapping" = "i1:i1, s1:s1, f1:f1, d1:d1"
+);
+```
+
+### Properties
+
+1. phoenix.table.name
+    * Specifies the Phoenix table name
+    * Default : the same as the Hive table name
+2. phoenix.zookeeper.quorum
+    * Specifies the ZK quorum for HBase
+    * Default : localhost
+3. phoenix.zookeeper.znode.parent
+    * Specifies the ZK parent node for HBase
+    * Default : /hbase
+4. phoenix.zookeeper.client.port
+    * Specifies the ZK port
+    * Default : 2181
+5. phoenix.rowkeys
+    * The list of columns that would match the RowKey in the Phoenix table
+    * Required
+6. phoenix.column.mapping
+    * Mappings between column names for Hive and Phoenix. See Limitations for details.
+ 
+
+
+## Data ingestion/delete/update 
+Data ingestion can be done in any of the ways supported by Hive or Phoenix:
+Hive: 
+
+```
+	 insert into table T values (....);
+	 insert into table T select c1,c2,c3 from source_table;
+```
+
+Phoenix: 
+
+```
+	 upsert into table T values (.....);
+         Phoenix CSV BulkLoad tools
+```
+
+All delete/update operations should be performed on the Phoenix side. See *Limitations* for more details.
+
+## Additional configuration options
+
+These options can be set in the Hive CLI.
+
+### Performance tuning
+
+Parameters | Default Value | Description
+------------ | ------------- | -------------
+phoenix.upsert.batch.size | 1000 | Batch size for upsert.
+[phoenix-table-name].disable.wal | false | It temporarily sets table attribute  `DISABLE_WAL = true`. May be used to improve the performance
+[phoenix-table-name].auto.flush | false | When WAL is disabled and this value is true, the memstore is flushed to an HFile.
+
+### Query Data
+You can use HiveQL for querying data in a Phoenix table. A single-table query is as fast as the Phoenix CLI when `hive.fetch.task.conversion=more` and `hive.exec.parallel=true`.
+
+Parameters | Default Value | Description
+------------ | ------------- | -------------
+hbase.scan.cache | 100 | Read row size for a unit request.
+hbase.scan.cacheblock | false | Whether or not to cache blocks.
+split.by.stats | false | If true, mappers will use table statistics. One mapper per guide post.
+[hive-table-name].reducer.count | 1 | Number of reducers. In Tez mode, this affects only single-table queries. See Limitations.
+[phoenix-table-name].query.hint | | Hint for phoenix query (like NO_INDEX)
+
+## Limitations
+1. Hive update/delete requires transaction manager support on the Hive side as well as using a transaction engine on the Phoenix side. Further Hive/Phoenix JIRAs will be listed in the *Resources* section.
+2. Column mapping doesn't work correctly with mapping row key columns
+3. Currently MR and Tez jobs always have a single reducer.  
+ 
+## Resources
+* [PHOENIX-2743](https://issues.apache.org/jira/browse/PHOENIX-2743) : Implementation, accepted by Apache Phoenix community. Original pull request contains modification for Hive classes.
+* [PHOENIX-331](https://issues.apache.org/jira/browse/PHOENIX-331) : Another implementation with support of Hive 0.98. Outdated

Modified: phoenix/site/source/src/site/site.xml
URL: http://svn.apache.org/viewvc/phoenix/site/source/src/site/site.xml?rev=1760409&r1=1760408&r2=1760409&view=diff
==============================================================================
--- phoenix/site/source/src/site/site.xml (original)
+++ phoenix/site/source/src/site/site.xml Mon Sep 12 17:07:40 2016
@@ -85,6 +85,7 @@
             <item href="pherf.html" name="Performance Testing"/>
             <item href="http:divider" name=""/>
             <item href="phoenix_spark.html" name="Apache Spark Integration"/>
+            <item href="hive_storage_handler.html" name="Apache Hive Storage Handler"/>
             <item href="pig_integration.html" name="Apache Pig Integration"/>
             <item href="phoenix_mr.html" name="Map Reduce Integration"/>
             <item href="flume.html" name="Apache Flume Plugin"/>