You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by da...@apache.org on 2016/06/01 06:13:40 UTC

svn commit: r1746396 - in /pig/branches/branch-0.16: ./ conf/ src/docs/src/documentation/content/xdocs/ src/org/apache/pig/ src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/ test/org/apache/pig/test/

Author: daijy
Date: Wed Jun  1 06:13:40 2016
New Revision: 1746396

URL: http://svn.apache.org/viewvc?rev=1746396&view=rev
Log:
PIG-4719: Documentation for PIG-4704: Customizable Error Handling for Storers in Pig

Modified:
    pig/branches/branch-0.16/CHANGES.txt
    pig/branches/branch-0.16/conf/pig.properties
    pig/branches/branch-0.16/src/docs/src/documentation/content/xdocs/perf.xml
    pig/branches/branch-0.16/src/docs/src/documentation/content/xdocs/start.xml
    pig/branches/branch-0.16/src/docs/src/documentation/content/xdocs/test.xml
    pig/branches/branch-0.16/src/docs/src/documentation/content/xdocs/udf.xml
    pig/branches/branch-0.16/src/org/apache/pig/CounterBasedErrorHandler.java
    pig/branches/branch-0.16/src/org/apache/pig/PigConfiguration.java
    pig/branches/branch-0.16/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/StoreFuncDecorator.java
    pig/branches/branch-0.16/test/org/apache/pig/test/TestErrorHandlingStoreFunc.java

Modified: pig/branches/branch-0.16/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.16/CHANGES.txt?rev=1746396&r1=1746395&r2=1746396&view=diff
==============================================================================
--- pig/branches/branch-0.16/CHANGES.txt (original)
+++ pig/branches/branch-0.16/CHANGES.txt Wed Jun  1 06:13:40 2016
@@ -24,6 +24,8 @@ INCOMPATIBLE CHANGES
 
 IMPROVEMENTS
 
+PIG-4719: Documentation for PIG-4704: Customizable Error Handling for Storers in Pig (daijy)
+
 PIG-4714: Improve logging across multiple components with callerId (daijy)
 
 PIG-4885: Turn off union optimizer if there is PARALLEL clause in union in Tez (rohini)

Modified: pig/branches/branch-0.16/conf/pig.properties
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.16/conf/pig.properties?rev=1746396&r1=1746395&r2=1746396&view=diff
==============================================================================
--- pig/branches/branch-0.16/conf/pig.properties (original)
+++ pig/branches/branch-0.16/conf/pig.properties Wed Jun  1 06:13:40 2016
@@ -614,13 +614,13 @@ pig.ats.enabled=true
 # If you want Pig to allow certain errors before failing you can set this property.
 # If the property is set to true and the StoreFunc implements ErrorHandling, it will allow configurable errors 
 # based on the OutputErrorHandler implementation  
-# pig.allow.store.errors = false
+# pig.error-handling.enabled = false
 #
 # Controls the minimum number of errors for store
-# pig.errors.min.records = 0
+# pig.error-handling.min.error.records = 0
 #
 # Set the threshold for percentage of errors
-# pig.error.threshold.percent = 0.0f
+# pig.error-handling.error.threshold = 0.0f
 
 ###########################################################################
 #

Modified: pig/branches/branch-0.16/src/docs/src/documentation/content/xdocs/perf.xml
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.16/src/docs/src/documentation/content/xdocs/perf.xml?rev=1746396&r1=1746395&r2=1746396&view=diff
==============================================================================
--- pig/branches/branch-0.16/src/docs/src/documentation/content/xdocs/perf.xml (original)
+++ pig/branches/branch-0.16/src/docs/src/documentation/content/xdocs/perf.xml Wed Jun  1 06:13:40 2016
@@ -42,7 +42,7 @@
   </section>
   <section id="container-reuse">
     <title>Tez session/container reuse</title>
-    <p>One downside of MapReduce is the startup cost for a job is very high. That hurts the performance especially for small job. Tez alleviate the problem by using session and container reuse, so it is not necessary to start an application master for every job, and start a JVM for every task. By default, session/container reuse is on and we usually shall not turn it off. JVM reuse might cause some side effect if static variable is used since static variable might live across different jobs. So if static variable is used in EvalFunc/LoadFunc/StoreFunc, be sure to implement a cleanup function and register with <a href="http://pig.apache.org/docs/r0.14.0/api/org/apache/pig/JVMReuseManager.html">JVMReuseManager</a>.</p>
+    <p>One downside of MapReduce is the startup cost for a job is very high. That hurts the performance especially for small job. Tez alleviate the problem by using session and container reuse, so it is not necessary to start an application master for every job, and start a JVM for every task. By default, session/container reuse is on and we usually shall not turn it off. JVM reuse might cause some side effect if static variable is used since static variable might live across different jobs. So if static variable is used in EvalFunc/LoadFunc/StoreFunc, be sure to implement a cleanup function and register with <a href="http://pig.apache.org/docs/r0.16.0/api/org/apache/pig/JVMReuseManager.html">JVMReuseManager</a>.</p>
   </section>
   <section id="auto-parallelism">
     <title>Automatic parallelism</title>

Modified: pig/branches/branch-0.16/src/docs/src/documentation/content/xdocs/start.xml
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.16/src/docs/src/documentation/content/xdocs/start.xml?rev=1746396&r1=1746395&r2=1746396&view=diff
==============================================================================
--- pig/branches/branch-0.16/src/docs/src/documentation/content/xdocs/start.xml (original)
+++ pig/branches/branch-0.16/src/docs/src/documentation/content/xdocs/start.xml Wed Jun  1 06:13:40 2016
@@ -532,16 +532,16 @@ However, in a production environment you
 <li>Make sure the JAVA_HOME environment variable is set to the root of your Java installation.</li>
 <li>Make sure your PATH includes bin/pig (this enables you to run the tutorials using the "pig" command). 
 <source>
-$ export PATH=/&lt;my-path-to-pig&gt;/pig-0.14.0/bin:$PATH 
+$ export PATH=/&lt;my-path-to-pig&gt;/pig-0.16.0/bin:$PATH 
 </source>
 </li>
 <li>Set the PIG_HOME environment variable:
 <source>
-$ export PIG_HOME=/&lt;my-path-to-pig&gt;/pig-0.14.0 
+$ export PIG_HOME=/&lt;my-path-to-pig&gt;/pig-0.16.0 
 </source></li>
 <li>Create the pigtutorial.tar.gz file:
 <ul>
-    <li>Move to the Pig tutorial directory (.../pig-0.14.0/tutorial).</li>
+    <li>Move to the Pig tutorial directory (.../pig-0.16.0/tutorial).</li>
 	<li>Run the "ant" command from the tutorial directory. This will create the pigtutorial.tar.gz file.
 	</li>
 </ul>

Modified: pig/branches/branch-0.16/src/docs/src/documentation/content/xdocs/test.xml
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.16/src/docs/src/documentation/content/xdocs/test.xml?rev=1746396&r1=1746395&r2=1746396&view=diff
==============================================================================
--- pig/branches/branch-0.16/src/docs/src/documentation/content/xdocs/test.xml (original)
+++ pig/branches/branch-0.16/src/docs/src/documentation/content/xdocs/test.xml Wed Jun  1 06:13:40 2016
@@ -548,7 +548,7 @@ job_201004271216_12714 1 1 3 3 3 12 12 1
 
 <p>Several new public classes make it easier for external tools such as Oozie to integrate with Pig statistics. </p>
 
-<p>The Pig statistics are available here: <a href="http://pig.apache.org/docs/r0.14.0/api/">http://pig.apache.org/docs/r0.14.0/api/</a></p>
+<p>The Pig statistics are available here: <a href="http://pig.apache.org/docs/r0.16.0/api/">http://pig.apache.org/docs/r0.16.0/api/</a></p>
 
 <p id="stats-classes">The stats classes are in the package: org.apache.pig.tools.pigstats</p>
 <ul>

Modified: pig/branches/branch-0.16/src/docs/src/documentation/content/xdocs/udf.xml
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.16/src/docs/src/documentation/content/xdocs/udf.xml?rev=1746396&r1=1746395&r2=1746396&view=diff
==============================================================================
--- pig/branches/branch-0.16/src/docs/src/documentation/content/xdocs/udf.xml (original)
+++ pig/branches/branch-0.16/src/docs/src/documentation/content/xdocs/udf.xml Wed Jun  1 06:13:40 2016
@@ -1192,6 +1192,8 @@ abstract class has the main methods for
 This interface has methods to interact with metadata systems to store schema and store statistics. This interface is optional and should only be implemented if metadata needs to be stored. </li>
 <li id="storeresources"><a href="http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/StoreResources.java?view=markup">StoreResources:</a> 
 This interface has methods to put hdfs files or local files to distributed cache. </li>
+<li id="errorhandling"><a href="http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/ErrorHandling.java?view=markup">ErrorHandling:</a> 
+This interface allows you to skip bad records in the storer so the storer will not throw an exception and terminate the job. You can implement your own error handler by implementing the <a href="http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/ErrorHandler.java?view=markup">ErrorHandler</a> interface, or use the predefined error handler: <a href="http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/CounterBasedErrorHandler.java?view=markup">CounterBasedErrorHandler</a>. ErrorHandling can be turned on by setting the property pig.error-handling.enabled to true in pig.properties. Default is false.  CounterBasedErrorHandler uses two settings - pig.error-handling.min.error.records (the minimum number of errors to trigger error handling) and pig.error-handling.error.threshold (the error rate, expressed as a fraction of the number of records, above which an error is thrown).</li>
 </ul>
 
 <p id="storefunc-override">The methods which need to be overridden in StoreFunc are explained below: </p>

Modified: pig/branches/branch-0.16/src/org/apache/pig/CounterBasedErrorHandler.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.16/src/org/apache/pig/CounterBasedErrorHandler.java?rev=1746396&r1=1746395&r2=1746396&view=diff
==============================================================================
--- pig/branches/branch-0.16/src/org/apache/pig/CounterBasedErrorHandler.java (original)
+++ pig/branches/branch-0.16/src/org/apache/pig/CounterBasedErrorHandler.java Wed Jun  1 06:13:40 2016
@@ -34,10 +34,10 @@ public class CounterBasedErrorHandler im
 
     public CounterBasedErrorHandler() {
         Configuration conf = UDFContext.getUDFContext().getJobConf();
-        this.minErrors = conf.getLong(PigConfiguration.PIG_ERRORS_MIN_RECORDS,
+        this.minErrors = conf.getLong(PigConfiguration.PIG_ERROR_HANDLING_MIN_ERROR_RECORDS,
                 0);
         this.errorThreshold = conf.getFloat(
-                PigConfiguration.PIG_ERROR_THRESHOLD_PERCENT, 0.0f);
+                PigConfiguration.PIG_ERROR_HANDLING_THRESHOLD_PERCENT, 0.0f);
     }
 
     @Override

Modified: pig/branches/branch-0.16/src/org/apache/pig/PigConfiguration.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.16/src/org/apache/pig/PigConfiguration.java?rev=1746396&r1=1746395&r2=1746396&view=diff
==============================================================================
--- pig/branches/branch-0.16/src/org/apache/pig/PigConfiguration.java (original)
+++ pig/branches/branch-0.16/src/org/apache/pig/PigConfiguration.java Wed Jun  1 06:13:40 2016
@@ -331,17 +331,17 @@ public class PigConfiguration {
     /**
      * Boolean value used to enable or disable error handling for storers
      */
-    public static final String PIG_ALLOW_STORE_ERRORS = "pig.allow.store.errors";
+    public static final String PIG_ERROR_HANDLING_ENABLED = "pig.error-handling.enabled";
 
     /**
      * Controls the minimum number of errors
      */
-    public static final String PIG_ERRORS_MIN_RECORDS = "pig.errors.min.records";
+    public static final String PIG_ERROR_HANDLING_MIN_ERROR_RECORDS = "pig.error-handling.min.error.records";
 
     /**
      * Set the threshold for percentage of errors
      */
-    public static final String PIG_ERROR_THRESHOLD_PERCENT = "pig.error.threshold.percent";
+    public static final String PIG_ERROR_HANDLING_THRESHOLD_PERCENT = "pig.error-handling.error.threshold";
 
     /**
      * Comma-delimited entries of commands/operators that must be disallowed.

Modified: pig/branches/branch-0.16/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/StoreFuncDecorator.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.16/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/StoreFuncDecorator.java?rev=1746396&r1=1746395&r2=1746396&view=diff
==============================================================================
--- pig/branches/branch-0.16/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/StoreFuncDecorator.java (original)
+++ pig/branches/branch-0.16/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/relationalOperators/StoreFuncDecorator.java Wed Jun  1 06:13:40 2016
@@ -60,7 +60,7 @@ public class StoreFuncDecorator {
 
     private boolean allowErrors() {
         return UDFContext.getUDFContext().getJobConf()
-                .getBoolean(PigConfiguration.PIG_ALLOW_STORE_ERRORS, false);
+                .getBoolean(PigConfiguration.PIG_ERROR_HANDLING_ENABLED, false);
     }
 
     /**

Modified: pig/branches/branch-0.16/test/org/apache/pig/test/TestErrorHandlingStoreFunc.java
URL: http://svn.apache.org/viewvc/pig/branches/branch-0.16/test/org/apache/pig/test/TestErrorHandlingStoreFunc.java?rev=1746396&r1=1746395&r2=1746396&view=diff
==============================================================================
--- pig/branches/branch-0.16/test/org/apache/pig/test/TestErrorHandlingStoreFunc.java (original)
+++ pig/branches/branch-0.16/test/org/apache/pig/test/TestErrorHandlingStoreFunc.java Wed Jun  1 06:13:40 2016
@@ -200,11 +200,11 @@ public class TestErrorHandlingStoreFunc
     private void updatePigProperties(boolean allowErrors, long minErrors,
             double errorThreshold) {
         Properties properties = pigServer.getPigContext().getProperties();
-        properties.put(PigConfiguration.PIG_ALLOW_STORE_ERRORS,
+        properties.put(PigConfiguration.PIG_ERROR_HANDLING_ENABLED,
                 Boolean.toString(allowErrors));
-        properties.put(PigConfiguration.PIG_ERRORS_MIN_RECORDS,
+        properties.put(PigConfiguration.PIG_ERROR_HANDLING_MIN_ERROR_RECORDS,
                 Long.toString(minErrors));
-        properties.put(PigConfiguration.PIG_ERROR_THRESHOLD_PERCENT,
+        properties.put(PigConfiguration.PIG_ERROR_HANDLING_THRESHOLD_PERCENT,
                 Double.toString(errorThreshold));
     }
 }