You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ol...@apache.org on 2009/03/12 01:29:11 UTC
svn commit: r752725 [2/4] - in /hadoop/pig/trunk: ./ src/docs/ src/docs/src/documentation/ src/docs/src/documentation/content/xdocs/ src/docs/src/documentation/content/xdocs/images/

Added: hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/piglatin.xml
URL: http://svn.apache.org/viewvc/hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/piglatin.xml?rev=752725&view=auto
==============================================================================
--- hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/piglatin.xml (added)
+++ hadoop/pig/trunk/src/docs/src/documentation/content/xdocs/piglatin.xml Thu Mar 12 00:29:11 2009
@@ -0,0 +1,10094 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--  Copyright 2002-2004 The Apache Software Foundation
+  Licensed under the Apache License, Version 2.0 (the "License");
+  you may not use this file except in compliance with the License.
+  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+
+<!DOCTYPE article PUBLIC "-//OASIS//DTD Simplified DocBook XML V1.0//EN" 
+"http://www.oasis-open.org/docbook/xml/simple/1.0/sdocbook.dtd">
+  
+  <!-- BEGIN ARTICLE -->
+  
+<article>
+<title>Pig Latin Manual</title>
+
+  <!-- OVERVIEW -->
+<section><title>Conventions</title>
+   <para>Conventions for the syntax and code examples included in the Pig Latin Reference Manual are described here.</para>
+   <informaltable frame="all">
+      <tgroup cols="3"><tbody><row>
+            <entry>
+               <para>Convention</para>
+            </entry>
+            <entry>
+               <para>Description</para>
+            </entry>
+            <entry>
+               <para>Example</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>( )</para>
+            </entry>
+            <entry>
+               <para>Parentheses enclose one or more items.</para>
+               <para>Parentheses are also used to indicate the tuple data type.</para>
+            </entry>
+            <entry>
+               <para>Multiple items:</para>
+               <para>(1, abc, (2,4,6) )</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>[ ]</para>
+            </entry>
+            <entry>
+               <para>Straight brackets enclose one or more optional items.</para>
+               <para>Straight brackets are also used to indicate the map data type. In this case &lt;&gt; is used to indicate optional items.</para>
+            </entry>
+            <entry>
+               <para>Optional items:</para>
+               <para>[INNER | OUTER]</para>
+               <para/>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>{ }</para>
+            </entry>
+            <entry>
+               <para>Curly brackets enclose two or more items, one of which is required. </para>
+               <para>Curly brackets are also used to indicate the bag data type. In this case &lt;&gt; is used to indicate required items.</para>
+            </entry>
+            <entry>
+               <para>Two items, one required:</para>
+               <para>{ gen_blk | nested_gen_blk }</para>
+               <para/>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>…</para>
+            </entry>
+            <entry>
+               <para>Horizontal ellipsis points indicate that you can repeat a portion of the code.</para>
+            </entry>
+            <entry>
+               <para>Pig Latin syntax statement:</para>
+               <para>cat path [path …]</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>UPPERCASE</para>
+               <para/>
+               <para>lowercase</para>
+            </entry>
+            <entry>
+               <para>In general, uppercase type indicates elements the system supplies.</para>
+               <para>In general, lowercase type indicates elements that you supply.</para>
+               <para>Note: The names (aliases) of relations and fields are case sensitive. The names of Pig Latin functions are case sensitive. All other Pig Latin keywords are case insensitive.</para>
+            </entry>
+            <entry>
+               <para>Pig Latin statement:</para>
+               <para>A = LOAD 'data' AS (f1:int);</para>
+               <para/>
+               <orderedlist>
+                  <listitem>
+                     <para>LOAD, AS supplied by system</para>
+                  </listitem>
+                  <listitem>
+                     <para>A, f1 are names (aliases)</para>
+                  </listitem>
+                  <listitem>
+                     <para>data supplied by you</para>
+                  </listitem>
+               </orderedlist>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>italics</para>
+            </entry>
+            <entry>
+               <para>Italic type indicates placeholders or variables for which you must supply values.</para>
+            </entry>
+            <entry>
+               <para>Pig Latin syntax:</para>
+               <para>alias = LIMIT alias n;</para>
+               <para/>
+               <para>You supply the values for placeholder alias and variable n.</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable></section>
+   
+   <section>
+   <title>Pig Latin Statements</title>
+   <para>A Pig Latin statement is an operator that takes a relation as input and produces another relation as output. (This definition applies to all Pig Latin operators except LOAD and STORE which read data from and write data to the file system.) Pig Latin statements can span multiple lines and must end with a semi-colon ( ; ). Pig Latin statements are generally organized in the following manner. </para>
+   <orderedlist>
+      <listitem>
+         <para>A LOAD statement reads data from the file system. </para>
+      </listitem>
+      <listitem>
+         <para>A series of "transformation" statements process the data. </para>
+      </listitem>
+      <listitem>
+         <para>A STORE statement writes output to the file system; or, a DUMP statement displays output to the screen.</para>
+      </listitem>
+   </orderedlist>
+   
+   <section>
+   <title>Processing Pig Latin Statements</title>
+   <para>You can execute Pig Latin statements interactively using the Grunt shell or you can place Pig Latin statements in a script and run the script. Either way, Pig processes Pig Latin statements as follows:</para>
+   <orderedlist>
+      <listitem>
+         <para>First, Pig validates the syntax and semantics of all statements. </para>
+      </listitem>
+      <listitem>
+         <para>Next, if Pig has encountered a DUMP or STORE, Pig will execute all statements connected to the specified DUMP or STORE.</para>
+      </listitem>
+   </orderedlist>
+   <para>In this example Pig will validate, but not execute, the LOAD and FOREACH statements.</para>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>A = LOAD 'student' USING PigStorage() AS (name:chararray, age:int, gpa:float);</para>
+               <para>B = FOREACH A GENERATE name;</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable>
+   <para>In this example, Pig will validate the LOAD, FOREACH, and DUMP statements. Then, if there are no errors, Pig will execute these statements.</para>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>A = LOAD 'student' USING PigStorage() AS (name:chararray, age:int, gpa:float);</para>
+               <para>B = FOREACH A GENERATE name;</para>
+               <para>DUMP B;</para>
+               <para>(John)</para>
+               <para>(Mary)</para>
+               <para>(Bill)</para>
+               <para>(Joe)</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable></section>
+   
+   <section>
+   <title>Using Comments in Scripts</title>
+   <para>If you place Pig Latin statements in a script, the script can include comments. </para>
+   <orderedlist>
+      <listitem>
+         <para>For multi-line comments use /* … */</para>
+      </listitem>
+      <listitem>
+         <para>For single line comments use --</para>
+      </listitem>
+   </orderedlist>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>/* myscript.pig</para>
+               <para>My script includes three simple Pig Latin Statements.</para>
+               <para>*/</para>
+               <para>A = LOAD 'student' USING PigStorage() AS (name:chararray, age:int, gpa:float); -- load statement</para>
+               <para>B = FOREACH A GENERATE name; -- foreach statement</para>
+               <para>DUMP B; -- dump statement</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable></section></section>
+   
+   <section>
+   <title>Relations, Bags, Tuples, and Fields</title>
+      <para>As noted, Pig Latin statements work with relations. A relation can be defined as follows:</para>
+   <orderedlist>
+      <listitem>
+         <para>A relation is a bag (more specifically, an outer bag).</para>
+      </listitem>
+      <listitem>
+         <para>A bag is a collection of tuples. </para>
+      </listitem>
+      <listitem>
+         <para>A tuple is an ordered set of fields.</para>
+      </listitem>
+      <listitem>
+         <para>A field is a piece of data.</para>
+      </listitem>
+   </orderedlist>
+   <para/>
+   <para>A Pig relation is a bag of tuples. A Pig relation is similar to a table in a relational database, where the tuples in the bag correspond to the rows in a table. Unlike a relational table, however, Pig relations don't require that every tuple contain the same number of fields or that the fields in the same position (column) have the same type.</para>
+   <para>Also note that relations are unordered which means there is no guarantee that tuples are processed in any particular order. Furthermore, processing may be parallelized in which case tuples are not processed according to any total ordering.</para>
+   
+   <section><title>Referencing Relations</title>
+   <para>Relations are referred to by name (or alias). Names are assigned by you as part of the Pig Latin statement. In this example the name (alias) of the relation is A.</para>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>A = LOAD 'student' USING PigStorage() AS (name:chararray, age:int, gpa:float);</para>
+               <para>DUMP A;</para>
+               <para>(John,18,4.0F)</para>
+               <para>(Mary,19,3.8F)</para>
+               <para>(Bill,20,3.9F)</para>
+               <para>(Joe,18,3.8F)</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable></section>
+   
+   <section>
+   <title>Referencing Fields</title>
+   <para>Fields are referred to by positional notation or by name (or alias). </para>
+   <orderedlist>
+      <listitem>
+         <para>Positional notation is generated by the system. Positional notation is indicated with the dollar sign ($) and begins with zero (0); for example, $0, $1, $2. </para>
+      </listitem>
+      <listitem>
+         <para>Names are assigned by you using schemas (or, in the case of the GROUP operator and some functions, by the system). You can use any name that is not a Pig keyword; for example, f1, f2, f3 or a, b, c or name, age, gpa.</para>
+      </listitem>
+   </orderedlist>
+   <para>Given relation A above, the three fields are separated out in this table. </para>
+   <informaltable frame="all">
+      <tgroup cols="4"><tbody><row>
+            <entry>
+               <para/>
+            </entry>
+            <entry>
+               <para>First Field</para>
+            </entry>
+            <entry>
+               <para>Second Field</para>
+            </entry>
+            <entry>
+               <para>Third Field </para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>Data type</para>
+            </entry>
+            <entry>
+               <para>chararray</para>
+            </entry>
+            <entry>
+               <para>int</para>
+            </entry>
+            <entry>
+               <para>float</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>Positional notation (generated by system) </para>
+            </entry>
+            <entry>
+               <para>$0</para>
+            </entry>
+            <entry>
+               <para>$1</para>
+            </entry>
+            <entry>
+               <para>$2</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>Possible name (assigned by you using a schema)</para>
+            </entry>
+            <entry>
+               <para>name</para>
+            </entry>
+            <entry>
+               <para>age</para>
+            </entry>
+            <entry>
+               <para>gpa</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>Field value (for the first tuple)</para>
+            </entry>
+            <entry>
+               <para>John</para>
+            </entry>
+            <entry>
+               <para>18</para>
+            </entry>
+            <entry>
+               <para>4.0</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable>
+   <para>As shown in this example when you assign names to fields you can still refer to the fields using positional notation. However, for debugging purposes and ease of comprehension, it is better to use names.</para>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>A = LOAD 'student' USING PigStorage() AS (name:chararray, age:int, gpa:float);</para>
+               <para>X = FOREACH A GENERATE name,$2;</para>
+               <para>DUMP X;</para>
+               <para>(John,4.0F)</para>
+               <para>(Mary,3.8F)</para>
+               <para>(Bill,3.9F)</para>
+               <para>(Joe,3.8F)</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable>
+   <para>In this example an error is generated because the requested column ($3) is outside of the declared schema (positional notation begins with $0). Note that the error is caught before the statements are executed.</para>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>A = LOAD 'data' AS (f1:int,f2:int,f3:int);</para>
+               <para>B = FOREACH A GENERATE $3;</para>
+               <para>DUMP B;</para>
+               <para/>
+               <para>2009-01-21 23:03:46,715 [main] ERROR org.apache.pig.tools.grunt.GruntParser - java.io.IOException: Out of bound access. Trying to access non-existent column: 3. Schema {f1: bytearray,f2: bytearray,f3: bytearray} has 3 column(s). etc …</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable></section><section><title>Referencing Fields that are Complex Data Types</title>
+   <para>As noted, the fields in a tuple can be any data type, including the complex data types: bags, tuples, and maps. </para>
+   <orderedlist>
+      <listitem>
+         <para>Use the schemas for complex data types to name fields that are complex data types. </para>
+      </listitem>
+      <listitem>
+         <para>Use the dereference operators to reference and work with fields that are complex data types.</para>
+      </listitem>
+   </orderedlist>
+   <para>In this example the data file contains tuples. A schema for complex data types (in this case, tuples) is used to load the data. Then, dereference operators (the dot in t1.t1a and t2.$0) are used to access the fields in the tuples. Note that when you assign names to fields you can still refer to these fields using positional notation.</para>
+   <orderedlist>
+      <listitem>
+         <para/>
+      </listitem>
+   </orderedlist>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>cat data;</para>
+               <para>(3,8,9) (4,5,6)</para>
+               <para>(1,4,7) (3,7,5)</para>
+               <para>(2,5,8) (9,5,8)</para>
+               <para/>
+               <para>A = LOAD 'data' AS (t1:tuple(t1a:int, t1b:int,t1c:int),t2:tuple(t2a:int,t2b:int,t2c:int));</para>
+               <para>DUMP A;</para>
+               <para>((3,8,9),(4,5,6))</para>
+               <para>((1,4,7),(3,7,5))</para>
+               <para>((2,5,8),(9,5,8))</para>
+               <para/>
+               <para>X = FOREACH A GENERATE t1.t1a,t2.$0;</para>
+               <para>DUMP X;</para>
+               <para>(3,4)</para>
+               <para>(1,3)</para>
+               <para>(2,9)</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable>
+   <orderedlist>
+      <listitem>
+         <para/>
+      </listitem>
+   </orderedlist></section></section>
+   
+   <section>
+   <title>Case Sensitivity</title>
+   <para>The names (aliases) of relations and fields are case sensitive. The names of Pig Latin functions are case sensitive. The names of parameters (see Parameter Substitution) and all other Pig Latin keywords are case insensitive.</para>
+   <para>In the example below, note the following:</para>
+   <orderedlist>
+      <listitem>
+         <para>The names (aliases) of relations A, B, and C are case sensitive.</para>
+      </listitem>
+      <listitem>
+         <para>The names (aliases) of fields f1, f2, and f3 are case sensitive.</para>
+      </listitem>
+      <listitem>
+         <para>Function names PigStorage and COUNT are case sensitive.</para>
+      </listitem>
+      <listitem>
+         <para>Keywords LOAD, USING, AS, GROUP, BY, FOREACH, GENERATE, and DUMP are case insensitive. They can also be written as load, using, as, group, by, etc.</para>
+      </listitem>
+      <listitem>
+         <para>In the FOREACH statement, the field in relation B is referred to by positional notation ($0).</para>
+      </listitem>
+   </orderedlist>
+   <para/>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>grunt&gt; A = LOAD 'data' USING PigStorage() AS (f1:int, f2:int, f3:int);</para>
+               <para>grunt&gt; B = GROUP A BY f1;</para>
+               <para>grunt&gt; C = FOREACH B GENERATE COUNT ($0);</para>
+               <para>grunt&gt; DUMP C;</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable></section>
+   
+   <section>
+   <title>Working with Data</title>
+   <para>Pig Latin allows you to work with data in many ways. In general, and as a starting point:</para>
+   <orderedlist>
+      <listitem>
+         <para>Use the FILTER operator to work with tuples or rows of data. Use the FOREACH operator to work with columns of data.</para>
+      </listitem>
+      <listitem>
+         <para>Use the GROUP operator to group data in a single relation. Use the COGROUP and JOIN operators to group or join data in two or more relations.</para>
+      </listitem>
+      <listitem>
+         <para>Use the UNION operator to merge the contents of two or more relations. Use the SPLIT operator to partition the contents of a relation into multiple relations.</para>
+      </listitem>
+   </orderedlist></section>
+   
+   <section>
+   <title>Increasing Parallelism</title>
+   <para>To increase the parallelism of a job, include the PARALLEL clause with the COGROUP, CROSS, DISTINCT, GROUP, JOIN and ORDER operators. PARALLEL controls the number of reducers only; the number of maps is determined by the input data (see the <ulink url="http://wiki.apache.org/pig/PigUserCookbook">Pig User Cookbook</ulink>).</para></section><section><title>Increasing Performance</title>
+   <para>You can increase or optimize the performance of your Pig Latin scripts by following a few simple rules (see the Pig User Cookbook).</para>
+   </section>
+   
+   <section>
+   <title>Retrieving Results</title>
+   <para>Pig Latin includes operators you can use to retrieve the results of your Pig Latin statements: </para>
+   <orderedlist>
+      <listitem>
+         <para>Use the DUMP operator to display results to a screen. </para>
+      </listitem>
+      <listitem>
+         <para>Use the STORE operator to write results to a file on the file system.</para>
+      </listitem>
+   </orderedlist></section><section><title>Debugging Pig Latin Scripts</title>
+   <para>Pig Latin includes operators that can help you debug your Pig Latin statements:</para>
+   <orderedlist>
+      <listitem>
+         <para>Use the DESCRIBE operator to review the schema of a relation.</para>
+      </listitem>
+      <listitem>
+         <para>Use the EXPLAIN operator to view the logical, physical, or map reduce execution plans to compute a relation.</para>
+      </listitem>
+      <listitem>
+         <para>Use the ILLUSTRATE operator to view the step-by-step execution of a series of statements.</para>
+      </listitem>
+   </orderedlist>
+   
+   </section>
+   
+
+<!-- DATA TYPES -->
+
+<section>
+<title>Data Types</title>
+   <informaltable frame="all">
+      <tgroup cols="3"><tbody><row>
+            <entry>
+               <para>Simple Data Types</para>
+            </entry>
+            <entry>
+               <para>Description</para>
+            </entry>
+            <entry>
+               <para>Example </para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>Scalars</para>
+            </entry>
+            <entry>
+               <para/>
+            </entry>
+            <entry>
+               <para/>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>int</para>
+            </entry>
+            <entry>
+               <para>Signed 32-bit integer</para>
+            </entry>
+            <entry>
+               <para>10</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>long</para>
+            </entry>
+            <entry>
+               <para>Signed 64-bit integer</para>
+            </entry>
+            <entry>
+               <para>Data: &#160; &#160; 10L or 10l </para>
+               <para>Display: 10L </para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>float</para>
+            </entry>
+            <entry>
+               <para>32-bit floating point</para>
+            </entry>
+            <entry>
+               <para>Data: &#160; &#160; 10.5F or 10.5f or 10.5e2f or 10.5E2F</para>
+               <para>Display: 10.5F or 1050.0F</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>double</para>
+            </entry>
+            <entry>
+               <para>64-bit floating point</para>
+            </entry>
+            <entry>
+               <para>Data: &#160; &#160; 10.5 or 10.5e2 or 10.5E2</para>
+               <para>Display: 10.5 or 1050.0</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>Arrays</para>
+            </entry>
+            <entry>
+               <para/>
+            </entry>
+            <entry>
+               <para/>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>chararray</para>
+            </entry>
+            <entry>
+               <para>Character array (string) in Unicode UTF-8 format</para>
+            </entry>
+            <entry>
+               <para>hello world</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>bytearray</para>
+            </entry>
+            <entry>
+               <para>Byte array (blob)</para>
+            </entry>
+            <entry>
+               <para/>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>Complex Data Types</para>
+            </entry>
+            <entry>
+               <para/>
+            </entry>
+            <entry>
+               <para/>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>tuple</para>
+            </entry>
+            <entry>
+               <para>An ordered set of fields.</para>
+            </entry>
+            <entry>
+               <para>(19,2)</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>bag</para>
+            </entry>
+            <entry>
+               <para>A collection of tuples.</para>
+            </entry>
+            <entry>
+               <para>{(19,2), (18,1)}</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>map</para>
+            </entry>
+            <entry>
+               <para>A set of key value pairs.</para>
+            </entry>
+            <entry>
+               <para>[open#apache]</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable>
+   <para>Note the following general observations about data types:</para>
+   <orderedlist>
+      <listitem>
+         <para>Use schemas to assign types to fields. If you don't assign types, fields default to type bytearray and implicit conversions are applied to the data depending on the context in which that data is used. For example, in relation B, f1 is converted to integer because 5 is integer. In relation C, f1 and f2 are converted to double because we don't know the type of either f1 or f2.</para>
+      </listitem>
+   </orderedlist>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>A = LOAD 'data' AS (f1,f2,f3);</para>
+               <para>B = FOREACH A GENERATE f1 + 5;</para>
+               <para>C = FOREACH A GENERATE f1 + f2;</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable>
+   <orderedlist>
+      <listitem>
+         <para>If a schema is defined as part of a load statement, the load function will attempt to enforce the schema. If the data does not conform to the schema, the loader will generate a null value or an error.</para>
+      </listitem>
+   </orderedlist>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>A = LOAD 'data' AS (name:chararray, age:int, gpa:float);</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable>
+   <para/>
+   <orderedlist>
+      <listitem>
+         <para>If an explicit cast is not supported, an error will occur. For example, you cannot cast a chararray to int.</para>
+      </listitem>
+   </orderedlist>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>A = LOAD 'data' AS (name:chararray, age:int, gpa:float);</para>
+               <para>B = FOREACH A GENERATE (int)name;</para>
+               <para>This will cause an error …</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable>
+   <para/>
+   <orderedlist>
+      <listitem>
+         <para>If Pig cannot resolve incompatible types through implicit casts, an error will occur. For example, you cannot add chararray and float (see the Types Table for addition and subtraction).</para>
+      </listitem>
+   </orderedlist>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>A = LOAD 'data' AS (name:chararray, age:int, gpa:float);</para>
+               <para>B = FOREACH A GENERATE name + gpa;</para>
+               <para>This will cause an error …</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable>
+   
+   <section>
+   <title>Tuple</title>
+   <para>A tuple is an ordered set of fields.</para>
+   
+   <section>
+   <title>Syntax </title>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>( field [, field …] )</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable></section>
+   
+   <section>
+   <title>Terms</title>
+   <informaltable frame="all">
+      <tgroup cols="2"><tbody><row>
+            <entry>
+               <para>( )</para>
+            </entry>
+            <entry>
+               <para>A tuple is enclosed in parentheses ( ).</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>field</para>
+            </entry>
+            <entry>
+               <para>A piece of data. A field can be any data type (including tuple and bag).</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable></section>
+   
+   <section>
+   <title>Usage</title>
+   <para>You can think of a tuple as a row with one or more fields, where each field can be any data type and any field may or may not have data. If a field has no data, then the following happens:</para>
+   <orderedlist>
+      <listitem>
+         <para>In a load statement, the loader will inject null into the tuple. The actual value that is substituted for null is loader specific; for example, PigStorage substitutes an empty field for null.</para>
+      </listitem>
+      <listitem>
+         <para>In a non-load statement, if a requested field is missing from a tuple, Pig will inject null.</para>
+      </listitem>
+   </orderedlist></section>
+   
+   <section>
+   <title>Examples</title>
+   <para>In this example the tuple contains three fields.</para>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>(John,18,4.0F)</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable></section></section>
+   
+   <section>
+   <title>Bag</title>
+   <para>A bag is a collection of tuples.</para>
+   
+   <section>
+   <title>Syntax: Inner bag</title><informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>{ tuple [, tuple …] }</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable></section>
+   
+   <section>
+   <title>Terms</title>
+   <informaltable frame="all">
+      <tgroup cols="2"><tbody><row>
+            <entry>
+               <para>{ }</para>
+            </entry>
+            <entry>
+               <para>An inner bag is enclosed in curly brackets { }.</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>tuple</para>
+            </entry>
+            <entry>
+               <para>A tuple.</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable></section>
+   
+   <section>
+   <title>Usage </title>
+   <para>Note the following about bags:</para>
+   <orderedlist>
+      <listitem>
+         <para>A bag can have duplicate tuples.</para>
+      </listitem>
+      <listitem>
+         <para>A bag can have tuples with differing numbers of fields. However, if Pig tries to access a field that does not exist, a null value is substituted.</para>
+      </listitem>
+      <listitem>
+         <para>A bag can have tuples with fields that have different data types. However, for Pig to effectively process bags, the schemas of the tuples within those bags should be the same. For example, if half of the tuples include chararray fields while the other half include float fields, only half of the tuples will participate in any kind of computation because the chararray fields will be converted to null.</para>
+         <para/>
+         <para>Bags have two forms: outer bag (or relation) and inner bag.</para>
+      </listitem>
+   </orderedlist></section>
+   
+   <section>
+   <title>Example: Outer Bag</title>
+   <para>In this example A is a relation or bag of tuples. You can think of this bag as an outer bag.</para>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>A = LOAD 'data' as (f1:int, f2:int, f3:int);</para>
+               <para>DUMP A;</para>
+               <para>(1,2,3)</para>
+               <para>(4,2,1)</para>
+               <para>(8,3,4)</para>
+               <para>(4,3,3)</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable></section>
+   
+   <section>
+   <title>Example: Inner Bag</title>
+   <para>Now, suppose we group relation A by the first field to form relation X. </para>
+   <para>In this example X is a relation or bag of tuples. The tuples in relation X have two fields. The first field is type int. The second field is type bag; you can think of this bag as an inner bag.</para>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>X = GROUP A BY f1;</para>
+               <para>DUMP X;</para>
+               <para>(1,{(1,2,3)})</para>
+               <para>(4,{(4,2,1),(4,3,3)})</para>
+               <para>(8,{(8,3,4)})</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable></section></section>
+   
+   <section>
+   <title>Map</title>
+   <para>A map is a set of key value pairs.</para>
+   
+   <section>
+   <title>Syntax (&lt;&gt; denotes optional)</title>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>[ key#value &lt;, key#value …&gt; ]</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable></section>
+   
+   <section>
+   <title>Terms</title>
+   <informaltable frame="all">
+      <tgroup cols="2"><tbody><row>
+            <entry>
+               <para>[ ]</para>
+            </entry>
+            <entry>
+               <para>Maps are enclosed in straight brackets [ ].</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>#</para>
+            </entry>
+            <entry>
+               <para>Key value pairs are separated by the pound sign #.</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>key</para>
+            </entry>
+            <entry>
+               <para>Must be a scalar data type. Must be a unique value.</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>value</para>
+            </entry>
+            <entry>
+               <para>Any data type.</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable></section>
+   
+   <section>
+   <title>Usage</title>
+   <para>Key values within a relation must be unique.</para></section>
+   
+   <section>
+   <title>Example</title>
+   <para>In this example the map includes two key value pairs.</para>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>[name#John,phone#5551212]</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable></section></section></section>
+   
+   <section>
+   <title>Nulls</title>
+   <para>In Pig Latin, nulls are implemented using the SQL definition of null as unknown or non-existent. Nulls can occur naturally in data or can be the result of an operation. </para>
+   
+   <section>
+   <title>Nulls and Operators</title>
+   <para>Pig Latin operators interact with nulls as shown in this table.</para>
+   <informaltable frame="all">
+      <tgroup cols="2"><tbody><row>
+            <entry>
+               <para>Operator </para>
+            </entry>
+            <entry>
+               <para>Interaction </para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>Comparison operators:</para>
+               <para>==, !=</para>
+               <para>&gt;, &lt;</para>
+               <para>&gt;=, &lt;=</para>
+            </entry>
+            <entry>
+               <para>If either sub-expression is null, the result is null.</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>Comparison operator:</para>
+               <para>matches </para>
+            </entry>
+            <entry>
+               <para>If either the string being matched against or the string defining the match is null, the result is null.</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>Arithmetic operators:</para>
+               <para>+, -, *, /</para>
+               <para>% modulo</para>
+               <para>? bincond</para>
+            </entry>
+            <entry>
+               <para>If either sub-expression is null, the resulting expression is null.</para>
+               <para/>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>Null operator:</para>
+               <para>is null </para>
+            </entry>
+            <entry>
+               <para>If the tested value is null, returns true; otherwise, returns false.</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>Null operator:</para>
+               <para>is not null</para>
+            </entry>
+            <entry>
+               <para>If the tested value is not null, returns true; otherwise, returns false.</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>Dereference operators:</para>
+               <para>tuple (.) or map (#)</para>
+            </entry>
+            <entry>
+               <para>If the de-referenced tuple or map is null, returns null.</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>Cast operator</para>
+            </entry>
+            <entry>
+               <para>Casting a null from one type to another type results in a null.</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>Functions:</para>
+               <para>AVG, MIN, MAX, SUM</para>
+            </entry>
+            <entry>
+               <para>These functions ignore nulls. </para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>Function:</para>
+               <para>COUNT</para>
+            </entry>
+            <entry>
+               <para>This function counts all values, including nulls.</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>Function:</para>
+               <para>CONCAT</para>
+            </entry>
+            <entry>
+               <para>If either sub-expression is null, the resulting expression is null.</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>Function:</para>
+               <para>SIZE</para>
+            </entry>
+            <entry>
+               <para>If the tested object is null, returns null.</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable>
+   <para>For Boolean sub-expressions, note the results when nulls are used with these operators:</para>
+   <orderedlist>
+      <listitem>
+         <para>FILTER operator – If a filter expression results in a null value, the filter does not pass it through (if X is null, !X is also null, and the filter will reject both).</para>
+      </listitem>
+      <listitem>
+         <para>Bincond operator – If a Boolean sub-expression results in a null value, the resulting expression is null (see the interactions above for Arithmetic operators).</para>
+      </listitem>
+   </orderedlist>
+   
+   <section>
+   <title>Example: COUNT function</title>
+   <para>As noted, the COUNT function counts all values, including nulls. If you don't want the function to count null values, you can use one of the methods shown here.</para>
+   <para>In this example the is not null operator is used to filter (remove) all null values before subsequent operations, including the COUNT function, are applied.</para>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>A = LOAD 'data';</para>
+               <para>B = FILTER A BY $1 is not null;</para>
+               <para>C = GROUP B BY $0;</para>
+               <para>D = FOREACH C GENERATE GROUP, COUNT(B.$1);</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable>
+   <para>Suppose you have written a function, RemoveNulls, to filter null values. In this example RemoveNulls is used to filter null values for the COUNT function only.</para>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>A = LOAD 'data';</para>
+               <para>B = GROUP A BY $0;</para>
+               <para>D = FOREACH B GENERATE GROUP, COUNT(RemoveNulls($1));</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable></section></section>
+   
+   <section>
+   <title>Nulls and Constants</title>
+   <para>Nulls can be used as constant expressions in place of expressions of any type.</para>
+   <para>In this example a and null are projected.</para>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>A = LOAD 'data' AS (a, b, c);</para>
+               <para>B = FOREACH A GENERATE a, null;</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable>
+   <para>In this example of an outer join, if the join key is missing from a table it is replaced by null.</para>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>A = LOAD 'student' AS (name: chararray, age: int, gpa: float);</para>
+               <para>B = LOAD 'votertab10k' AS (name: chararray, age: int, registration: chararray, donation: float);</para>
+               <para>C = COGROUP A BY name, B BY name;</para>
+               <para>D = FOREACH C GENERATE FLATTEN((IsEmpty(A) ? null : A)), FLATTEN((IsEmpty(B) ? null : B));</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable>
+   <para>Like any other expression, null constants can be implicitly or explicitly cast. </para>
+   <para>In this example both a and null will be implicitly cast to double.</para>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>A = LOAD 'data' AS (a, b, c);</para>
+               <para>B = FOREACH A GENERATE a + null;</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable>
+   <para>In this example both a and null will be cast to int, a implicitly, and null explicitly.</para>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>A = LOAD 'data' AS (a, b, c);</para>
+               <para>B = FOREACH A GENERATE a + (int)null;</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable></section>
+   
+   <section>
+   <title>Operations That Produce Nulls</title>
+   <para>As noted, nulls can be the result of an operation. These operations can produce null values: </para>
+   <orderedlist>
+      <listitem>
+         <para>Division by zero</para>
+      </listitem>
+      <listitem>
+         <para>Returns from user defined functions (UDFs) </para>
+      </listitem>
+      <listitem>
+         <para>Dereferencing a field that does not exist.</para>
+      </listitem>
+      <listitem>
+         <para>Dereferencing a key that does not exist in a map. For example, given a map, info, containing [name#john, phone#5551212] if a user tries to use info#address a null is returned.</para>
+      </listitem>
+      <listitem>
+         <para>Accessing a field that does not exist in a tuple. As a further explanation, see the examples below.</para>
+      </listitem>
+   </orderedlist>
+   
+   <section>
+   <title>Example: Accessing a field that does not exist in a tuple</title>
+   <para>In this example nulls are injected if fields do not have data.</para>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>cat data;</para>
+               <para>&#160; &#160; 2 &#160; 3</para>
+               <para>4 &#160; </para>
+               <para>7 &#160; 8 &#160; 9</para>
+               <para/>
+               <para>A = LOAD 'data' AS (f1:int,f2:int,f3:int);</para>
+               <para>DUMP A;</para>
+               <para>(,2,3)</para>
+               <para>(4,,)</para>
+               <para>(7,8,9)</para>
+               <para/>
+               <para>B = FOREACH A GENERATE f1,f2;</para>
+               <para>DUMP B;</para>
+               <para>(,2)</para>
+               <para>(4,)</para>
+               <para>(7,8)</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable></section></section>
+   
+   <section>
+   <title>Nulls and Load Functions</title>
+   <para>
+As noted, nulls can occur naturally in the data. If nulls are part of the data, it is the responsibility of the load function to handle them correctly. Keep in mind that what is considered a null value is loader-specific; however, the load function should always communicate null values to Pig by producing Java nulls.</para>
+   <para>The Pig Latin load functions (for example, PigStorage and TextLoader) produce null values wherever data is missing. For example, empty strings (chararrays) are not loaded; instead, they are replaced by nulls.</para>
+   <para>PigStorage is the default load function for the LOAD operator. In this example the is not null operator is used to filter names with null values.</para>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>A = LOAD 'student' AS (name, age, gpa); </para>
+               <para>B = FILTER A BY name is not null;</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable></section></section>
+   
+   <section>
+   <title>Constants</title>
+   <para>Pig provides constant representations for all data types except bytearrays.</para>
+   <informaltable frame="all">
+      <tgroup cols="3"><tbody><row>
+            <entry>
+               <para/>
+            </entry>
+            <entry>
+               <para>Constant Example</para>
+            </entry>
+            <entry>
+               <para>Notes</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>Simple Data Types</para>
+            </entry>
+            <entry>
+               <para/>
+            </entry>
+            <entry>
+               <para/>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>Scalars</para>
+            </entry>
+            <entry>
+               <para/>
+            </entry>
+            <entry>
+               <para/>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>int</para>
+            </entry>
+            <entry>
+               <para>19</para>
+            </entry>
+            <entry>
+               <para/>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>long</para>
+            </entry>
+            <entry>
+               <para>19L</para>
+            </entry>
+            <entry>
+               <para/>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>float</para>
+            </entry>
+            <entry>
+               <para>19.2F or 1.92e2f</para>
+            </entry>
+            <entry>
+               <para/>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>double</para>
+            </entry>
+            <entry>
+               <para>19.2 or 1.92e2</para>
+            </entry>
+            <entry>
+               <para/>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>Arrays </para>
+            </entry>
+            <entry>
+               <para/>
+            </entry>
+            <entry>
+               <para/>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>chararray</para>
+            </entry>
+            <entry>
+               <para>'hello world'</para>
+            </entry>
+            <entry>
+               <para/>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>bytearray</para>
+            </entry>
+            <entry>
+               <para/>
+            </entry>
+            <entry>
+               <para>Not applicable.</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>Complex Data Types</para>
+            </entry>
+            <entry>
+               <para/>
+            </entry>
+            <entry>
+               <para/>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>tuple</para>
+            </entry>
+            <entry>
+               <para>(19, 2, 1)</para>
+            </entry>
+            <entry>
+               <para>A constant in this form creates a tuple.</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>bag</para>
+            </entry>
+            <entry>
+               <para>{ (19, 2), (1, 2) }</para>
+            </entry>
+            <entry>
+               <para>A constant in this form creates a bag.</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>map</para>
+            </entry>
+            <entry>
+               <para>[ 'name' # 'John', 'ext' # 5555 ]</para>
+            </entry>
+            <entry>
+               <para>A constant in this form creates a map.</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable>
+   <para>Please note the following:</para>
+   <orderedlist>
+      <listitem>
+         <para>On UTF-8 systems you can specify string constants consisting of printable ASCII characters such as 'abc'; you can specify control characters such as '\t'; and, you can specify a character in Unicode by starting it with '\u', for instance, '\u0001' represents Ctrl-A in hexadecimal (see Wikipedia <ulink url="http://en.wikipedia.org/wiki/ASCII">ASCII</ulink>, <ulink url="http://en.wikipedia.org/wiki/Unicode">Unicode</ulink>, and <ulink url="http://en.wikipedia.org/wiki/UTF-8">UTF-8</ulink>). In theory, you should be able to specify non-UTF-8 constants on non-UTF-8 systems but as far as we know this has not been tested.</para>
+      </listitem>
+      <listitem>
+         <para>To specify a long constant, l or L must be appended to the number (for example, 12345678L). If the l or L is not specified, but the number is too large to fit into an int, the problem will be detected at parse time and the processing is terminated. </para>
+      </listitem>
+      <listitem>
+         <para>Any numeric constant with decimal point (for example, 1.5) and/or exponent (for example, 5e+1) is treated as double unless it ends with f or F in which case it is assigned type float (for example, 1.5f). </para>
+      </listitem>
+   </orderedlist>
+   <para>The data type definitions for tuples, bags, and maps apply to constants:</para>
+   <orderedlist>
+      <listitem>
+         <para>A tuple can contain fields of any data type</para>
+      </listitem>
+      <listitem>
+         <para>A bag is a collection of tuples</para>
+      </listitem>
+      <listitem>
+         <para>A map key must be a scalar; a map value can be any data type</para>
+      </listitem>
+   </orderedlist>
+   <para>Complex constants can be used in the same places scalar constants can be used, that is, in FILTER and GENERATE statements.</para>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>A = LOAD 'data' USING MyStorage() AS (T: tuple(name:chararray, age: int));</para>
+               <para>B = FILTER A BY T == ('john', 25);</para>
+               <para>D = FOREACH B GENERATE T.name, [25#5.6], {(1, 5, 18)};</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable></section>
+   
+   <section>
+   <title>Expressions</title>
+   <para>In Pig Latin, expressions are language constructs used with the FILTER, FOREACH, GROUP, and SPLIT operators as well as the eval functions.</para>
+   <para>Expressions are written in conventional mathematical infix notation and are adapted to the UTF-8 character set. Depending on the context, expressions can include:</para>
+   <orderedlist>
+      <listitem>
+         <para>Any Pig data type (simple data types, complex data types)</para>
+      </listitem>
+      <listitem>
+         <para>Any Pig operator (arithmetic, comparison, null, boolean, dereference, sign, and cast)</para>
+      </listitem>
+      <listitem>
+         <para>Any Pig built-in function.</para>
+      </listitem>
+      <listitem>
+         <para>Any user-defined function (UDF) written in Java. </para>
+         <para/>
+         <para>In a Pig Latin statement, an arithmetic expression could look like this:</para>
+      </listitem>
+   </orderedlist>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>X = GROUP A BY f2*f3;</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable>
+   <orderedlist>
+      <listitem>
+         <para/>
+         <para>A string expression could look like this, where a and b are both chararrays:</para>
+      </listitem>
+   </orderedlist>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>X = FOREACH A GENERATE CONCAT(a,b);</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable>
+   <orderedlist>
+      <listitem>
+         <para/>
+         <para>A boolean expression could look like this:</para>
+      </listitem>
+   </orderedlist>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>X = FILTER A BY (f1==8) OR (NOT (f2+f3 &gt; f1));</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable></section>
+   
+   <section>
+   <title>Schemas</title>
+   <para>Schemas enable you to assign names to and declare types for fields. Schemas are optional but we encourage you to use them whenever possible; type declarations result in better parse-time error checking and more efficient code execution. </para>
+   <para>Schemas are defined using the AS keyword with the LOAD, STREAM, and FOREACH operators. If you define a schema using the LOAD operator, then it is the load function that enforces the schema (see the LOAD operator and the <ulink url="http://wiki.apache.org/pig/UDFManual">User-Defined Function Manual</ulink> for more information).</para>
+   <para>Note the following:</para>
+   <orderedlist>
+      <listitem>
+         <para>You can define a schema that includes both the field name and field type.</para>
+      </listitem>
+      <listitem>
+         <para>You can define a schema that includes the field name only; in this case, the field type defaults to bytearray.</para>
+      </listitem>
+      <listitem>
+         <para>You can choose not to define a schema; in this case, the field is un-named and the field type defaults to bytearray.</para>
+      </listitem>
+   </orderedlist>
+   <para>If you assign a name to a field, you can refer to that field using the name or by positional notation. If you don't assign a name to a field (the field is un-named) you can only refer to the field using positional notation.</para>
+   <para>If you assign a type to a field, you can subsequently change the type using the cast operators. If you don't assign a type to a field, the field defaults to bytearray; you can change the default type using the cast operators.</para>
+   
+   <section>
+   <title>Schemas with LOAD and STREAM Statements</title>
+   <para>With LOAD and STREAM statements, the schema following the AS keyword must be enclosed in parentheses.</para>
+   <para>In this example the LOAD statement includes a schema definition for simple data types.</para>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>A = LOAD 'data' AS (f1:int, f2:int);</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable></section>
+   
+   <section>
+   <title>Schemas with FOREACH Statements</title>
+   <para>With FOREACH statements, the schema following the AS keyword must be enclosed in parentheses when the FLATTEN keyword is used. Otherwise, the schema should not be enclosed in parentheses.</para>
+   <para>In this example the FOREACH statement includes the FLATTEN keyword and a schema for simple data types.</para>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>X = FOREACH C GENERATE FLATTEN(B) AS (f1:int, f2:int, f3:int);</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable>
+   <para>In this example the FOREACH statement includes a schema for simple data types.</para>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>X = FOREACH A GENERATE f1+f2 AS x1:int;</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable></section>
+   
+   <section>
+   <title>Schemas for Simple Data Types</title>
+   <para>Simple data types include int, long, float, double, chararray, and bytearray.</para>
+   
+   <section>
+   <title>Syntax</title>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>(alias[:type]) [, (alias[:type]) …] )</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable></section>
+   
+   <section>
+   <title>Terms</title>
+   <informaltable frame="all">
+      <tgroup cols="2"><tbody><row>
+            <entry>
+               <para>alias</para>
+            </entry>
+            <entry>
+               <para>The name assigned to the field.</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>type</para>
+            </entry>
+            <entry>
+               <para>(Optional) The simple data type assigned to the field.</para>
+               <para>The alias and type are separated by a colon ( : ).</para>
+               <para>If the type is omitted, the field defaults to type bytearray.</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>( , )</para>
+            </entry>
+            <entry>
+               <para>Multiple fields are enclosed in parentheses and separated by commas.</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable></section>
+   
+   <section>
+   <title>Examples</title>
+   <para>In this example the schema defines multiple types.</para>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>cat student</para>
+               <para>John &#160; &#160;18 &#160; &#160; &#160;4.0</para>
+               <para>Mary &#160; &#160;19 &#160; &#160; &#160;3.8</para>
+               <para>Bill &#160; &#160;20 &#160; &#160; &#160;3.9</para>
+               <para>Joe &#160; &#160; 18 &#160; &#160; &#160;3.8</para>
+               <para/>
+               <para>A = LOAD 'student' AS (name:chararray, age:int, gpa:float);</para>
+               <para/>
+               <para>DESCRIBE A;</para>
+               <para>A: {name: chararray,age: int,gpa: float}</para>
+               <para/>
+               <para>DUMP A;</para>
+               <para>(John,18,4.0F)</para>
+               <para>(Mary,19,3.8F)</para>
+               <para>(Bill,20,3.9F)</para>
+               <para>(Joe,18,3.8F)</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable>
+   <para>In this example field "gpa" will default to bytearray because no type is declared. </para>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>cat student</para>
+               <para>John &#160; &#160;18 &#160; &#160; &#160;4.0</para>
+               <para>Mary &#160; &#160;19 &#160; &#160; &#160;3.8</para>
+               <para>Bill &#160; &#160;20 &#160; &#160; &#160;3.9</para>
+               <para>Joe &#160; &#160; 18 &#160; &#160; &#160;3.8</para>
+               <para/>
+               <para>A = LOAD 'student' AS (name:chararray, age:int, gpa);</para>
+               <para/>
+               <para>DESCRIBE A;</para>
+               <para>A: {name: chararray,age: int,gpa: bytearray}</para>
+               <para/>
+               <para>DUMP A;</para>
+               <para>(John,18,4.0)</para>
+               <para>(Mary,19,3.8)</para>
+               <para>(Bill,20,3.9)</para>
+               <para>(Joe,18,3.8)</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable></section></section>
+   
+   <section>
+   <title>Schemas for Complex Data Types</title>
+   <para>Complex data types include tuples, bags, and maps.</para></section>
+   
+   <section>
+   <title>Tuple Schema</title>
+   <para>A tuple is an ordered set of fields.</para>
+   
+   <section>
+   <title>Syntax</title>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>alias[:tuple] (alias[:type]) [, (alias[:type]) …] )</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable></section>
+   
+   <section>
+   <title>Terms</title>
+   <informaltable frame="all">
+      <tgroup cols="2"><tbody><row>
+            <entry>
+               <para>alias</para>
+            </entry>
+            <entry>
+               <para>The name assigned to the tuple.</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>:tuple</para>
+            </entry>
+            <entry>
+               <para>(Optional) The data type, tuple (case insensitive).</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>( )</para>
+            </entry>
+            <entry>
+               <para>The designation for a tuple, a set of parentheses.</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>alias[:type]</para>
+            </entry>
+            <entry>
+               <para>The constituents of the tuple, where the schema definition rules for the corresponding type apply to the constituents of the tuple:</para>
+               <orderedlist>
+                  <listitem>
+                     <para>alias – the name assigned to the field</para>
+                  </listitem>
+                  <listitem>
+                     <para>type (optional) – the simple or complex data type assigned to the field</para>
+                  </listitem>
+               </orderedlist>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable></section>
+   
+   <section><title>Examples</title>
+   <para>In this example the schema defines one tuple. The load statements are equivalent.</para>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>cat data</para>
+               <para>(3,8,9)</para>
+               <para>(1,4,7)</para>
+               <para>(2,5,8)</para>
+               <para/>
+               <para>A = LOAD 'data' AS (T: tuple (f1:int, f2:int, f3:int));</para>
+               <para>A = LOAD 'data' AS (T: (f1:int, f2:int, f3:int));</para>
+               <para/>
+               <para>DESCRIBE A;</para>
+               <para>A: {T: (f1: int,f2: int,f3: int)}</para>
+               <para/>
+               <para>DUMP A;</para>
+               <para>((3,8,9))</para>
+               <para>((1,4,7))</para>
+               <para>((2,5,8))</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable>
+   <para>In this example the schema defines two tuples.</para>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>cat data</para>
+               <para>(3,8,9) (mary,19)</para>
+               <para>(1,4,7) (john,18)</para>
+               <para>(2,5,8) (joe,18)</para>
+               <para/>
+               <para>A = LOAD 'data' AS (F:tuple(f1:int,f2:int,f3:int),T:tuple(t1:chararray,t2:int));</para>
+               <para/>
+               <para>DESCRIBE A;</para>
+               <para>A: {F: (f1: int,f2: int,f3: int),T: (t1: chararray,t2: int)}</para>
+               <para/>
+               <para>DUMP A;</para>
+               <para>((3,8,9),(mary,19))</para>
+               <para>((1,4,7),(john,18))</para>
+               <para>((2,5,8),(joe,18))</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable></section></section>
+   
+   <section>
+   <title>Bag Schema</title>
+   <para>A bag is a collection of tuples.</para>
+   
+   <section>
+   <title>Syntax</title>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>alias[:bag] {tuple} </para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable></section>
+   
+   <section>
+   <title>Terms</title>
+   <informaltable frame="all">
+      <tgroup cols="2"><tbody><row>
+            <entry>
+               <para>alias</para>
+            </entry>
+            <entry>
+               <para>The name assigned to the bag.</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>:bag</para>
+            </entry>
+            <entry>
+               <para>(Optional) The data type, bag (case insensitive).</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>{ }</para>
+            </entry>
+            <entry>
+               <para>The designation for a bag, a set of curly brackets.</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>tuple</para>
+            </entry>
+            <entry>
+               <para>A tuple (see Tuple Schema).</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable></section>
+   
+   <section>
+   <title>Examples</title>
+   <para>In this example the schema defines a bag. The two load statements are equivalent.</para>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>cat data</para>
+               <para>{(3,8,9)}</para>
+               <para>{(1,4,7)}</para>
+               <para>{(2,5,8)}</para>
+               <para/>
+               <para>A = LOAD 'data' AS (B: bag {T: tuple(t1:int, t2:int, t3:int)});</para>
+               <para>A = LOAD 'data' AS (B: {T: (t1:int, t2:int, t3:int)});</para>
+               <para/>
+               <para>DESCRIBE A;</para>
+               <para>A: {B: {T: (t1: int,t2: int,t3: int)}}</para>
+               <para/>
+               <para>DUMP A;</para>
+               <para>({(3,8,9)})</para>
+               <para>({(1,4,7)})</para>
+               <para>({(2,5,8)})</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable></section></section>
+   
+   <section>
+   <title>Map Schema</title>
+   <para>A map is a set of key/value pairs.</para>
+   
+   <section>
+   <title>Syntax (where &lt;&gt; means optional)</title>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>alias&lt;:map&gt; [ ] </para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable></section>
+   
+   <section>
+   <title>Terms</title>
+   <informaltable frame="all">
+      <tgroup cols="2"><tbody><row>
+            <entry>
+               <para>alias</para>
+            </entry>
+            <entry>
+               <para>The name assigned to the map.</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>:map</para>
+            </entry>
+            <entry>
+               <para>(Optional) The data type, map (case insensitive).</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>[ ]</para>
+            </entry>
+            <entry>
+               <para>The designation for a map, a set of straight brackets [ ].</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable></section>
+   
+   <section><title>Example</title>
+   <para>In this example the schema defines a map. The load statements are equivalent.</para>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>cat data</para>
+               <para>[open#apache]</para>
+               <para>[apache#hadoop]</para>
+               <para/>
+               <para>A = LOAD 'data' AS (M:map []);</para>
+               <para>A = LOAD 'data' AS (M:[]);</para>
+               <para/>
+               <para>DESCRIBE A;</para>
+               <para>A: {M: map[ ]}</para>
+               <para/>
+               <para>DUMP A;</para>
+               <para>([open#apache])</para>
+               <para>([apache#hadoop])</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable></section></section>
+   
+   <section>
+   <title>Schemas for Multiple Types</title>
+   <para>You can define schemas for data that includes multiple types.</para>
+   
+   <section>
+   <title>Example</title>
+   <para>In this example the schema defines a tuple, bag, and map.</para>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>A = LOAD 'mydata' AS (T1:tuple(f1:int, f2:int), B:bag{T2:tuple(t1:float,t2:float)}, M:map[] ); </para>
+               <para>A = LOAD 'mydata' AS (T1:(f1:int, f2:int), B:{T2:(t1:float,t2:float)}, M:[] ); </para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable></section></section></section>
+   
+   <section>
+   <title>Parameter Substitution</title>
+   
+   <section>
+   <title>Description</title>
+   <para>Substitute values for parameters at run time.</para>
+   
+   <section>
+   <title>Syntax: Specifying parameters using the Pig command line</title>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>pig {-param param_name = param_value | -param_file file_name} [-debug | -dryrun] script</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable></section>
+   
+   <section>
+   <title>Syntax: Specifying parameters using preprocessor statements in a Pig script</title>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>{%declare | %default} param_name param_value</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable></section>
+   
+   <section>
+   <title>Terms</title>
+   <informaltable frame="all">
+      <tgroup cols="2"><tbody><row>
+            <entry>
+               <para>pig</para>
+            </entry>
+            <entry>
+               <para>Keyword</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>-param</para>
+            </entry>
+            <entry>
+               <para>Flag. Use this option when the parameter is included in the command line.</para>
+               <para>Multiple parameters can be specified. If the same parameter is specified multiple times, the last value will be used and a warning will be generated.</para>
+               <para>Command line parameters and parameter files can be combined with command line parameters taking precedence. </para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>param_name</para>
+            </entry>
+            <entry>
+               <para>The name of the parameter.</para>
+               <para>The parameter name has the structure of a standard language identifier: it must start with a letter or underscore followed by any number of letters, digits, and underscores. </para>
+               <para>Parameter names are case insensitive. </para>
+               <para>If you pass a parameter to a script that the script does not use, this parameter is silently ignored. If the script has a parameter and no value is supplied or substituted, an error will result.</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>param_value</para>
+            </entry>
+            <entry>
+               <para>The value of the parameter. </para>
+               <para>A parameter value can take two forms:</para>
+               <orderedlist>
+                  <listitem>
+                     <para>A sequence of characters enclosed in single or double quotes. In this case the unquoted version of the value is used during substitution. Quotes within the value can be escaped with the backslash character ( \ ). Single word values that don't use special characters such as % or = don't have to be quoted. </para>
+                  </listitem>
+                  <listitem>
+                     <para>A command enclosed in back ticks. </para>
+                  </listitem>
+               </orderedlist>
+               <para>The value of a parameter, in either form, can be expressed in terms of other parameters as long as the values of the dependent parameters are already defined.</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>-param_file</para>
+            </entry>
+            <entry>
+               <para>Flag. Use this option when the parameter is included in a file. </para>
+               <para>Multiple files can be specified. If the same parameter is present multiple times in the file, the last value will be used and a warning will be generated. If a parameter is present in multiple files, the value from the last file will be used and a warning will be generated.</para>
+               <para>Command line parameters and parameter files can be combined with command line parameters taking precedence. </para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>file_name</para>
+            </entry>
+            <entry>
+               <para>The name of a file containing one or more parameters.</para>
+               <para>A parameter file will contain one line per parameter. Empty lines are allowed. Perl-style (#) comment lines are also allowed. Comments must take a full line and # must be the first character on the line. Each parameter line will be of the form: param_name = param_value. White spaces around = are allowed but are optional.</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>-debug</para>
+            </entry>
+            <entry>
+               <para>Flag. With this option, the script is run and a fully substituted Pig script is produced in the current working directory named original_script_name.substituted </para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>-dryrun</para>
+            </entry>
+            <entry>
+               <para>Flag. With this option, the script is not run and a fully substituted Pig script is produced in the current working directory named original_script_name.substituted</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>script</para>
+            </entry>
+            <entry>
+               <para>A pig script. The pig script must be the last element in the Pig command line.</para>
+               <orderedlist>
+                  <listitem>
+                     <para>If parameters are specified in the Pig command line or in a parameter file, the script should include a $param_name for each param_name included in the command line or parameter file.</para>
+                  </listitem>
+                  <listitem>
+                     <para>If parameters are specified using the preprocessor statements, the script should include either %declare or %default.</para>
+                  </listitem>
+                  <listitem>
+                     <para>In the script, parameter names can be escaped with the backslash character ( \ ) in which case substitution does not take place.</para>
+                  </listitem>
+               </orderedlist>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>%declare</para>
+            </entry>
+            <entry>
+               <para>Preprocessor statement included in a Pig script.</para>
+               <para>Use to describe one parameter in terms of other parameters.</para>
+               <para>The declare statement is processed prior to running the Pig script. </para>
+               <para>The scope of a parameter value defined using declare is all the lines following the declare statement until the next declare statement that defines the same parameter is encountered.</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>%default</para>
+            </entry>
+            <entry>
+               <para>Preprocessor statement included in a Pig script.</para>
+               <para>Use to provide a default value for a parameter. The default value has the lowest priority and is used if a parameter value has not been defined by other means.</para>
+               <para>The default statement is processed prior to running the Pig script. </para>
+               <para>The scope is the same as for %declare.</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable></section>
+   
+   <section>
+   <title>Usage</title>
+   <para>Parameter substitution enables you to write Pig scripts that include parameters and to supply values for these parameters at run time. For instance, suppose you have a job that needs to run every day using the current day's data. You can create a Pig script that includes a parameter for the date. Then, when you run this script you can specify or supply a value for the date parameter using one of the supported methods. </para>
+   
+   <section>
+   <title>Specifying Parameters </title>
+   <para>You can specify parameter names and parameter values as follows:</para>
+   <orderedlist>
+      <listitem>
+         <para>As part of a command line.</para>
+      </listitem>
+      <listitem>
+         <para>In a parameter file, as part of a command line.</para>
+      </listitem>
+      <listitem>
+         <para>With the declare statement, as part of a Pig script.</para>
+      </listitem>
+      <listitem>
+         <para>With the default statement, as part of a Pig script.</para>
+      </listitem>
+   </orderedlist></section>
+   
+   <section>
+   <title>Precedence</title>
+   <para>Precedence for parameters is as follows:</para>
+   <orderedlist>
+      <listitem>
+         <para>Highest - parameters defined using the declare statement</para>
+      </listitem>
+      <listitem>
+         <para>Next - parameters defined in the command line</para>
+      </listitem>
+      <listitem>
+         <para>Lowest - parameters defined using the default statement in a script</para>
+      </listitem>
+   </orderedlist></section>
+   
+   <section>
+   <title>Processing Order and Precedence</title>
+   <para>Parameters are processed as follows:</para>
+   <orderedlist>
+      <listitem>
+         <para>Command line parameters are scanned in the order they are specified on the command line. </para>
+      </listitem>
+      <listitem>
+         <para>Parameter files are scanned in the order they are specified on the command line. Within each file, the parameters are processed in the order they are listed. </para>
+      </listitem>
+      <listitem>
+         <para>Declare and default preprocessor statements are processed in the order they appear in the Pig script. </para>
+      </listitem>
+   </orderedlist></section></section>
+   
+   <section>
+   <title>Example: Specifying parameters in the command line</title>
+   <para>Suppose we have a data file called 'mydata' and a pig script called 'myscript.pig'.</para>
+   <orderedlist>
+      <listitem>
+         <para>mydata </para>
+      </listitem>
+   </orderedlist>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>1 &#160; &#160; &#160; 2 &#160; &#160; &#160; 3</para>
+               <para>4 &#160; &#160; &#160; 2 &#160; &#160; &#160; 1</para>
+               <para>8 &#160; &#160; &#160; 3 &#160; &#160; &#160; 4</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable>
+   <orderedlist>
+      <listitem>
+         <para>myscript.pig</para>
+      </listitem>
+   </orderedlist>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>A = LOAD '$data' USING PigStorage() AS (f1:int, f2:int, f3:int);</para>
+               <para>DUMP A;</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable>
+   <para>In this example the parameter (data) and the parameter value (mydata) are specified in the command line. If the parameter name in the command line (data) and the parameter name in the script ($data) do not match, the script will not run. If the value for the parameter (mydata) is not found, an error is generated.</para>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>$ pig -param data=mydata myscript.pig</para>
+               <para>(1,2,3)</para>
+               <para>(4,2,1)</para>
+               <para>(8,3,4)</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable></section>
+   
+   <section>
+   <title>Example: Specifying parameters using a parameter file</title><para>Suppose we have a parameter file called 'myparams'.</para>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para># my parameters</para>
+               <para>data1 = mydata1</para>
+               <para>cmd = `generate_name`</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable>
+   <para>In this example the parameters and values are passed to the script using the parameter file.</para>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>$ pig -param_file myparams script2.pig</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable></section>
+   
+   <section>
+   <title>Example: Specifying parameters using the declare statement</title>
+   <para>In this example the command is executed and its stdout is used as the parameter value.</para>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>%declare CMD `generate_date`</para>
+               <para>A = LOAD '/data/mydata/$CMD';</para>
+               <para>B = FILTER A BY $0&gt;'5';</para>
+               <para>etc …</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable></section>
+   
+   <section>
+   <title>Example: Specifying parameters using the default statement</title>
+   <para>In this example the parameter (DATE) and value ('20090101') are specified in the Pig script using the default statement. If a value for DATE is not specified elsewhere, the default value 20090101 is used.</para>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>%default DATE '20090101';</para>
+               <para>A = load '/data/mydata/$DATE';</para>
+               <para>etc …</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable></section>
+   
+   <section>
+   <title>Examples: Specifying parameter values as a sequence of characters</title>
+   <para>In this example the characters (in this case, Joe's URL) can be enclosed in single or double quotes, and quotes within the sequence of characters can be escaped. </para>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>%declare DES 'Joe\'s URL';</para>
+               <para>A = LOAD 'data' AS (name, description, url);</para>
+               <para>B = FILTER A BY description == '$DES';</para>
+               <para>etc …</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable>
+   <para>In this example single word values that don't use special characters (in this case, mydata) don't have to be enclosed in quotes.</para>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>$ pig -param data=mydata myscript.pig</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable></section>
+   
+   <section>
+   <title>Example: Specifying parameter values as a command</title>
+   <para>In this example the command is enclosed in back ticks. First, the parameters mycmd and date are substituted when the declare statement is encountered. Then the resulting command is executed and its stdout is placed in the path before the load statement is run.</para>
+   <informaltable frame="all">
+      <tgroup cols="1"><tbody><row>
+            <entry>
+               <para>%declare CMD `$mycmd $date`</para>
+               <para>A = LOAD '/data/mydata/$CMD';</para>
+               <para>B = FILTER A BY $0&gt;'5';</para>
+               <para>etc …</para>
+            </entry>
+         </row></tbody></tgroup>
+   </informaltable></section></section></section>
+   
+   <section>
+   <title>Keywords</title>
+   <informaltable frame="all">
+      <tgroup cols="4"><tbody><row>
+            <entry>
+               <para>A</para>
+            </entry>
+            <entry>
+               <para>F</para>
+            </entry>
+            <entry>
+               <para>M</para>
+            </entry>
+            <entry>
+               <para>Functions</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>and</para>
+            </entry>
+            <entry>
+               <para>f</para>
+            </entry>
+            <entry>
+               <para>map</para>
+            </entry>
+            <entry>
+               <para>AVG</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>all</para>
+            </entry>
+            <entry>
+               <para>F</para>
+            </entry>
+            <entry>
+               <para>matches</para>
+            </entry>
+            <entry>
+               <para>BinaryDeserializer</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>as</para>
+            </entry>
+            <entry>
+               <para>filter </para>
+            </entry>
+            <entry>
+               <para>mkdir</para>
+            </entry>
+            <entry>
+               <para>BinarySerializer</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>asc</para>
+            </entry>
+            <entry>
+               <para>flatten</para>
+            </entry>
+            <entry>
+               <para>mv</para>
+            </entry>
+            <entry>
+               <para>BinStorage</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para/>
+            </entry>
+            <entry>
+               <para>float</para>
+            </entry>
+            <entry>
+               <para>N</para>
+            </entry>
+            <entry>
+               <para>CONCAT</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>B</para>
+            </entry>
+            <entry>
+               <para>foreach</para>
+            </entry>
+            <entry>
+               <para>not</para>
+            </entry>
+            <entry>
+               <para>COUNT</para>
+            </entry>
+         </row>
+         <row>
+            <entry>
+               <para>bag</para>
+            </entry>
+            <entry>
+               <para>G</para>
+            </entry>
+            <entry>
+               <para>null</para>
+            </entry>
+            <entry>
+               <para>DIFF</para>
+            </entry>
+         </row>
+         <row>
+            <entry>

[... 7832 lines stripped ...]