You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ol...@apache.org on 2010/12/16 19:10:59 UTC

svn commit: r1050082 [2/6] - in /pig/trunk: ./ src/docs/src/documentation/content/xdocs/

Added: pig/trunk/src/docs/src/documentation/content/xdocs/basic.xml
URL: http://svn.apache.org/viewvc/pig/trunk/src/docs/src/documentation/content/xdocs/basic.xml?rev=1050082&view=auto
==============================================================================
--- pig/trunk/src/docs/src/documentation/content/xdocs/basic.xml (added)
+++ pig/trunk/src/docs/src/documentation/content/xdocs/basic.xml Thu Dec 16 18:10:59 2010
@@ -0,0 +1,7401 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+-->
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" "http://forrest.apache.org/dtd/document-v20.dtd">
+
+<document>
+  <header>
+    <title>Pig Latin Basics</title>
+  </header>
+  <body>
+
+
+<!-- CONVENTIONS -->
+<section>
+<title>Conventions</title>
+   <p>Conventions for the syntax and code examples in the Pig Latin Reference Manual are described here.</p>
+   <table>
+      <tr>
+            <td>
+               <p>Convention</p>
+            </td>
+            <td>
+               <p>Description</p>
+            </td>
+            <td>
+               <p>Example</p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>( )</p>
+            </td>
+            <td>
+               <p>Parentheses enclose one or more items.</p>
+               <p>Parentheses are also used to indicate the tuple data type.</p>
+            </td>
+            <td>
+               <p>Multiple items:</p>
+               <p>(1, abc, (2,4,6) )</p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>[ ]</p>
+            </td>
+            <td>
+               <p>Straight brackets enclose one or more optional items.</p>
+               <p>Straight brackets are also used to indicate the map data type. In this case &lt;&gt; is used to indicate optional items.</p>
+            </td>
+            <td>
+               <p>Optional items:</p>
+               <p>[INNER | OUTER]</p>
+               <p></p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>{ }</p>
+            </td>
+            <td>
+               <p>Curly brackets enclose two or more items, one of which is required. </p>
+               <p>Curly brackets are also used to indicate the bag data type. In this case &lt;&gt; is used to indicate required items.</p>
+            </td>
+            <td>
+               <p>Two items, one required:</p>
+               <p>{ gen_blk | nested_gen_blk }</p>
+              <p></p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>…</p>
+            </td>
+            <td>
+               <p>Horizontal ellipsis points indicate that you can repeat a portion of the code.</p>
+            </td>
+            <td>
+               <p>Pig Latin syntax statement:</p>
+               <p>cat path [path …]</p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>UPPERCASE</p>
+               <p></p>
+               <p>lowercase</p>
+            </td>
+            <td>
+               <p>In general, uppercase type indicates elements the system supplies.</p>
+               <p>In general, lowercase type indicates elements that you supply.</p>
+               <p>(These conventions are not strictly adhered to in all examples.)</p>
+               <p>See <a href="#Case+Sensitivity">Case Sensitivity</a></p>
+            </td>
+            <td>
+               <p>Pig Latin statement:</p>
+               <p>a = LOAD 'data' AS (f1:int);</p>
+               <p></p>
+               <ul>
+                  <li>
+                     <p>LOAD, AS - Pig keywords</p>
+                  </li>
+                  <li>
+                     <p>a, f1 - aliases you supply</p>
+                  </li>
+                  <li>
+                     <p>'data' - data source you supply</p>
+                  </li>
+               </ul>
+            </td>
+         </tr>
+   </table>
+   </section>   
+
+
+ <!-- KEYWORDS -->
+   <section>
+   <title>Reserved Keywords</title>
+   <p>Pig reserved keywords are listed here.</p>
+   <table>
+         <tr>
+            <td> <p>-- A </p> </td>
+            <td> <p>and, any, all, arrange, as, asc, AVG</p> </td>
+         </tr>      
+      
+         <tr>
+            <td> <p>-- B </p> </td>
+            <td> <p>bag, BinStorage, by, bytearray </p> </td>
+         </tr>   
+
+         <tr>
+            <td> <p>-- C </p> </td>
+            <td> <p>cache, cat, cd, chararray, cogroup, CONCAT, copyFromLocal, copyToLocal, COUNT, cp, cross</p> </td>
+         </tr>
+         
+         <tr>
+            <td> <p>-- D </p> </td>
+            <td> <p>%declare, %default, define, desc, describe, DIFF, distinct, double, du, dump</p> </td>
+         </tr>  
+         
+         <tr>
+            <td> <p>-- E </p> </td>
+            <td> <p>e, E, eval, exec, explain</p> </td>
+         </tr>  
+         
+         <tr>
+            <td> <p>-- F </p> </td>
+            <td> <p>f, F, filter, flatten, float, foreach, full</p> </td>
+         </tr>  
+         
+         <tr>
+            <td> <p>-- G </p> </td>
+            <td> <p>generate, group</p> </td>
+         </tr>  
+         
+         <tr>
+            <td> <p>-- H </p> </td>
+            <td> <p>help</p> </td>
+         </tr>  
+         
+         <tr>
+            <td> <p>-- I </p> </td>
+            <td> <p>if, illustrate, inner, input, int, into, is</p> </td>
+         </tr>  
+         
+         <tr>
+            <td> <p>-- J </p> </td>
+            <td> <p>join</p> </td>
+         </tr>  
+         
+         <tr>
+            <td> <p>-- K </p> </td>
+            <td> <p>kill</p> </td>
+         </tr>   
+         
+         <tr>
+            <td> <p>-- L </p> </td>
+            <td> <p>l, L, left, limit, load, long, ls</p> </td>
+         </tr>  
+         
+         <tr>
+            <td> <p>-- M </p> </td>
+            <td> <p>map, matches, MAX, MIN, mkdir, mv </p> </td>
+         </tr>  
+         
+         <tr>
+            <td> <p>-- N </p> </td>
+            <td> <p>not, null</p> </td>
+         </tr>  
+         
+         <tr>
+            <td> <p>-- O </p> </td>
+            <td> <p>onschema, or, order, outer, output</p> </td>
+         </tr>  
+         
+         <tr>
+            <td> <p>-- P </p> </td>
+            <td> <p>parallel, pig, PigDump, PigStorage, pwd</p> </td>
+         </tr>  
+         
+         <tr>
+            <td> <p>-- Q </p> </td>
+            <td> <p>quit</p> </td>
+         </tr>  
+         
+         <tr>
+            <td> <p>-- R </p> </td>
+            <td> <p>register, right, rm, rmf, run</p> </td>
+         </tr>  
+
+         <tr>
+            <td> <p>-- S </p> </td>
+            <td> <p>sample, set, ship, SIZE, split, stderr, stdin, stdout, store, stream, SUM</p> </td>
+         </tr>  
+         
+         <tr>
+            <td> <p>-- T </p> </td>
+            <td> <p>TextLoader, TOKENIZE, through, tuple</p> </td>
+         </tr>  
+         
+         <tr>
+            <td> <p>-- U </p> </td>
+            <td> <p>union, using</p> </td>
+         </tr>  
+
+         <tr>
+            <td> <p>-- V, W, X, Y, Z </p> </td>
+            <td> <p> </p> </td>
+         </tr>  
+                    
+        <tr>
+            <td> <p>-- Symbols</p> </td>
+            <td> <p>==   !=   &lt;  &gt;   &lt;=   &gt;=   +   -   *   /   %   ?   $   .   #   ::   ( )   [ ]   { } </p> </td>
+         </tr> 
+            
+   </table>
+   </section>
+   
+   
+ <!-- ++++++++++++++++++++++++++++++++++ --> 
+ <!-- CASE SENSITIVITY -->
+   <section>
+   <title>Case Sensitivity</title>
+   <p>The names (aliases) of relations and fields are case sensitive. The names of Pig Latin functions are case sensitive. 
+   The names of parameters (see <a href="cont.html#Parameter+Substitution">Parameter Substitution</a>) and all other Pig Latin keywords (see <a href="#Reserved+Keywords">Reserved Keywords</a>) are case insensitive.</p>
+   <p>In the example below, note the following:</p>
+   <ul>
+      <li>
+         <p>The names (aliases) of relations A, B, and C are case sensitive.</p>
+      </li>
+      <li>
+         <p>The names (aliases) of fields f1, f2, and f3 are case sensitive.</p>
+      </li>
+      <li>
+         <p>Function names PigStorage and COUNT are case sensitive.</p>
+      </li>
+      <li>
+         <p>Keywords LOAD, USING, AS, GROUP, BY, FOREACH, GENERATE, and DUMP are case insensitive. 
+         They can also be written as load, using, as, group, by, etc.</p>
+      </li>
+      <li>
+         <p>In the FOREACH statement, the field in relation B is referred to by positional notation ($0).</p>
+      </li>
+   </ul>
+   <p/>
+
+<source>
+grunt> A = LOAD 'data' USING PigStorage() AS (f1:int, f2:int, f3:int);
+grunt> B = GROUP A BY f1;
+grunt> C = FOREACH B GENERATE COUNT ($0);
+grunt> DUMP C;
+</source>
+</section>
+   
+   
+<!-- DATA TYPES AND MORE-->
+<section>
+<title>Data Types and More</title>
+
+<!-- RELATIONS, BAGS, TUPLES, FIELDS-->
+   <section id="relations">
+   <title>Relations, Bags, Tuples, Fields</title>
+      <p><a href="start.html#Pig+Latin+Statements">Pig Latin statements</a> work with relations. A relation can be defined as follows:</p>
+   <ul>
+      <li>
+         <p>A relation is a bag (more specifically, an outer bag).</p>
+      </li>
+      <li>
+         <p>A bag is a collection of tuples. </p>
+      </li>
+      <li>
+         <p>A tuple is an ordered set of fields.</p>
+      </li>
+      <li>
+         <p>A field is a piece of data.</p>
+      </li>
+   </ul>
+   <p></p>
+   <p>A Pig relation is a bag of tuples. A Pig relation is similar to a table in a relational database, where the tuples in the bag correspond to the rows in a table. Unlike a relational table, however, Pig relations don't require that every tuple contain the same number of fields or that the fields in the same position (column) have the same type.</p>
+   <p>Also note that relations are unordered which means there is no guarantee that tuples are processed in any particular order. Furthermore, processing may be parallelized in which case tuples are not processed according to any total ordering.</p>
+   
+   <section><title>Referencing Relations</title>
+   <p>Relations are referred to by name (or alias). Names are assigned by you as part of the Pig Latin statement. In this example the name (alias) of the relation is A.</p>
+   
+   <source>
+A = LOAD 'student' USING PigStorage() AS (name:chararray, age:int, gpa:float);
+DUMP A;
+(John,18,4.0F)
+(Mary,19,3.8F)
+(Bill,20,3.9F)
+(Joe,18,3.8F)
+</source>
+</section>
+   
+   
+   <!-- +++++++++++++++++++++++++++++++++++++++++++++++ -->
+   <section>
+   <title>Referencing Fields</title>
+   <p>Fields are referred to by positional notation or by name (alias). </p>
+   <ul>
+      <li>
+         <p>Positional notation is generated by the system. Positional notation is indicated with the dollar sign ($) and begins with zero (0); for example, $0, $1, $2. </p>
+      </li>
+      <li>
+         <p>Names are assigned by you using schemas (or, in the case of the GROUP operator and some functions, by the system). You can use any name that is not a Pig keyword; for example, f1, f2, f3 or a, b, c or name, age, gpa.</p>
+      </li>
+   </ul>
+   <p>Given relation A above, the three fields are separated out in this table. </p>
+   <table>
+         <tr>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p>First Field</p>
+            </td>
+            <td>
+               <p>Second Field</p>
+            </td>
+            <td>
+               <p>Third Field </p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>Data type</p>
+            </td>
+            <td>
+               <p>chararray</p>
+            </td>
+            <td>
+               <p>int</p>
+            </td>
+            <td>
+               <p>float</p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>Positional notation (generated by system) </p>
+            </td>
+            <td>
+               <p>$0</p>
+            </td>
+            <td>
+               <p>$1</p>
+            </td>
+            <td>
+               <p>$2</p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>Possible name (assigned by you using a schema)</p>
+            </td>
+            <td>
+               <p>name</p>
+            </td>
+            <td>
+               <p>age</p>
+            </td>
+            <td>
+               <p>gpa</p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>Field value (for the first tuple)</p>
+            </td>
+            <td>
+               <p>John</p>
+            </td>
+            <td>
+               <p>18</p>
+            </td>
+            <td>
+               <p>4.0</p>
+            </td>
+         </tr>
+   </table>
+   
+   <p>As shown in this example, when you assign names to fields (using the AS schema clause), you can still refer to the fields using positional notation. However, for debugging purposes and ease of comprehension, it is better to use field names.</p>
+<source>
+A = LOAD 'student' USING PigStorage() AS (name:chararray, age:int, gpa:float);
+X = FOREACH A GENERATE name,$2;
+DUMP X;
+(John,4.0F)
+(Mary,3.8F)
+(Bill,3.9F)
+(Joe,3.8F)
+</source>   
+   
+   <p>In this example an error is generated because the requested column ($3) is outside of the declared schema (positional notation begins with $0). Note that the error is caught before the statements are executed.</p>
+   
+   <source>
+A = LOAD 'data' AS (f1:int,f2:int,f3:int);
+B = FOREACH A GENERATE $3;
+DUMP B;
+2009-01-21 23:03:46,715 [main] ERROR org.apache.pig.tools.grunt.GruntParser - java.io.IOException: 
+Out of bound access. Trying to access non-existent  : 3. Schema {f1: bytearray,f2: bytearray,f3: bytearray} has 3 column(s). 
+<em>etc ... </em></source>
+</section>
+   
+   
+   <!-- +++++++++++++++++++++++++++++++++++++++++++++++ -->
+   <section>
+   <title>Referencing Fields that are Complex Data Types</title>
+   <p>As noted, the fields in a tuple can be any data type, including the complex data types: bags, tuples, and maps. </p>
+   <ul>
+      <li>
+         <p>Use the schemas for complex data types to name fields that are complex data types. </p>
+      </li>
+      <li>
+         <p>Use the dereference operators to reference and work with fields that are complex data types.</p>
+      </li>
+   </ul>
+   <p>In this example the data file contains tuples. A schema for complex data types (in this case, tuples) is used to load the data. Then, dereference operators (the dot in t1.t1a and t2.$0) are used to access the fields in the tuples. Note that when you assign names to fields you can still refer to these fields using positional notation.</p>
+
+   
+   <source>
+cat data;
+(3,8,9) (4,5,6)
+(1,4,7) (3,7,5)
+(2,5,8) (9,5,8)
+
+A = LOAD 'data' AS (t1:tuple(t1a:int, t1b:int,t1c:int),t2:tuple(t2a:int,t2b:int,t2c:int));
+
+DUMP A;
+((3,8,9),(4,5,6))
+((1,4,7),(3,7,5))
+((2,5,8),(9,5,8))
+
+X = FOREACH A GENERATE t1.t1a,t2.$0;
+
+DUMP X;
+(3,4)
+(1,3)
+(2,9)
+</source>
+</section>
+</section>   
+
+<!-- ++++++++++++++++++++++++++++++++++ --> 
+<section>
+<title>Data Types</title>
+
+<section>
+<title>Simple and Complex</title>
+<p></p>
+
+   <table>
+      <tr>
+            <td>
+               <p>Simple Data Types</p>
+            </td>
+            <td>
+               <p>Description</p>
+            </td>
+            <td>
+               <p>Example </p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>Scalars</p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>int</p>
+            </td>
+            <td>
+               <p>Signed 32-bit integer</p>
+            </td>
+            <td>
+               <p>10</p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>long</p>
+            </td>
+            <td>
+               <p>Signed 64-bit integer</p>
+            </td>
+            <td>
+               <p>Data:     10L or 10l </p>
+               <p>Display: 10L </p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>float</p>
+            </td>
+            <td>
+               <p>32-bit floating point</p>
+            </td>
+            <td>
+               <p>Data:     10.5F or 10.5f or 10.5e2f or 10.5E2F</p>
+               <p>Display: 10.5F or 1050.0F</p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>double</p>
+            </td>
+            <td>
+               <p>64-bit floating point</p>
+            </td>
+            <td>
+               <p>Data:     10.5 or 10.5e2 or 10.5E2</p>
+               <p>Display: 10.5 or 1050.0</p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>Arrays</p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>chararray</p>
+            </td>
+            <td>
+               <p>Character array (string) in Unicode UTF-8 format</p>
+            </td>
+            <td>
+               <p>hello world</p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>bytearray</p>
+            </td>
+            <td>
+               <p>Byte array (blob)</p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>Complex Data Types</p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>tuple</p>
+            </td>
+            <td>
+               <p>An ordered set of fields.</p>
+            </td>
+            <td>
+               <p>(19,2)</p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>bag</p>
+            </td>
+            <td>
+               <p>A collection of tuples.</p>
+            </td>
+            <td>
+               <p>{(19,2), (18,1)}</p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>map</p>
+            </td>
+            <td>
+               <p>A set of key value pairs.</p>
+            </td>
+            <td>
+               <p>[open#apache]</p>
+            </td>
+         </tr>
+   </table>
+   
+   <p>Note the following general observations about data types:</p>
+   <ul>
+      <li>
+         <p>Use schemas to assign types to fields.  If you don't assign types, fields default to type bytearray and implicit conversions are applied to the data depending on the context in which that data is used. For example, in relation B, f1 is converted to integer because 5 is integer. In relation C, f1 and f2 are converted to double because we don't know the type of either f1 or f2.</p>
+      <source>
+A = LOAD 'data' AS (f1,f2,f3);
+B = FOREACH A GENERATE f1 + 5;
+C = FOREACH A generate f1 + f2;
+</source>
+      </li>
+   </ul>
+
+   <ul>
+      <li>
+         <p>If a schema is defined as part of a load statement, the load function will attempt to enforce the schema. If the data does not conform to the schema, the loader will generate a null value or an error.</p>
+      <source>
+A = LOAD 'data' AS (name:chararray, age:int, gpa:float);
+</source>
+      </li>
+   </ul>
+   <p></p>
+   <ul>
+      <li>
+         <p>If an explicit cast is not supported, an error will occur. For example, you cannot cast a chararray to int.</p>
+         <source>
+A = LOAD 'data' AS (name:chararray, age:int, gpa:float);
+B = FOREACH A GENERATE (int)name;
+
+This will cause an error …</source>
+      </li>
+   </ul>
+
+   <p></p>
+   <ul>
+      <li>
+         <p>If Pig cannot resolve incompatible types through implicit casts, an error will occur. For example, you cannot add chararray and float (see the Types Table for addition and subtraction).</p>
+      <source>
+A = LOAD 'data' AS (name:chararray, age:int, gpa:float);
+B = FOREACH A GENERATE name + gpa;
+
+This will cause an error …</source>
+      </li>
+   </ul>
+   </section>
+   
+   <section>
+   <title>Tuple</title>
+   <p>A tuple is an ordered set of fields.</p>
+   
+   <section>
+   <title>Syntax </title>
+   <table>
+        <tr>
+            <td>
+               <p>( field [, field …] )  </p>
+            </td>
+        </tr>
+   </table>
+   </section>
+   
+   <section>
+   <title>Terms</title>
+   <table>
+      <tr>
+            <td>
+               <p>(  )</p>
+            </td>
+            <td>
+               <p>A tuple is enclosed in parentheses ( ).</p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>field</p>
+            </td>
+            <td>
+               <p>A piece of data. A field can be any data type (including tuple and bag).</p>
+            </td>
+         </tr>
+   </table>
+   </section>
+   
+   <section>
+   <title>Usage</title>
+   <p>You can think of a tuple as a row with one or more fields, where each field can be any data type and any field may or may not have data. If a field has no data, then the following happens:</p>
+   <ul>
+      <li>
+         <p>In a load statement, the loader will inject null into the tuple. The actual value that is substituted for null is loader specific; for example, PigStorage substitutes an empty field for null.</p>
+      </li>
+      <li>
+         <p>In a non-load statement, if a requested field is missing from a tuple, Pig will inject null.</p>
+      </li>
+   </ul>
+   </section>
+   
+   <section>
+   <title>Example</title>
+   <p>In this example the tuple contains three fields.</p>
+   <source>(John,18,4.0F)</source>
+   </section></section>
+   
+   <section>
+   <title>Bag</title>
+   <p>A bag is a collection of tuples.</p>
+   
+   <section>
+   <title>Syntax: Inner bag</title>
+   <table>
+      <tr>
+            <td>
+               <p>{ tuple [, tuple …] }</p>
+            </td>
+         </tr>
+   </table>
+   </section>
+   
+   <section>
+   <title>Terms</title>
+   <table>
+      <tr>
+            <td>
+               <p>{  }</p>
+            </td>
+            <td>
+               <p>An inner bag is enclosed in curly brackets { }.</p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>tuple</p>
+            </td>
+            <td>
+               <p>A tuple.</p>
+            </td>
+         </tr>
+   </table>
+   </section>
+   
+   <section>
+   <title>Usage </title>
+   <p>Note the following about bags:</p>
+   <ul>
+      <li>
+         <p>A bag can have duplicate tuples.</p>
+      </li>
+      <li>
+         <p>A bag can have tuples with differing numbers of fields. However, if Pig tries to access a field that does not exist, a null value is substituted.</p>
+      </li>
+      <li>
+         <p>A bag can have tuples with fields that have different data types. However, for Pig to effectively process bags, the schemas of the tuples within those bags should be the same. For example, if half of the tuples include chararray fields while the other half include float fields, only half of the tuples will participate in any kind of computation because the chararray fields will be converted to null.</p>
+         <p></p>
+         <p>Bags have two forms: outer bag (or relation) and inner bag.</p>
+      </li>
+   </ul>
+   </section>
+   
+   <section>
+   <title>Example: Outer Bag</title>
+   <p>In this example A is a relation or bag of tuples. You can think of this bag as an outer bag.</p>
+<source>
+A = LOAD 'data' as (f1:int, f2:int, f3:int);
+DUMP A;
+(1,2,3)
+(4,2,1)
+(8,3,4)
+(4,3,3)
+</source>
+   </section>
+   
+   <section>
+   <title>Example: Inner Bag</title>
+   <p>Now, suppose we group relation A by the first field to form relation X. </p>
+   <p>In this example X is a relation or bag of tuples. The tuples in relation X have two fields. The first field is type int. The second field is type bag; you can think of this bag as an inner bag.</p>
+<source>
+X = GROUP A BY f1;
+DUMP X;
+(1,{(1,2,3)})
+(4,{(4,2,1),(4,3,3)})
+(8,{(8,3,4)})
+</source>
+   </section>
+   </section>
+   
+   <section>
+   <title>Map</title>
+   <p>A map is a set of key value pairs.</p>
+   
+   <section>
+   <title>Syntax (&lt;&gt; denotes optional)</title>
+   <table>
+      <tr>
+            <td>
+               <p>[ key#value &lt;, key#value …&gt; ]</p>
+            </td>
+         </tr>
+   </table>
+   </section>
+   
+   <section>
+   <title>Terms</title>
+   <table>
+      <tr>
+            <td>
+               <p>[ ]</p>
+            </td>
+            <td>
+               <p>Maps are enclosed in straight brackets [ ].</p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>#</p>
+            </td>
+            <td>
+               <p>The key and value in each pair are separated by the pound sign #.</p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>key</p>
+            </td>
+            <td>
+               <p>Must be chararray data type. Must be a unique value.</p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>value</p>
+            </td>
+            <td>
+               <p>Any data type.</p>
+            </td>
+         </tr>
+   </table>
+   </section>
+   
+   <section>
+   <title>Usage</title>
+   <p>Key values within a relation must be unique.</p></section>
+   
+   <section>
+   <title>Example</title>
+   <p>In this example the map includes two key value pairs.</p>
+<source>[name#John,phone#5551212]</source>
+</section></section></section> 
+   
+   <!-- ++++++++++++++++++++++++++++++++++ --> 
+   <section id="nulls">
+   <title>Nulls</title>
+   <p>In Pig Latin, nulls are implemented using the SQL definition of null as unknown or non-existent. Nulls can occur naturally in data or can be the result of an operation. </p>
+   <section>
+   <title>Nulls, Operators, and Functions</title>
+   <p>Pig Latin operators and functions interact with nulls as shown in this table.</p>
+   <table>
+      <tr>
+            <td>
+               <p>Operator </p>
+            </td>
+            <td>
+               <p>Interaction </p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>Comparison operators:</p>
+               <p>==, !=</p>
+               <p>&gt;, &lt;</p>
+               <p>&gt;=, &lt;=</p>
+            </td>
+            <td>
+               <p>If either subexpression is null, the result is null.</p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>Comparison operator:</p>
+               <p>matches </p>
+            </td>
+            <td>
+               <p>If either the string being matched against or the string defining the match is null, the result is null.</p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>Arithmetic operators:</p>
+               <p> + , -, *, /</p>
+               <p>% modulo</p>
+               <p>? : bincond</p>
+            </td>
+            <td>
+               <p>If either subexpression is null, the resulting expression is null.</p>
+               <p></p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>Null operator:</p>
+               <p>is null </p>
+            </td>
+            <td>
+               <p>If the tested value is null, returns true; otherwise, returns false (see  <a href="#null_operators">Null Operators</a>).</p>
+              
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>Null operator:</p>
+               <p>is not null</p>
+            </td>
+            <td>
+               <p>If the tested value is not null, returns true; otherwise, returns false (see  <a href="#null_operators">Null Operators</a>).</p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>Dereference operators:</p>
+               <p>tuple (.) or map (#)</p>
+            </td>
+            <td>
+               <p>If the de-referenced tuple or map is null, returns null.</p>
+            </td>
+         </tr>
+                           <tr>
+            <td>
+               <p>Operators:</p>
+               <p>COGROUP, GROUP, JOIN</p>
+            </td>
+            <td>
+               <p>These operators handle nulls differently (see examples below).</p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>Function:</p>
+               <p>COUNT_STAR</p>
+            </td>
+            <td>
+               <p>This function counts all values, including nulls.</p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>Cast operator</p>
+            </td>
+            <td>
+               <p>Casting a null from one type to another type results in a null.</p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>Functions:</p>
+               <p>AVG, MIN, MAX, SUM, COUNT</p>
+            </td>
+            <td>
+               <p>These functions ignore nulls. </p>
+            </td>
+         </tr>
+
+         <tr>
+            <td>
+               <p>Function:</p>
+               <p>CONCAT</p>
+            </td>
+            <td>
+               <p>If either subexpression is null, the resulting expression is null.</p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>Function:</p>
+               <p>SIZE</p>
+            </td>
+            <td>
+               <p>If the tested object is null, returns null.</p>
+            </td>
+         </tr>
+   </table>
+   
+   <p>For Boolean subexpressions, note the results when nulls are used with these operators:</p>
+   <ul>
+      <li>
+         <p>FILTER operator – If a filter expression results in a null value, the filter does not pass the tuple through (if X is null, !X is also null, and the filter will reject both).</p>
+      </li>
+      <li>
+      <p>Bincond operator – If a Boolean subexpression results in a null value, the resulting expression is null (see the interactions above for Arithmetic operators).</p>
+      </li>
+   </ul>
+   </section>
+   
+   
+   <!-- ++++++++++++++++++++++++++++++++++ --> 
+   <section>
+   <title>Nulls and Constants</title>
+   <p>Nulls can be used as constant expressions in place of expressions of any type.</p>
+   <p>In this example a and null are projected.</p>
+<source>
+A = LOAD 'data' AS (a, b, c);
+B = FOREACH A GENERATE a, null;
+</source>
+  
+   <p>In this example of an outer join, if the join key is missing from a table it is replaced by null.</p>
+<source>
+A = LOAD 'student' AS (name: chararray, age: int, gpa: float);
+B = LOAD 'votertab10k' AS (name: chararray, age: int, registration: chararray, donation: float);
+C = COGROUP A BY name, B BY name;
+D = FOREACH C GENERATE FLATTEN((IsEmpty(A) ? null : A)), FLATTEN((IsEmpty(B) ? null : B));
+</source>
+   
+   <p>Like any other expression, null constants can be implicitly or explicitly cast. </p>
+   <p>In this example both a and null will be implicitly cast to double.</p>
+<source>
+A = LOAD 'data' AS (a, b, c);
+B = FOREACH A GENERATE a + null;
+</source>
+   
+   <p>In this example  both a and null will be cast to int, a implicitly, and null explicitly.</p>
+<source>
+A = LOAD 'data' AS (a, b, c);
+B = FOREACH A GENERATE a + (int)null;
+</source>
+   </section>
+   
+   <section>
+   <title>Operations That Produce Nulls</title>
+   <p>As noted, nulls can be the result of an operation. These operations can produce null values: </p>
+   <ul>
+      <li>
+         <p>Division by zero</p>
+      </li>
+      <li>
+         <p>Returns from user defined functions (UDFs) </p>
+      </li>
+      <li>
+         <p>Dereferencing a field that does not exist.</p>
+      </li>
+      <li>
+         <p>Dereferencing a key that does not exist in a map. For example, given a map, info, containing [name#john, phone#5551212], if a user tries to use info#address, a null is returned.</p>
+      </li>
+      <li>
+         <p>Accessing a field that does not exist in a tuple.</p>
+      </li>
+   </ul>
+   
+   <section>
+   <title>Example: Accessing a field that does not exist in a tuple</title>
+   <p>In this example nulls are injected if fields do not have data.</p>
+<source>
+cat data;
+    2   3
+4   
+7   8   9
+
+A = LOAD 'data' AS (f1:int,f2:int,f3:int);
+
+DUMP A;
+(,2,3)
+(4,,)
+(7,8,9)
+
+B = FOREACH A GENERATE f1,f2;
+
+DUMP B;
+(,2)
+(4,)
+(7,8)
+</source>
+   
+   </section></section>
+   
+   <section>
+   <title>Nulls and Load Functions</title>
+   <p>As noted, nulls can occur naturally in the data. If nulls are part of the data, it is the responsibility of the load function to handle them correctly. Keep in mind that what is considered a null value is loader-specific; however, the load function should always communicate null values to Pig by producing Java nulls.</p>
+   <p>The Pig Latin load functions (for example, PigStorage and TextLoader) produce null values wherever data is missing. For example, empty strings (chararrays) are not loaded; instead, they are replaced by nulls.</p>
+   
+   <p>PigStorage is the default load function for the LOAD operator. In this example the is not null operator is used to filter names with null values.</p>
+
+ <source>
+A = LOAD 'student' AS (name, age, gpa); 
+B = FILTER A BY name is not null;
+</source>  
+   </section>
+   
+   <section id="nulls_group">
+   <title>Nulls and GROUP/COGROUP Operators</title>
+   <p>When using the GROUP operator with a single relation, records with a null group key are grouped together.</p>
+   <source>
+A = load 'student' as (name:chararray, age:int, gpa:float);
+dump A;
+(joe,18,2.5)
+(sam,,3.0)
+(bob,,3.5)
+
+X = group A by age;
+dump X;
+(18,{(joe,18,2.5)})
+(,{(sam,,3.0),(bob,,3.5)})
+   </source>
+   
+<p>When using the GROUP (COGROUP) operator with multiple relations, records with a null group key are considered different and are grouped separately. In the example below note that there are two tuples in the output corresponding to the null group key: one that contains tuples from relation A (but not relation B) and one that contains tuples from relation B (but not relation A).</p>
+   
+<source>
+A = load 'student' as (name:chararray, age:int, gpa:float);
+B = load 'student' as (name:chararray, age:int, gpa:float);
+dump B;
+(joe,18,2.5)
+(sam,,3.0)
+(bob,,3.5)
+
+X = cogroup A by age, B by age;
+dump X;
+(18,{(joe,18,2.5)},{(joe,18,2.5)})
+(,{(sam,,3.0),(bob,,3.5)},{})
+(,{},{(sam,,3.0),(bob,,3.5)})
+</source>
+   </section>
+   
+   <section id="nulls_join">
+   <title>Nulls and JOIN Operator</title>
+   <p>The JOIN operator - when performing inner joins - adheres to the SQL standard and disregards (filters out) null values. 
+   (See also <a href="perf.html#Drop+Nulls+Before+a+Join">Drop Nulls Before a Join</a>.)</p>
+<source>
+A = load 'student' as (name:chararray, age:int, gpa:float);
+B = load 'student' as (name:chararray, age:int, gpa:float);
+dump B;
+(joe,18,2.5)
+(sam,,3.0)
+(bob,,3.5)
+  
+X = join A by age, B by age;
+dump X;
+(joe,18,2.5,joe,18,2.5)
+</source>
+   </section>
+   
+   </section>
+  
+  
+   <!-- ++++++++++++++++++++++++++++++++++ -->  
+   <section>
+   <title>Constants</title>
+   <p>Pig provides constant representations for all data types except bytearrays.</p>
+   <table>
+      <tr>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p>Constant Example</p>
+            </td>
+            <td>
+               <p>Notes</p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>Simple Data Types</p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>Scalars</p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>int</p>
+            </td>
+            <td>
+               <p>19</p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>long</p>
+            </td>
+            <td>
+               <p>19L</p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>float</p>
+            </td>
+            <td>
+               <p>19.2F or 1.92e2f</p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>double</p>
+            </td>
+            <td>
+               <p>19.2 or 1.92e2</p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>Arrays </p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>chararray</p>
+            </td>
+            <td>
+               <p>'hello world'</p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>bytearray</p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p>Not applicable.</p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>Complex Data Types</p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>tuple</p>
+            </td>
+            <td>
+               <p>(19, 2, 1)</p>
+            </td>
+            <td>
+               <p>A constant in this form creates a tuple.</p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>bag</p>
+            </td>
+            <td>
+               <p>{ (19, 2), (1, 2) }</p>
+            </td>
+            <td>
+               <p>A constant in this form creates a bag.</p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>map</p>
+            </td>
+            <td>
+               <p>[ 'name' # 'John', 'ext' # 5555 ]</p>
+            </td>
+            <td>
+               <p>A constant in this form creates a map.</p>
+            </td>
+         </tr>
+   </table>
+   <p></p>
+   <p>Please note the following:</p>
+   <ul>
+      <li>
+         <p>On UTF-8 systems you can specify string constants consisting of printable ASCII characters such as 'abc'; you can specify control characters such as '\t'; and, you can specify a character in Unicode by starting it with '\u', for instance, '\u0001' represents Ctrl-A in hexadecimal (see Wikipedia <a href="http://en.wikipedia.org/wiki/ASCII">ASCII</a>, <a href="http://en.wikipedia.org/wiki/Unicode">Unicode</a>, and <a href="http://en.wikipedia.org/wiki/UTF-8">UTF-8</a>). In theory, you should be able to specify non-UTF-8 constants on non-UTF-8 systems but as far as we know this has not been tested.</p>
+      </li>
+      <li>
+         <p>To specify a long constant, l or L must be appended to the number (for example, 12345678L). If the l or L is not specified, but the number is too large to fit into an int, the problem will be detected at parse time and the processing is terminated. </p>
+      </li>
+      <li>
+         <p>Any numeric constant with decimal point (for example, 1.5) and/or exponent (for example, 5e+1) is treated as double unless it ends with f or F in which case it is assigned type float (for example,  1.5f). </p>
+      </li>
+   </ul>
+   <p></p>
+   <p>The data type definitions for tuples, bags, and maps apply to constants:</p>
+   <ul>
+      <li>
+         <p>A tuple can contain fields of any data type</p>
+      </li>
+      <li>
+         <p>A bag is a collection of tuples</p>
+      </li>
+      <li>
+         <p>A map key must be a scalar; a map value can be any data type</p>
+      </li>
+   </ul>
+   <p></p>
+   <p>Complex constants (either with or without values) can be used in the same places scalar constants can be used; that is, in FILTER and GENERATE statements.</p>
+
+<source>
+A = LOAD 'data' USING MyStorage() AS (T: tuple(name:chararray, age: int));
+B = FILTER A BY T == ('john', 25);
+D = FOREACH B GENERATE T.name, [25#5.6], {(1, 5, 18)};
+</source>
+   </section>
+   
+   
+    <!-- ++++++++++++++++++++++++++++++++++ --> 
+   <section>
+   <title>Expressions</title>
+   <p>In Pig Latin, expressions are language constructs used with the FILTER, FOREACH, GROUP, and SPLIT operators as well as the eval functions.</p>
+   <p>Expressions are written in conventional mathematical infix notation and are adapted to the UTF-8 character set. Depending on the context, expressions can include:</p>
+   <ul>
+      <li>
+         <p>Any Pig data type (simple data types, complex data types)</p>
+      </li>
+      <li>
+         <p>Any Pig operator (arithmetic, comparison, null, boolean, dereference, sign, and cast)</p>
+      </li>
+      <li>
+         <p>Any Pig built-in function.</p>
+      </li>
+      <li>
+         <p>Any user-defined function (UDF) written in Java. </p>
+       </li>
+        </ul>
+        <p></p>
+       <p>In Pig Latin,</p>
+        <ul>
+       <li>
+         <p>An arithmetic expression could look like this:</p>
+         <source>
+X = GROUP A BY f2*f3;
+</source>
+      </li>
+
+      <li>
+         <p></p>
+         <p>A string expression could look like this, where a and b are both chararrays:</p>
+         <source>
+X = FOREACH A GENERATE CONCAT(a,b);
+</source>
+      </li>
+
+      <li>
+         <p></p>
+         <p>A boolean expression could look like this:</p>
+         <source>
+X = FILTER A BY (f1==8) OR (NOT (f2+f3 &gt; f1));
+</source>
+      </li>
+   </ul>
+
+      <section id="fexp">
+          <title>Field expressions</title>
+          <p>Field expressions represent a field or a <a href="#deref">dereference operator</a> applied to a field.</p>
+      </section>
+
+      <section id="sexp">
+          <title>Star expression</title>
+          <p>The star symbol, *, can be used to represent all the fields of a tuple. It is equivalent to writing out the fields explicitly. In the following example the definitions of B and C are exactly the same, and MyUDF will be invoked with exactly the same arguments in both cases.</p>
+          <source>
+A = LOAD 'data' USING MyStorage() AS (name:chararray, age: int);
+B = FOREACH A GENERATE *, MyUDF(name, age);
+C = FOREACH A GENERATE name, age, MyUDF(*);
+          </source>
+          <p>A common error when using the star expression is the following:</p>
+          <source>
+G = GROUP A BY $0;
+C = FOREACH G GENERATE COUNT(*);
+          </source>
+          <p>In this example, the programmer really wants to count the number of elements in the bag in the second field: COUNT($1).</p>
+      </section>
+
+      <section id="bexp">
+          <title>Boolean expressions</title>
+          <p>Boolean expressions can be made up of UDFs that return a boolean value or boolean operators 
+          (see <a href="#boolops">Boolean Operators</a>). 
+          </p>
+      </section>
+           
+      <section id="texp">
+          <title>Tuple expressions</title>
+          <p>Tuple expressions form subexpressions into tuples. The tuple expression has the form (expression [, expression …]), where expression is a general expression. The simplest tuple expression is the star expression, which represents all fields.
+          </p>
+      </section>
+
+    <section id="gexp">
+          <title>General expressions</title>
+          <p>General expressions can be made up of UDFs and almost any operator. Since Pig does not consider boolean a base type, the result of a general expression cannot be a boolean. Field expressions are the simplest general expressions.
+          </p>
+      </section>
+   </section>
+   
+   
+    <!-- ++++++++++++++++++++++++++++++++++ --> 
+   <section>
+   <title>Schemas</title>
+   <p>Schemas enable you to assign names to and declare types for fields. Schemas are optional but we encourage you to use them whenever possible; type declarations result in better parse-time error checking and more efficient code execution. </p>
+   <p>Schemas are defined using the AS keyword with the LOAD, STREAM, and FOREACH operators. If you define a schema using the LOAD operator, then it is the load function that enforces the schema (see the <a href="#LOAD">LOAD</a> operator and <a href="udf.html">User Defined Functions</a> for more information).</p>
+   <p>Note the following:</p>
+   <ul>
+      <li>
+         <p>You can define a schema that includes both the field name and field type.</p>
+      </li>
+      <li>
+         <p>You can define a schema that includes the field name only; in this case, the field type defaults to bytearray.</p>
+      </li>
+      <li>
+         <p>You can choose not to define a schema; in this case, the field is un-named and the field type defaults to bytearray.</p>
+      </li>
+   </ul>
+   <p>If you assign a name to a field, you can refer to that field using the name or by positional notation. If you don't assign a name to a field (the field is un-named) you can only refer to the field using positional notation.</p>
+   <p>If you assign a type to a field, you can subsequently change the type using the cast operators. If you don't assign a type to a field, the field defaults to bytearray; you can change the default type using the cast operators.</p>
+   
+   <section>
+   <title>Schemas with LOAD and STREAM Statements</title>
+   <p>With LOAD and STREAM statements, the schema following the AS keyword must be enclosed in parentheses.</p>
+   <p>In this example the LOAD statement includes a schema definition for simple data types.</p>
+<source>
+A = LOAD 'data' AS (f1:int, f2:int);
+</source>   
+   </section>
+   
+   <section id="schemaforeach">
+   <title>Schemas with FOREACH Statements</title>
+   <p>With FOREACH statements, the schema following the AS keyword must be enclosed in parentheses when the FLATTEN operator is used. Otherwise, the schema should not be enclosed in parentheses.</p>
+   <p>In this example the FOREACH statement includes FLATTEN and a schema for simple data types.</p>
+<source>
+X = FOREACH C GENERATE FLATTEN(B) AS (f1:int, f2:int, f3:int), group;
+</source>  
+   <p>In this example the FOREACH statement includes a schema for a simple expression.</p>
+<source>
+X = FOREACH A GENERATE f1+f2 AS x1:int;
+</source>   
+   <p>In this example the FOREACH statement includes schemas for multiple fields.</p>
+<source>
+X = FOREACH A GENERATE f1 as user, f2 as age, f3 as gpa;
+</source> 
+   </section>
+   
+   <section>
+   <title>Schemas for Simple Data Types</title>
+   <p>Simple data types include int, long, float, double, chararray, and bytearray.</p>
+   
+   <section>
+   <title>Syntax</title>
+   <table>
+      <tr>
+            <td>
+               <p>(alias[:type]) [, (alias[:type]) …] )</p>
+            </td>
+         </tr>
+   </table></section>
+   
+   <section>
+   <title>Terms</title>
+   <table>
+      <tr>
+            <td>
+               <p>alias</p>
+            </td>
+            <td>
+               <p>The name assigned to the field.</p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>type</p>
+            </td>
+            <td>
+               <p>(Optional) The simple data type assigned to the field.</p>
+               <p>The alias and type are separated by a colon ( : ).</p>
+               <p>If the type is omitted, the field defaults to type bytearray.</p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>( , )</p>
+            </td>
+            <td>
+               <p>Multiple fields are enclosed in parentheses and separated by commas.</p>
+            </td>
+         </tr>
+   </table></section>
+   
+   <section>
+   <title>Examples</title>
+   <p>In this example the schema defines multiple types.</p>
+<source>
+cat student;
+John	18	4.0
+Mary	19   	3.8
+Bill	20   	3.9
+Joe	18   	3.8
+
+A = LOAD 'student' AS (name:chararray, age:int, gpa:float);
+
+DESCRIBE A;
+A: {name: chararray,age: int,gpa: float}
+
+DUMP A;
+(John,18,4.0F)
+(Mary,19,3.8F)
+(Bill,20,3.9F)
+(Joe,18,3.8F)
+</source>
+   
+   <p>In this example field "gpa" will default to bytearray because no type is declared. </p>
+<source>
+cat student;
+John	18	4.0
+Mary	19	3.8
+Bill	20	3.9
+Joe	18	3.8
+
+A = LOAD 'student' AS (name:chararray, age:int, gpa);
+
+DESCRIBE A;
+A: {name: chararray,age: int,gpa: bytearray}
+
+DUMP A;
+(John,18,4.0)
+(Mary,19,3.8)
+(Bill,20,3.9)
+(Joe,18,3.8)
+</source>
+   
+   </section></section>
+   
+   <section>
+   <title>Schemas for Complex Data Types</title>
+   <p>Complex data types include tuples, bags, and maps.</p></section>
+   
+   <section>
+   <title>Tuple Schema</title>
+   <p>A tuple is an ordered set of fields.</p>
+   
+   <section>
+   <title>Syntax</title>
+   <table>
+      <tr>
+            <td>
+               <p>alias[:tuple] (alias[:type]) [, (alias[:type]) …] )</p>
+            </td>
+         </tr>
+   </table>
+   </section>
+   
+   <section>
+   <title>Terms</title>
+   <table>
+      <tr>
+            <td>
+               <p>alias</p>
+            </td>
+            <td>
+               <p>The name assigned to the tuple.</p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>:tuple</p>
+            </td>
+            <td>
+               <p>(Optional) The data type, tuple (case insensitive).</p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>( )</p>
+            </td>
+            <td>
+               <p>The designation for a tuple, a set of parentheses.</p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>alias[:type]</p>
+            </td>
+            <td>
+               <p>The constituents of the tuple, where the schema definition rules for the corresponding type apply to the constituents of the tuple:</p>
+               <ul>
+                  <li>
+                     <p>alias – the name assigned to the field</p>
+                  </li>
+                  <li>
+                     <p>type (optional) – the simple or complex data type assigned to the field</p>
+                  </li>
+               </ul>
+            </td>
+         </tr>
+   </table>
+   </section>
+   
+   <section>
+   <title>Examples</title>
+   <p>In this example the schema defines one tuple. The load statements are equivalent.</p>
+
+ <source>
+cat data;
+(3,8,9)
+(1,4,7)
+(2,5,8)
+
+A = LOAD 'data' AS (T: tuple (f1:int, f2:int, f3:int));
+A = LOAD 'data' AS (T: (f1:int, f2:int, f3:int));
+
+DESCRIBE A;
+A: {T: (f1: int,f2: int,f3: int)}
+
+DUMP A;
+((3,8,9))
+((1,4,7))
+((2,5,8))
+</source>
+   
+   <p>In this example the schema defines two tuples.</p>
+<source>
+cat data;
+(3,8,9) (mary,19)
+(1,4,7) (john,18)
+(2,5,8) (joe,18)
+
+A = LOAD 'data' AS (F:tuple(f1:int,f2:int,f3:int),T:tuple(t1:chararray,t2:int));
+
+DESCRIBE A;
+A: {F: (f1: int,f2: int,f3: int),T: (t1: chararray,t2: int)}
+
+DUMP A;
+((3,8,9),(mary,19))
+((1,4,7),(john,18))
+((2,5,8),(joe,18))
+</source>
+   </section></section>
+   
+   <section>
+   <title>Bag Schema</title>
+   <p>A bag is a collection of tuples.</p>
+   
+   <section>
+   <title>Syntax</title>
+   <table>
+      <tr>
+            <td>
+               <p>alias[:bag] {tuple} </p>
+            </td>
+         </tr>
+   </table>
+   </section>
+   
+   <section>
+   <title>Terms</title>
+   <table>
+      <tr>
+            <td>
+               <p>alias</p>
+            </td>
+            <td>
+               <p>The name assigned to the bag.</p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>:bag</p>
+            </td>
+            <td>
+               <p>(Optional) The data type, bag (case insensitive).</p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>{ }</p>
+            </td>
+            <td>
+               <p>The designation for a bag, a set of curly brackets.</p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>tuple</p>
+            </td>
+            <td>
+               <p>A tuple (see Tuple Schema).</p>
+            </td>
+         </tr>
+   </table>
+   </section>
+   
+   <section>
+   <title>Examples</title>
+   <p>In this example the schema defines a bag. The two load statements are equivalent.</p>
+<source>
+cat data;
+{(3,8,9)}
+{(1,4,7)}
+{(2,5,8)}
+
+A = LOAD 'data' AS (B: bag {T: tuple(t1:int, t2:int, t3:int)});
+A = LOAD 'data' AS (B: {T: (t1:int, t2:int, t3:int)});
+
+DESCRIBE A;
+A: {B: {T: (t1: int,t2: int,t3: int)}}
+
+DUMP A;
+({(3,8,9)})
+({(1,4,7)})
+({(2,5,8)})
+</source>
+   </section></section>
+   
+   <section>
+   <title>Map Schema</title>
+   <p>A map is a set of key value pairs.</p>
+   
+   <section>
+   <title>Syntax (where &lt;&gt; means optional)</title>
+   <table>
+      <tr>
+            <td>
+               <p>alias&lt;:map&gt; [ ] </p>
+            </td>
+         </tr>
+   </table></section>
+   
+   <section>
+   <title>Terms</title>
+   <table>
+      <tr>
+            <td>
+               <p>alias</p>
+            </td>
+            <td>
+               <p>The name assigned to the map.</p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>:map</p>
+            </td>
+            <td>
+               <p>(Optional) The data type, map (case insensitive).</p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>[ ]   </p>
+            </td>
+            <td>
+               <p>The designation for a map, a set of straight brackets [ ].</p>
+            </td>
+         </tr>
+   </table>
+   </section>
+   
+   <section>
+   <title>Example</title>
+   <p>In this example the schema defines a map. The load statements are equivalent.</p>
+<source>
+cat data;
+[open#apache]
+[apache#hadoop]
+
+A = LOAD 'data' AS (M:map []);
+A = LOAD 'data' AS (M:[]);
+
+DESCRIBE A;
+A: {M: map[ ]}
+
+DUMP A;
+([open#apache])
+([apache#hadoop])
+</source>
+    </section></section>
+   
+   <section>
+   <title>Schemas for Multiple Types</title>
+   <p>You can define schemas for data that includes multiple types.</p>
+   
+   <section>
+   <title>Example</title>
+   <p>In this example the schema defines a tuple, bag, and map.</p>
+<source>
+A = LOAD 'mydata' AS (T1:tuple(f1:int, f2:int), B:bag{T2:tuple(t1:float,t2:float)}, M:map[] );
+
+A = LOAD 'mydata' AS (T1:(f1:int, f2:int), B:{T2:(t1:float,t2:float)}, M:[] );
+</source>
+</section></section></section>
+
+
+
+   
+   </section>
+   
+   
+<!-- ARITHMETIC OPERATORS, ETC -->
+<section>
+	<title>Arithmetic Operators and More</title>
+
+<section>
+<title>Arithmetic Operators</title>
+
+<section>
+<title>Description</title>
+   <table>
+      <tr>
+            <td>
+               <p>Operator</p>
+            </td>
+            <td>
+               <p>Symbol</p>
+            </td>
+            <td>
+               <p> Notes</p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>addition </p>
+            </td>
+            <td>
+               <p>+</p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>subtraction </p>
+            </td>
+            <td>
+               <p>-</p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>multiplication  </p>
+            </td>
+            <td>
+               <p>*</p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>division  </p>
+            </td>
+            <td>
+               <p>/</p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>modulo  </p>
+            </td>
+            <td>
+               <p>%</p>
+            </td>
+            <td>
+               <p>Returns the remainder of a divided by b (a%b).</p>
+               <p>Works with integral numbers (int, long). </p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>bincond </p>
+            </td>
+            <td>
+               <p>? :</p>
+            </td>
+            <td>
+               <p>(condition ? value_if_true : value_if_false) </p>
+               <p>The bincond should be enclosed in parentheses. </p>
+               <p>The schemas for the two conditional outputs of the bincond should match.</p>
+               <p>Use expressions  only (relational operators are not allowed).</p>
+            </td>
+          </tr>
+   </table>
+
+   <section>
+   <title>Examples </title>
+   <p>Suppose we have relation A.</p>
+<source>
+A = LOAD 'data' AS (f1:int, f2:int, B:bag{T:tuple(t1:int,t2:int)});
+
+DUMP A;
+(10,1,{(2,3),(4,6)})
+(10,3,{(2,3),(4,6)})
+(10,6,{(2,3),(4,6),(5,7)})
+</source>
+
+  <p>In this example the modulo operator is used with fields f1 and f2.</p>
+<source>
+X = FOREACH A GENERATE f1, f2, f1%f2;
+
+DUMP X;
+(10,1,0)
+(10,3,1)
+(10,6,4)
+</source>
+   
+   <p>In this example the bincond operator is used with fields f2 and B. The condition is "f2 equals 1"; if the condition is true, return 1; if the condition is false, return the count of the number of tuples in B.</p>
+<source>
+X = FOREACH A GENERATE f2, (f2==1?1:COUNT(B));
+
+DUMP X;
+(1,1L)
+(3,2L)
+(6,3L)
+</source>
+   </section>
+   
+   <section>
+   <title> Types Table: addition (+) and subtraction (-) operators</title>
+   <p>* bytearray cast as this data type</p>
+   <table>
+         <tr>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p>bag </p>
+            </td>
+            <td>
+               <p>tuple </p>
+            </td>
+            <td>
+               <p>map </p>
+            </td>
+            <td>
+               <p>int </p>
+            </td>
+            <td>
+               <p>long </p>
+            </td>
+            <td>
+               <p>float </p>
+            </td>
+            <td>
+               <p>double </p>
+            </td>
+            <td>
+               <p>chararray </p>
+            </td>
+            <td>
+               <p>bytearray </p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>bag </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>tuple </p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p>not yet </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>map </p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>int </p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p>int </p>
+            </td>
+            <td>
+               <p>long </p>
+            </td>
+            <td>
+               <p>float </p>
+            </td>
+            <td>
+               <p>double </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>cast as int </p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>long </p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p>long </p>
+            </td>
+            <td>
+               <p>float </p>
+            </td>
+            <td>
+               <p>double </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>cast as long  </p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>float </p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p>float </p>
+            </td>
+            <td>
+               <p>double </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>cast as float  </p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>double </p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p>double </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>cast as double   </p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>chararray </p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>bytearray </p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p>cast as double </p>
+            </td>
+         </tr>
+   </table>
+   </section>
+   
+   <section>
+   <title>Types Table: multiplication (*) and division (/) operators</title>
+   <p>Note: "cast as int" (or long, float, double) means the bytearray operand is cast as that data type.</p>
+   <table>
+         <tr>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p>bag </p>
+            </td>
+            <td>
+               <p>tuple </p>
+            </td>
+            <td>
+               <p>map </p>
+            </td>
+            <td>
+               <p>int </p>
+            </td>
+            <td>
+               <p>long </p>
+            </td>
+            <td>
+               <p>float </p>
+            </td>
+            <td>
+               <p>double </p>
+            </td>
+            <td>
+               <p>chararray </p>
+            </td>
+            <td>
+               <p>bytearray </p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>bag </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>not yet </p>
+            </td>
+            <td>
+               <p>not yet </p>
+            </td>
+            <td>
+               <p>not yet </p>
+            </td>
+            <td>
+               <p>not yet </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>tuple </p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>not yet </p>
+            </td>
+            <td>
+               <p>not yet </p>
+            </td>
+            <td>
+               <p>not yet </p>
+            </td>
+            <td>
+               <p>not yet </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>map </p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>int </p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p>int </p>
+            </td>
+            <td>
+               <p>long </p>
+            </td>
+            <td>
+               <p>float </p>
+            </td>
+            <td>
+               <p>double </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>cast as int </p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>long </p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p>long </p>
+            </td>
+            <td>
+               <p>float </p>
+            </td>
+            <td>
+               <p>double </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>cast as long </p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>float </p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p>float </p>
+            </td>
+            <td>
+               <p>double </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>cast as float </p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>double </p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p>double </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>cast as double  </p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>chararray </p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>bytearray </p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p>cast as double  </p>
+            </td>
+         </tr>
+   </table>
+   </section>
+   
+   <section>
+   <title>Types Table: modulo (%) operator</title>
+   <table>
+         <tr>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p>int </p>
+            </td>
+            <td>
+               <p>long </p>
+            </td>
+            <td>
+               <p>bytearray </p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>int </p>
+            </td>
+            <td>
+               <p>int </p>
+            </td>
+            <td>
+               <p>long </p>
+            </td>
+            <td>
+               <p>cast as int </p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>long </p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p>long </p>
+            </td>
+            <td>
+               <p>cast as long </p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>bytearray </p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+         </tr>
+   </table>
+   </section></section></section>
+   
+   <section>
+   <title>Comparison Operators</title>
+      
+    <section><title>Description</title>
+   <table>
+      <tr>
+            <td>
+               <p>Operator</p>
+            </td>
+            <td>
+               <p>Symbol</p>
+            </td>
+            <td>
+               <p> Notes</p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>equal  </p>
+            </td>
+            <td>
+               <p>==</p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>not equal </p>
+            </td>
+            <td>
+               <p>!=</p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>less than  </p>
+            </td>
+            <td>
+               <p>&lt;</p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>greater than </p>
+            </td>
+            <td>
+               <p>&gt;</p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>less than or equal to  </p>
+            </td>
+            <td>
+               <p>&lt;=</p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>greater than or equal to</p>
+            </td>
+            <td>
+               <p>&gt;=</p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>pattern matching  </p>
+            </td>
+            <td>
+               <p>matches</p>
+            </td>
+            <td>
+               <p>Regular expression matching.  Use the Java regular expression syntax described in <a href="http://java.sun.com/j2se/1.5.0/docs/api/java/util/regex/Pattern.html">java.util.regex.Pattern</a>.</p>
+            </td>
+         </tr>
+   </table>
+   <p>Use the comparison operators with numeric and string data.</p>
+   
+   <section>
+   <title>Example: numeric</title>
+<source>
+X = FILTER A BY (f1 == 8);
+</source>
+   </section>
+   
+   <section>
+   <title>Example: string</title>
+<source>
+X = FILTER A BY (f2 == 'apache');
+</source>
+   </section>
+   
+   <section>
+   <title>Example: matches</title>
+<source>
+X = FILTER A BY (f1 matches '.*apache.*');
+</source>
+   </section>
+   
+   <section>
+   <title>Types Table: equal (==) and not equal (!=) operators</title>
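+   <p>Note: "cast as boolean" means the bytearray operand is cast as the data type of the other operand and the comparison returns a boolean.</p>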
+   <table>
+         <tr>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p>bag </p>
+            </td>
+            <td>
+               <p>tuple </p>
+            </td>
+            <td>
+               <p>map </p>
+            </td>
+            <td>
+               <p>int </p>
+            </td>
+            <td>
+               <p>long </p>
+            </td>
+            <td>
+               <p>float </p>
+            </td>
+            <td>
+               <p>double </p>
+            </td>
+            <td>
+               <p>chararray </p>
+            </td>
+            <td>
+               <p>bytearray </p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>bag </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>tuple </p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p>boolean (see Note 1) </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>map </p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p>boolean </p>
+               <p>(see Note 2)</p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>int </p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p>boolean </p>
+            </td>
+            <td>
+               <p>boolean </p>
+            </td>
+            <td>
+               <p>boolean </p>
+            </td>
+            <td>
+               <p>boolean </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>cast as boolean </p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>long </p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p>boolean </p>
+            </td>
+            <td>
+               <p>boolean </p>
+            </td>
+            <td>
+               <p>boolean </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>cast as boolean </p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>float </p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p>boolean </p>
+            </td>
+            <td>
+               <p>boolean </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>cast as boolean  </p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>double </p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p>boolean </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>cast as boolean  </p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>chararray </p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p>boolean </p>
+            </td>
+            <td>
+               <p>cast as boolean </p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>bytearray </p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p>boolean </p>
+            </td>
+         </tr>
+   </table>
+   <p>Note 1: boolean (Tuple A is equal to tuple B if they have the same size s, and for all 0 &lt;= i &lt; s, A[i] == B[i])</p>
+   <p>Note 2: boolean (Map A is equal to map B if A and B have the same number of entries, and for every key k1 in A with a value of v1, there is a key k2 in B with a value of v2, such that k1 == k2 and v1 == v2)</p>
+   </section>
+   
+   <section>
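+   <title>Types Table: less than (&lt;), greater than (&gt;), less than or equal to (&lt;=), and greater than or equal to (&gt;=) operators</title>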
+   <table>
+         <tr>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p>bag </p>
+            </td>
+            <td>
+               <p>tuple </p>
+            </td>
+            <td>
+               <p>map </p>
+            </td>
+            <td>
+               <p>int </p>
+            </td>
+            <td>
+               <p>long </p>
+            </td>
+            <td>
+               <p>float </p>
+            </td>
+            <td>
+               <p>double </p>
+            </td>
+            <td>
+               <p>chararray </p>
+            </td>
+            <td>
+               <p>bytearray </p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>bag </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>tuple </p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>map </p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>int </p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p>boolean </p>
+            </td>
+            <td>
+               <p>boolean </p>
+            </td>
+            <td>
+               <p>boolean </p>
+            </td>
+            <td>
+               <p>boolean </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>boolean (bytearray cast as int) </p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>long </p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p>boolean </p>
+            </td>
+            <td>
+               <p>boolean </p>
+            </td>
+            <td>
+               <p>boolean </p>
+            </td>
+            <td>
+               <p>error </p>
+            </td>
+            <td>
+               <p>boolean (bytearray cast as long) </p>
+            </td>
+         </tr>
+         <tr>
+            <td>
+               <p>float </p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p></p>
+            </td>
+            <td>
+               <p>boolean </p>
+            </td>
+            <td>
+               <p>boolean </p>
+            </td>

[... 4039 lines stripped ...]