You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ch...@apache.org on 2013/07/24 18:01:18 UTC

svn commit: r1506605 - in /pig/trunk: ./ src/docs/src/documentation/content/xdocs/ src/org/apache/pig/builtin/ test/org/apache/pig/test/

Author: cheolsoo
Date: Wed Jul 24 16:01:17 2013
New Revision: 1506605

URL: http://svn.apache.org/r1506605
Log:
PIG-3182: Pig currently lacks functions to trim the whitespace only on one hand side (sarutak via cheolsoo)

Added:
    pig/trunk/src/org/apache/pig/builtin/LTRIM.java
    pig/trunk/src/org/apache/pig/builtin/RTRIM.java
Modified:
    pig/trunk/CHANGES.txt
    pig/trunk/src/docs/src/documentation/content/xdocs/func.xml
    pig/trunk/src/docs/src/documentation/content/xdocs/pig-index.xml
    pig/trunk/test/org/apache/pig/test/TestBuiltin.java
    pig/trunk/test/org/apache/pig/test/TestStringUDFs.java

Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1506605&r1=1506604&r2=1506605&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Wed Jul 24 16:01:17 2013
@@ -28,6 +28,8 @@ PIG-3174: Remove rpm and deb artifacts f
 
 IMPROVEMENTS
 
+PIG-3182: Pig currently lacks functions to trim the whitespace only on one hand side (sarutak via cheolsoo)
+
 PIG-3163: Pig current releases lack a UDF endsWith. This UDF tests if a given string ends with the specified suffix (sriramkrishnan via cheolsoo)
 
 PIG-3015: Rewrite of AvroStorage (jadler via cheolsoo)

Modified: pig/trunk/src/docs/src/documentation/content/xdocs/func.xml
URL: http://svn.apache.org/viewvc/pig/trunk/src/docs/src/documentation/content/xdocs/func.xml?rev=1506605&r1=1506604&r2=1506605&view=diff
==============================================================================
--- pig/trunk/src/docs/src/documentation/content/xdocs/func.xml (original)
+++ pig/trunk/src/docs/src/documentation/content/xdocs/func.xml Wed Jul 24 16:01:17 2013
@@ -3332,6 +3332,43 @@ Use the LOWER function to convert all ch
 </section>
 </section> 
 
+<!-- ======================================================== -->  
+ <section id="ltrim">
+   <title>LTRIM</title>
+   <p>Returns a copy of a string with only leading white space removed.</p>
+
+<section>
+   <title>Syntax</title>
+   <table>
+       <tr>
+            <td>
+               <p>LTRIM(expression)</p>
+            </td>
+         </tr> 
+        
+   </table></section>
+   
+<section>
+   <title>Terms</title>
+   <table>
+       <tr>
+            <td>
+               <p>expression</p>
+            </td>
+            <td>
+               <p>An expression whose result is chararray. </p>
+            </td>
+         </tr> 
+   </table>
+</section>
+
+<section>
+     <title>Usage</title>
+     <p>
+Use the LTRIM function to remove leading white space from a string.
+     </p>
+</section>
+</section> 
 
 <!-- ======================================================== -->
  <section id="regex-extract">
@@ -3535,6 +3572,44 @@ by prefixing them with double backslashe
 </section> 
 
 <!-- ======================================================== -->  
+ <section id="rtrim">
+   <title>RTRIM</title>
+   <p>Returns a copy of a string with only trailing white space removed.</p>
+
+<section>
+   <title>Syntax</title>
+   <table>
+       <tr>
+            <td>
+               <p>RTRIM(expression)</p>
+            </td>
+         </tr> 
+        
+   </table></section>
+   
+<section>
+   <title>Terms</title>
+   <table>
+       <tr>
+            <td>
+               <p>expression</p>
+            </td>
+            <td>
+               <p>An expression whose result is chararray. </p>
+            </td>
+         </tr> 
+   </table>
+</section>
+
+<section>
+     <title>Usage</title>
+     <p>
+Use the RTRIM function to remove trailing white space from a string.
+     </p>
+</section>
+</section> 
+
+<!-- ======================================================== -->  
  <section id="strsplit">
    <title>STRSPLIT</title>
    <p>Splits a string around matches of a given regular expression. </p>

Modified: pig/trunk/src/docs/src/documentation/content/xdocs/pig-index.xml
URL: http://svn.apache.org/viewvc/pig/trunk/src/docs/src/documentation/content/xdocs/pig-index.xml?rev=1506605&r1=1506604&r2=1506605&view=diff
==============================================================================
--- pig/trunk/src/docs/src/documentation/content/xdocs/pig-index.xml (original)
+++ pig/trunk/src/docs/src/documentation/content/xdocs/pig-index.xml Wed Jul 24 16:01:17 2013
@@ -556,6 +556,8 @@
 
 <p><a href="func.html#lower">LOWER</a> function</p>
 
+<p><a href="func.html#ltrim">LTRIM</a> function</p>
+
 <!-- ==== M ================================================================== -->
 <p></p>
 <p id="m-index"><strong>M</strong> (<a href="#top">top</a>) ----------------------------------------------</p>
@@ -836,6 +838,8 @@
 
 <p><a href="func.html#round">ROUND</a> function</p>
 
+<p><a href="func.html#rtrim">RTRIM</a> function</p>
+
 <p><a href="cmds.html#run">run</a> command</p>
 
 <p><a href="start.html#run">running Pig</a>

Added: pig/trunk/src/org/apache/pig/builtin/LTRIM.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/builtin/LTRIM.java?rev=1506605&view=auto
==============================================================================
--- pig/trunk/src/org/apache/pig/builtin/LTRIM.java (added)
+++ pig/trunk/src/org/apache/pig/builtin/LTRIM.java Wed Jul 24 16:01:17 2013
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.builtin;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.ArrayList;
+
+import org.apache.pig.EvalFunc;
+import org.apache.pig.PigWarning;
+import org.apache.pig.backend.executionengine.ExecException;
+import org.apache.pig.data.DataType;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.impl.logicalLayer.schema.Schema;
+import org.apache.pig.FuncSpec;
+import org.apache.pig.impl.logicalLayer.FrontendException;
+
+/**
+ * Returns a string, with only leading space characters omitted.
+ * Implemented with {@link java.lang.String#replaceFirst(String, String) String.replaceFirst()} using the pattern {@code "^ +"}, not {@code String.trim()}.
+ */
+public class LTRIM extends EvalFunc<String> {
+    @Override
+    public String exec(Tuple input) throws IOException {
+        if (input == null || input.size() == 0) {
+            return null;
+        }
+        try {
+            String str = (String) input.get(0);
+            if (str == null) return null;
+            if (str.length() == 0) return str;
+            return str.replaceFirst("^ +", "");
+        } catch (ExecException e) {
+            warn("Error reading input: " + e.getMessage(), PigWarning.UDF_WARNING_1);
+            return null;
+        }
+    }
+
+    @Override
+    public Schema outputSchema(Schema input) {
+        return new Schema(new Schema.FieldSchema(null, DataType.CHARARRAY));
+    }
+
+    /* (non-Javadoc)
+     * @see org.apache.pig.EvalFunc#getArgToFuncMapping()
+     */
+    @Override
+    public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
+        List<FuncSpec> funcList = new ArrayList<FuncSpec>();
+        funcList.add(new FuncSpec(this.getClass().getName(), new Schema(new Schema.FieldSchema(null, DataType.CHARARRAY))));
+
+        return funcList;
+    }
+
+}

Added: pig/trunk/src/org/apache/pig/builtin/RTRIM.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/builtin/RTRIM.java?rev=1506605&view=auto
==============================================================================
--- pig/trunk/src/org/apache/pig/builtin/RTRIM.java (added)
+++ pig/trunk/src/org/apache/pig/builtin/RTRIM.java Wed Jul 24 16:01:17 2013
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.builtin;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.ArrayList;
+
+import org.apache.pig.EvalFunc;
+import org.apache.pig.PigWarning;
+import org.apache.pig.backend.executionengine.ExecException;
+import org.apache.pig.data.DataType;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.impl.logicalLayer.schema.Schema;
+import org.apache.pig.FuncSpec;
+import org.apache.pig.impl.logicalLayer.FrontendException;
+
+/**
+ * Returns a string, with only trailing space characters omitted.
+ * Implemented with {@link java.lang.String#replaceFirst(String, String) String.replaceFirst()} using the pattern {@code " +$"}, not {@code String.trim()}.
+ */
+public class RTRIM extends EvalFunc<String> {
+    @Override
+    public String exec(Tuple input) throws IOException {
+        if (input == null || input.size() == 0) {
+            return null;
+        }
+        try {
+            String str = (String) input.get(0);
+            if (str == null) return null;
+            if (str.length() == 0) return str;
+            return str.replaceFirst(" +$", "");
+        } catch (ExecException e) {
+            warn("Error reading input: " + e.getMessage(), PigWarning.UDF_WARNING_1);
+            return null;
+        }
+    }
+
+    @Override
+    public Schema outputSchema(Schema input) {
+        return new Schema(new Schema.FieldSchema(null, DataType.CHARARRAY));
+    }
+
+    /* (non-Javadoc)
+     * @see org.apache.pig.EvalFunc#getArgToFuncMapping()
+     */
+    @Override
+    public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
+        List<FuncSpec> funcList = new ArrayList<FuncSpec>();
+        funcList.add(new FuncSpec(this.getClass().getName(), new Schema(new Schema.FieldSchema(null, DataType.CHARARRAY))));
+
+        return funcList;
+    }
+
+}

Modified: pig/trunk/test/org/apache/pig/test/TestBuiltin.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestBuiltin.java?rev=1506605&r1=1506604&r2=1506605&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestBuiltin.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestBuiltin.java Wed Jul 24 16:01:17 2013
@@ -72,6 +72,7 @@ import org.apache.pig.builtin.KEYSET;
 import org.apache.pig.builtin.LAST_INDEX_OF;
 import org.apache.pig.builtin.LCFIRST;
 import org.apache.pig.builtin.LOWER;
+import org.apache.pig.builtin.LTRIM;
 import org.apache.pig.builtin.MapSize;
 import org.apache.pig.builtin.MilliSecondsBetween;
 import org.apache.pig.builtin.MinutesBetween;
@@ -80,6 +81,7 @@ import org.apache.pig.builtin.PigStorage
 import org.apache.pig.builtin.REGEX_EXTRACT;
 import org.apache.pig.builtin.REGEX_EXTRACT_ALL;
 import org.apache.pig.builtin.REPLACE;
+import org.apache.pig.builtin.RTRIM;
 import org.apache.pig.builtin.SIZE;
 import org.apache.pig.builtin.STRSPLIT;
 import org.apache.pig.builtin.SUBSTRING;
@@ -1546,7 +1548,9 @@ public class TestBuiltin {
         String inputStrUpper = "HELLO WORLD!";
         String inputStrCamel = "hello World!";
         String inputStroWitha = "Hella Warld!";
-        String inputStrExtra = "Hello World!   ";
+        String inputStrSpaceRight = "Hello World!   ";
+        String inputStrSpaceLeft = "   Hello World!";
+        String inputStrSpaceBoth = "   Hello World!   ";
 
         List<Object> l = new LinkedList<Object>();
         l.add(inputStr);
@@ -1614,7 +1618,19 @@ public class TestBuiltin {
         assertTrue(output.equals(expected));
 
         strFunc = new TRIM();
-        input = TupleFactory.getInstance().newTuple(inputStrExtra);
+        input = TupleFactory.getInstance().newTuple(inputStrSpaceBoth);
+        expected = inputStr;
+        output = strFunc.exec(input);
+        assertTrue(output.equals(expected));
+
+        strFunc = new LTRIM();
+        input = TupleFactory.getInstance().newTuple(inputStrSpaceLeft);
+        expected = inputStr;
+        output = strFunc.exec(input);
+        assertTrue(output.equals(expected));
+
+        strFunc = new RTRIM();
+        input = TupleFactory.getInstance().newTuple(inputStrSpaceRight);
         expected = inputStr;
         output = strFunc.exec(input);
         assertTrue(output.equals(expected));

Modified: pig/trunk/test/org/apache/pig/test/TestStringUDFs.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestStringUDFs.java?rev=1506605&r1=1506604&r2=1506605&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestStringUDFs.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestStringUDFs.java Wed Jul 24 16:01:17 2013
@@ -34,6 +34,8 @@ import org.apache.pig.builtin.ENDSWITH;
 import org.apache.pig.builtin.STRSPLIT;
 import org.apache.pig.builtin.SUBSTRING;
 import org.apache.pig.builtin.TRIM;
+import org.apache.pig.builtin.LTRIM;
+import org.apache.pig.builtin.RTRIM;
 import org.apache.pig.builtin.EqualsIgnoreCase;
 import org.apache.pig.data.Tuple;
 import org.apache.pig.data.TupleFactory;
@@ -135,13 +137,57 @@ public class TestStringUDFs {
         Tuple testTuple = Util.buildTuple("nospaces");
         assertEquals("nospaces".trim(), trim.exec(testTuple));
         
-        testTuple = Util.buildTuple("spaces    ");
-        assertEquals("spaces     ".trim(), trim.exec(testTuple));
+        testTuple = Util.buildTuple("spaces right    ");
+        assertEquals("spaces right", trim.exec(testTuple));
+        
+        testTuple = Util.buildTuple("    spaces left");
+        assertEquals("spaces left", trim.exec(testTuple));
+        
+        testTuple = Util.buildTuple("    spaces both    ");
+        assertEquals("spaces both", trim.exec(testTuple));
+        
+        testTuple = TupleFactory.getInstance().newTuple();
+        assertNull(trim.exec(testTuple));
+    }
+
+    @Test
+    public void testLtrim() throws IOException {
+        LTRIM trim = new LTRIM();
+        Tuple testTuple = Util.buildTuple("nospaces");
+        assertEquals("nospaces", trim.exec(testTuple));
+        
+        testTuple = Util.buildTuple("spaces right    ");
+        assertEquals("spaces right    ", trim.exec(testTuple));
+        
+        testTuple = Util.buildTuple("    spaces left");
+        assertEquals("spaces left", trim.exec(testTuple));
+        
+        testTuple = Util.buildTuple("    spaces both    ");
+        assertEquals("spaces both    ", trim.exec(testTuple));
         
         testTuple = TupleFactory.getInstance().newTuple();
         assertNull(trim.exec(testTuple));
     }
     
+    @Test
+    public void testRtrim() throws IOException {
+        RTRIM trim = new RTRIM();
+        Tuple testTuple = Util.buildTuple("nospaces");
+        assertEquals("nospaces", trim.exec(testTuple));
+        
+        testTuple = Util.buildTuple("spaces right    ");
+        assertEquals("spaces right", trim.exec(testTuple));
+        
+        testTuple = Util.buildTuple("    spaces left");
+        assertEquals("    spaces left", trim.exec(testTuple));
+        
+        testTuple = Util.buildTuple("    spaces both    ");
+        assertEquals("    spaces both", trim.exec(testTuple));
+        
+        testTuple = TupleFactory.getInstance().newTuple();
+        assertNull(trim.exec(testTuple));
+    }
+
     @Test 
     public void testSplit() throws IOException {
         STRSPLIT splitter = new STRSPLIT();
@@ -204,7 +250,7 @@ public class TestStringUDFs {
 
     @Test
     public void testEqualsIgnoreCase() throws IOException {
-    	EqualsIgnoreCase equalsIgnoreCase = new EqualsIgnoreCase ();
+        EqualsIgnoreCase equalsIgnoreCase = new EqualsIgnoreCase ();
         Tuple testTuple = Util.buildTuple("ABC","abc");
         assertEquals("Strings are NOT equalsIgnoreCase", "ABC".equalsIgnoreCase("abc"), equalsIgnoreCase.exec(testTuple));
         testTuple = Util.buildTuple("ABC", "aBC");