You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ch...@apache.org on 2013/07/24 18:01:18 UTC
svn commit: r1506605 - in /pig/trunk: ./
src/docs/src/documentation/content/xdocs/ src/org/apache/pig/builtin/
test/org/apache/pig/test/
Author: cheolsoo
Date: Wed Jul 24 16:01:17 2013
New Revision: 1506605
URL: http://svn.apache.org/r1506605
Log:
PIG-3182: Pig currently lacks functions to trim the whitespace only on one hand side (sarutak via cheolsoo)
Added:
pig/trunk/src/org/apache/pig/builtin/LTRIM.java
pig/trunk/src/org/apache/pig/builtin/RTRIM.java
Modified:
pig/trunk/CHANGES.txt
pig/trunk/src/docs/src/documentation/content/xdocs/func.xml
pig/trunk/src/docs/src/documentation/content/xdocs/pig-index.xml
pig/trunk/test/org/apache/pig/test/TestBuiltin.java
pig/trunk/test/org/apache/pig/test/TestStringUDFs.java
Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1506605&r1=1506604&r2=1506605&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Wed Jul 24 16:01:17 2013
@@ -28,6 +28,8 @@ PIG-3174: Remove rpm and deb artifacts f
IMPROVEMENTS
+PIG-3182: Pig currently lacks functions to trim the whitespace only on one hand side (sarutak via cheolsoo)
+
PIG-3163: Pig current releases lack a UDF endsWith. This UDF tests if a given string ends with the specified suffix (sriramkrishnan via cheolsoo)
PIG-3015: Rewrite of AvroStorage (jadler via cheolsoo)
Modified: pig/trunk/src/docs/src/documentation/content/xdocs/func.xml
URL: http://svn.apache.org/viewvc/pig/trunk/src/docs/src/documentation/content/xdocs/func.xml?rev=1506605&r1=1506604&r2=1506605&view=diff
==============================================================================
--- pig/trunk/src/docs/src/documentation/content/xdocs/func.xml (original)
+++ pig/trunk/src/docs/src/documentation/content/xdocs/func.xml Wed Jul 24 16:01:17 2013
@@ -3332,6 +3332,43 @@ Use the LOWER function to convert all ch
</section>
</section>
+<!-- ======================================================== -->
+ <section id="ltrim">
+ <title>LTRIM</title>
+ <p>Returns a copy of a string with only leading white space removed.</p>
+
+<section>
+ <title>Syntax</title>
+ <table>
+ <tr>
+ <td>
+ <p>LTRIM(expression)</p>
+ </td>
+ </tr>
+
+ </table></section>
+
+<section>
+ <title>Terms</title>
+ <table>
+ <tr>
+ <td>
+ <p>expression</p>
+ </td>
+ <td>
+ <p>An expression whose result is chararray. </p>
+ </td>
+ </tr>
+ </table>
+</section>
+
+<section>
+ <title>Usage</title>
+ <p>
+Use the LTRIM function to remove leading white space from a string.
+ </p>
+</section>
+</section>
<!-- ======================================================== -->
<section id="regex-extract">
@@ -3535,6 +3572,44 @@ by prefixing them with double backslashe
</section>
<!-- ======================================================== -->
+ <section id="rtrim">
+ <title>RTRIM</title>
+ <p>Returns a copy of a string with only trailing white space removed.</p>
+
+<section>
+ <title>Syntax</title>
+ <table>
+ <tr>
+ <td>
+ <p>RTRIM(expression)</p>
+ </td>
+ </tr>
+
+ </table></section>
+
+<section>
+ <title>Terms</title>
+ <table>
+ <tr>
+ <td>
+ <p>expression</p>
+ </td>
+ <td>
+ <p>An expression whose result is chararray. </p>
+ </td>
+ </tr>
+ </table>
+</section>
+
+<section>
+ <title>Usage</title>
+ <p>
+Use the RTRIM function to remove trailing white space from a string.
+ </p>
+</section>
+</section>
+
+<!-- ======================================================== -->
<section id="strsplit">
<title>STRSPLIT</title>
<p>Splits a string around matches of a given regular expression. </p>
Modified: pig/trunk/src/docs/src/documentation/content/xdocs/pig-index.xml
URL: http://svn.apache.org/viewvc/pig/trunk/src/docs/src/documentation/content/xdocs/pig-index.xml?rev=1506605&r1=1506604&r2=1506605&view=diff
==============================================================================
--- pig/trunk/src/docs/src/documentation/content/xdocs/pig-index.xml (original)
+++ pig/trunk/src/docs/src/documentation/content/xdocs/pig-index.xml Wed Jul 24 16:01:17 2013
@@ -556,6 +556,8 @@
<p><a href="func.html#lower">LOWER</a> function</p>
+<p><a href="func.html#ltrim">LTRIM</a> function</p>
+
<!-- ==== M ================================================================== -->
<p></p>
<p id="m-index"><strong>M</strong> (<a href="#top">top</a>) ----------------------------------------------</p>
@@ -836,6 +838,8 @@
<p><a href="func.html#round">ROUND</a> function</p>
+<p><a href="func.html#rtrim">RTRIM</a> function</p>
+
<p><a href="cmds.html#run">run</a> command</p>
<p><a href="start.html#run">running Pig</a>
Added: pig/trunk/src/org/apache/pig/builtin/LTRIM.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/builtin/LTRIM.java?rev=1506605&view=auto
==============================================================================
--- pig/trunk/src/org/apache/pig/builtin/LTRIM.java (added)
+++ pig/trunk/src/org/apache/pig/builtin/LTRIM.java Wed Jul 24 16:01:17 2013
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.builtin;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.ArrayList;
+
+import org.apache.pig.EvalFunc;
+import org.apache.pig.PigWarning;
+import org.apache.pig.backend.executionengine.ExecException;
+import org.apache.pig.data.DataType;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.impl.logicalLayer.schema.Schema;
+import org.apache.pig.FuncSpec;
+import org.apache.pig.impl.logicalLayer.FrontendException;
+
+/**
+ * Returns a copy of a chararray with only its leading whitespace removed. Whitespace is what
+ * {@link java.lang.String#trim() String.trim()} strips: every char less than or equal to ' '.
+ */
+public class LTRIM extends EvalFunc<String> {
+    /**
+     * @param input tuple whose first field is the chararray to trim
+     * @return the left-trimmed string, or null if the field is absent, null or unreadable
+     */
+    @Override
+    public String exec(Tuple input) throws IOException {
+        if (input == null || input.size() == 0) return null;
+        try {
+            String str = (String) input.get(0);
+            if (str == null) return null;
+            int start = 0; // index of the first char that is not whitespace
+            while (start < str.length() && str.charAt(start) <= ' ') start++;
+            return start == 0 ? str : str.substring(start);
+        } catch (ExecException e) {
+            warn("Error reading input: " + e.getMessage(), PigWarning.UDF_WARNING_1);
+            return null;
+        }
+    }
+
+    /** Declares the result as a single unnamed chararray field. */
+    @Override
+    public Schema outputSchema(Schema input) {
+        return new Schema(new Schema.FieldSchema(null, DataType.CHARARRAY));
+    }
+
+    /** Maps a single chararray argument to this class. */
+    @Override
+    public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
+        List<FuncSpec> funcList = new ArrayList<FuncSpec>();
+        funcList.add(new FuncSpec(this.getClass().getName(), new Schema(new Schema.FieldSchema(null, DataType.CHARARRAY))));
+        return funcList;
+    }
+}
Added: pig/trunk/src/org/apache/pig/builtin/RTRIM.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/builtin/RTRIM.java?rev=1506605&view=auto
==============================================================================
--- pig/trunk/src/org/apache/pig/builtin/RTRIM.java (added)
+++ pig/trunk/src/org/apache/pig/builtin/RTRIM.java Wed Jul 24 16:01:17 2013
@@ -0,0 +1,70 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.pig.builtin;
+
+import java.io.IOException;
+import java.util.List;
+import java.util.ArrayList;
+
+import org.apache.pig.EvalFunc;
+import org.apache.pig.PigWarning;
+import org.apache.pig.backend.executionengine.ExecException;
+import org.apache.pig.data.DataType;
+import org.apache.pig.data.Tuple;
+import org.apache.pig.impl.logicalLayer.schema.Schema;
+import org.apache.pig.FuncSpec;
+import org.apache.pig.impl.logicalLayer.FrontendException;
+
+/**
+ * Returns a copy of a chararray with only its trailing whitespace removed. Whitespace is what
+ * {@link java.lang.String#trim() String.trim()} strips: every char less than or equal to ' '.
+ */
+public class RTRIM extends EvalFunc<String> {
+    /**
+     * @param input tuple whose first field is the chararray to trim
+     * @return the right-trimmed string, or null if the field is absent, null or unreadable
+     */
+    @Override
+    public String exec(Tuple input) throws IOException {
+        if (input == null || input.size() == 0) return null;
+        try {
+            String str = (String) input.get(0);
+            if (str == null) return null;
+            int end = str.length(); // one past the last char that is not whitespace
+            while (end > 0 && str.charAt(end - 1) <= ' ') end--;
+            return end == str.length() ? str : str.substring(0, end);
+        } catch (ExecException e) {
+            warn("Error reading input: " + e.getMessage(), PigWarning.UDF_WARNING_1);
+            return null;
+        }
+    }
+
+    /** Declares the result as a single unnamed chararray field. */
+    @Override
+    public Schema outputSchema(Schema input) {
+        return new Schema(new Schema.FieldSchema(null, DataType.CHARARRAY));
+    }
+
+    /** Maps a single chararray argument to this class. */
+    @Override
+    public List<FuncSpec> getArgToFuncMapping() throws FrontendException {
+        List<FuncSpec> funcList = new ArrayList<FuncSpec>();
+        funcList.add(new FuncSpec(this.getClass().getName(), new Schema(new Schema.FieldSchema(null, DataType.CHARARRAY))));
+        return funcList;
+    }
+}
Modified: pig/trunk/test/org/apache/pig/test/TestBuiltin.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestBuiltin.java?rev=1506605&r1=1506604&r2=1506605&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestBuiltin.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestBuiltin.java Wed Jul 24 16:01:17 2013
@@ -72,6 +72,7 @@ import org.apache.pig.builtin.KEYSET;
import org.apache.pig.builtin.LAST_INDEX_OF;
import org.apache.pig.builtin.LCFIRST;
import org.apache.pig.builtin.LOWER;
+import org.apache.pig.builtin.LTRIM;
import org.apache.pig.builtin.MapSize;
import org.apache.pig.builtin.MilliSecondsBetween;
import org.apache.pig.builtin.MinutesBetween;
@@ -80,6 +81,7 @@ import org.apache.pig.builtin.PigStorage
import org.apache.pig.builtin.REGEX_EXTRACT;
import org.apache.pig.builtin.REGEX_EXTRACT_ALL;
import org.apache.pig.builtin.REPLACE;
+import org.apache.pig.builtin.RTRIM;
import org.apache.pig.builtin.SIZE;
import org.apache.pig.builtin.STRSPLIT;
import org.apache.pig.builtin.SUBSTRING;
@@ -1546,7 +1548,9 @@ public class TestBuiltin {
String inputStrUpper = "HELLO WORLD!";
String inputStrCamel = "hello World!";
String inputStroWitha = "Hella Warld!";
- String inputStrExtra = "Hello World! ";
+ String inputStrSpaceRight = "Hello World! ";
+ String inputStrSpaceLeft = " Hello World!";
+ String inputStrSpaceBoth = " Hello World! ";
List<Object> l = new LinkedList<Object>();
l.add(inputStr);
@@ -1614,7 +1618,19 @@ public class TestBuiltin {
assertTrue(output.equals(expected));
strFunc = new TRIM();
- input = TupleFactory.getInstance().newTuple(inputStrExtra);
+ input = TupleFactory.getInstance().newTuple(inputStrSpaceBoth);
+ expected = inputStr;
+ output = strFunc.exec(input);
+ assertTrue(output.equals(expected));
+
+ strFunc = new LTRIM();
+ input = TupleFactory.getInstance().newTuple(inputStrSpaceLeft);
+ expected = inputStr;
+ output = strFunc.exec(input);
+ assertTrue(output.equals(expected));
+
+ strFunc = new RTRIM();
+ input = TupleFactory.getInstance().newTuple(inputStrSpaceRight);
expected = inputStr;
output = strFunc.exec(input);
assertTrue(output.equals(expected));
Modified: pig/trunk/test/org/apache/pig/test/TestStringUDFs.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestStringUDFs.java?rev=1506605&r1=1506604&r2=1506605&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestStringUDFs.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestStringUDFs.java Wed Jul 24 16:01:17 2013
@@ -34,6 +34,8 @@ import org.apache.pig.builtin.ENDSWITH;
import org.apache.pig.builtin.STRSPLIT;
import org.apache.pig.builtin.SUBSTRING;
import org.apache.pig.builtin.TRIM;
+import org.apache.pig.builtin.LTRIM;
+import org.apache.pig.builtin.RTRIM;
import org.apache.pig.builtin.EqualsIgnoreCase;
import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
@@ -135,13 +137,57 @@ public class TestStringUDFs {
Tuple testTuple = Util.buildTuple("nospaces");
assertEquals("nospaces".trim(), trim.exec(testTuple));
- testTuple = Util.buildTuple("spaces ");
- assertEquals("spaces ".trim(), trim.exec(testTuple));
+ testTuple = Util.buildTuple("spaces right ");
+ assertEquals("spaces right", trim.exec(testTuple));
+
+ testTuple = Util.buildTuple(" spaces left");
+ assertEquals("spaces left", trim.exec(testTuple));
+
+ testTuple = Util.buildTuple(" spaces both ");
+ assertEquals("spaces both", trim.exec(testTuple));
+
+ testTuple = TupleFactory.getInstance().newTuple();
+ assertNull(trim.exec(testTuple));
+ }
+
+ @Test
+ public void testLtrim() throws IOException {
+ LTRIM trim = new LTRIM();
+ Tuple testTuple = Util.buildTuple("nospaces");
+ assertEquals("nospaces", trim.exec(testTuple));
+
+ testTuple = Util.buildTuple("spaces right ");
+ assertEquals("spaces right ", trim.exec(testTuple));
+
+ testTuple = Util.buildTuple(" spaces left");
+ assertEquals("spaces left", trim.exec(testTuple));
+
+ testTuple = Util.buildTuple(" spaces both ");
+ assertEquals("spaces both ", trim.exec(testTuple));
testTuple = TupleFactory.getInstance().newTuple();
assertNull(trim.exec(testTuple));
}
+ @Test
+ public void testRtrim() throws IOException {
+ RTRIM trim = new RTRIM();
+ Tuple testTuple = Util.buildTuple("nospaces");
+ assertEquals("nospaces", trim.exec(testTuple));
+
+ testTuple = Util.buildTuple("spaces right ");
+ assertEquals("spaces right", trim.exec(testTuple));
+
+ testTuple = Util.buildTuple(" spaces left");
+ assertEquals(" spaces left", trim.exec(testTuple));
+
+ testTuple = Util.buildTuple(" spaces both ");
+ assertEquals(" spaces both", trim.exec(testTuple));
+
+ testTuple = TupleFactory.getInstance().newTuple();
+ assertNull(trim.exec(testTuple));
+ }
+
@Test
public void testSplit() throws IOException {
STRSPLIT splitter = new STRSPLIT();
@@ -204,7 +250,7 @@ public class TestStringUDFs {
@Test
public void testEqualsIgnoreCase() throws IOException {
- EqualsIgnoreCase equalsIgnoreCase = new EqualsIgnoreCase ();
+ EqualsIgnoreCase equalsIgnoreCase = new EqualsIgnoreCase ();
Tuple testTuple = Util.buildTuple("ABC","abc");
assertEquals("Strings are NOT equalsIgnoreCase", "ABC".equalsIgnoreCase("abc"), equalsIgnoreCase.exec(testTuple));
testTuple = Util.buildTuple("ABC", "aBC");