You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ro...@apache.org on 2013/06/13 15:03:21 UTC
svn commit: r1492658 - in /pig/trunk: ./
src/docs/src/documentation/content/xdocs/ src/org/apache/pig/
src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/
src/org/apache/pig/builtin/ src/org/apache/pig/data/ test/org/apache/pig/test/
Author: rohini
Date: Thu Jun 13 13:03:20 2013
New Revision: 1492658
URL: http://svn.apache.org/r1492658
Log:
PIG-3341: Strict datetime parsing and improve performance of loading datetime values (rohini)
Modified:
pig/trunk/CHANGES.txt
pig/trunk/src/docs/src/documentation/content/xdocs/func.xml
pig/trunk/src/org/apache/pig/Main.java
pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POCast.java
pig/trunk/src/org/apache/pig/builtin/ToDate.java
pig/trunk/src/org/apache/pig/builtin/ToDateISO.java
pig/trunk/src/org/apache/pig/builtin/ToMilliSeconds.java
pig/trunk/src/org/apache/pig/builtin/Utf8StorageConverter.java
pig/trunk/src/org/apache/pig/data/DataType.java
pig/trunk/test/org/apache/pig/test/TestDefaultDateTimeZone.java
Modified: pig/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/pig/trunk/CHANGES.txt?rev=1492658&r1=1492657&r2=1492658&view=diff
==============================================================================
--- pig/trunk/CHANGES.txt (original)
+++ pig/trunk/CHANGES.txt Thu Jun 13 13:03:20 2013
@@ -385,6 +385,8 @@ PIG-2769: a simple logic causes very lon
BUG FIXES
+PIG-3341: Strict datetime parsing and improve performance of loading datetime values (rohini)
+
PIG-3329: RANK operator failed when working with SPLIT (xalan via cheolsoo)
PIG-3345: Handle null in DateTime functions (rohini)
Modified: pig/trunk/src/docs/src/documentation/content/xdocs/func.xml
URL: http://svn.apache.org/viewvc/pig/trunk/src/docs/src/documentation/content/xdocs/func.xml?rev=1492658&r1=1492657&r2=1492658&view=diff
==============================================================================
--- pig/trunk/src/docs/src/documentation/content/xdocs/func.xml (original)
+++ pig/trunk/src/docs/src/documentation/content/xdocs/func.xml Thu Jun 13 13:03:20 2013
@@ -4138,7 +4138,7 @@ Use the SecondsBetween function to get t
</section>
<!-- ======================================================== -->
- <section id="subtract-duratioin">
+ <section id="subtract-duration">
<title>SubtractDuration</title>
<p>Returns the result of a DateTime object minus a <a href="http://en.wikipedia.org/wiki/ISO_8601#Durations">Duration object</a>.</p>
@@ -4257,6 +4257,89 @@ Use the ToDate function to generate a Da
</section>
</section>
+<!-- ======================================================== -->
+<section id="to-milli-seconds">
+ <title>ToMilliSeconds</title>
+ <p>
+ Returns the number of milliseconds elapsed since January 1, 1970, 00:00:00.000 GMT
+ for a DateTime object.
+ </p>
+
+ <section>
+ <title>Syntax</title>
+ <table>
+ <tr>
+ <td>
+ <p>ToMilliSeconds(datetime)</p>
+ </td>
+ </tr>
+ </table>
+ </section>
+
+ <section>
+ <title>Terms</title>
+ <table>
+ <tr>
+ <td>
+ <p>datetime</p>
+ </td>
+ <td>
+ <p>A datetime object.</p>
+ </td>
+ </tr>
+ </table>
+ </section>
+
+ <section>
+ <title>Usage</title>
+ <p>
+ Use the ToMilliSeconds function to convert the DateTime to the number of
+ milliseconds that have passed since January 1, 1970 00:00:00.000 GMT.
+ </p>
+ </section>
+</section>
+
+<!-- ======================================================== -->
+<section id="to-unix-time">
+ <title>ToUnixTime</title>
+ <p>
+ Returns the Unix Time as a long for a DateTime object. Unix Time is the
+ number of seconds elapsed since January 1, 1970, 00:00:00.000 GMT.
+ </p>
+
+ <section>
+ <title>Syntax</title>
+ <table>
+ <tr>
+ <td>
+ <p>ToUnixTime(datetime)</p>
+ </td>
+ </tr>
+ </table>
+ </section>
+
+ <section>
+ <title>Terms</title>
+ <table>
+ <tr>
+ <td>
+ <p>datetime</p>
+ </td>
+ <td>
+ <p>A datetime object.</p>
+ </td>
+ </tr>
+ </table>
+ </section>
+
+ <section>
+ <title>Usage</title>
+ <p>
+ Use the ToUnixTime function to convert the DateTime to Unix Time.
+ </p>
+ </section>
+</section>
+
<!-- ======================================================== -->
<section id="weeks-between">
<title>WeeksBetween</title>
Modified: pig/trunk/src/org/apache/pig/Main.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/Main.java?rev=1492658&r1=1492657&r2=1492658&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/Main.java (original)
+++ pig/trunk/src/org/apache/pig/Main.java Thu Jun 13 13:03:20 2013
@@ -914,6 +914,8 @@ public static void printProperties(){
System.out.println(" pig.additional.jars=<colon seperated list of jars>. Used in place of register command.");
System.out.println(" udf.import.list=<comma seperated list of imports>. Used to avoid package names in UDF.");
System.out.println(" stop.on.failure=true|false; default is false. Set to true to terminate on the first error.");
+ System.out.println(" pig.datetime.default.tz=<UTC time offset>. e.g. +08:00. Default is the default timezone of the host.");
+ System.out.println(" Determines the timezone used to handle datetime datatype and UDFs. ");
System.out.println("Additionally, any Hadoop property can be specified.");
}
Modified: pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POCast.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POCast.java?rev=1492658&r1=1492657&r2=1492658&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POCast.java (original)
+++ pig/trunk/src/org/apache/pig/backend/hadoop/executionengine/physicalLayer/expressionOperators/POCast.java Thu Jun 13 13:03:20 2013
@@ -53,7 +53,6 @@ import org.apache.pig.impl.plan.VisitorE
import org.apache.pig.impl.util.CastUtils;
import org.apache.pig.impl.util.LogUtils;
import org.joda.time.DateTime;
-import org.joda.time.DateTimeZone;
/**
* This is just a cast that converts DataByteArray into either String or
@@ -1071,15 +1070,9 @@ public class POCast extends ExpressionOp
return in.getNextDateTime();
case DataType.CHARARRAY: {
- String str = null;
Result res = in.getNextString();
if (res.returnStatus == POStatus.STATUS_OK && res.result != null) {
- DateTimeZone dtz = ToDate.extractDateTimeZone((String) res.result);
- if (dtz == null) {
- res.result = new DateTime((String) res.result);
- } else {
- res.result = new DateTime((String) res.result, dtz);
- }
+ res.result = ToDate.extractDateTime((String) res.result);
}
return res;
}
@@ -1658,12 +1651,7 @@ public class POCast extends ExpressionOp
result = (DateTime)obj;
break;
case DataType.CHARARRAY:
- DateTimeZone dtz = ToDate.extractDateTimeZone((String) obj);
- if (dtz == null) {
- result = new DateTime((String) obj);
- } else {
- result = new DateTime((String) obj, dtz);
- }
+ result = ToDate.extractDateTime((String) obj);
break;
case DataType.BIGINTEGER:
result = new DateTime(((BigInteger)obj).longValue());
Modified: pig/trunk/src/org/apache/pig/builtin/ToDate.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/builtin/ToDate.java?rev=1492658&r1=1492657&r2=1492658&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/builtin/ToDate.java (original)
+++ pig/trunk/src/org/apache/pig/builtin/ToDate.java Thu Jun 13 13:03:20 2013
@@ -21,8 +21,6 @@ package org.apache.pig.builtin;
import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
import org.apache.pig.EvalFunc;
import org.apache.pig.FuncSpec;
@@ -32,12 +30,14 @@ import org.apache.pig.impl.logicalLayer.
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.joda.time.DateTime;
import org.joda.time.DateTimeZone;
+import org.joda.time.format.DateTimeFormatter;
+import org.joda.time.format.ISODateTimeFormat;
/**
- *
+ *
* <p>ToDate converts the ISO or the customized string or the Unix timestamp to the DateTime object.</p>
* <p>ToDate is overloaded.</p>
- *
+ *
* <dl>
* <dt><b>Syntax:</b></dt>
* <dd><code>DateTime ToDate(Long millis)</code>.</dd>
@@ -55,7 +55,7 @@ import org.joda.time.DateTimeZone;
* <dt><b>Output:</b></dt>
* <dd><code>the DateTime object</code>.</dd>
* </dl>
- *
+ *
* <dl>
* <dt><b>Syntax:</b></dt>
* <dd><code>DateTime ToDate(String dtStr, String format)</code>.</dd>
@@ -78,8 +78,9 @@ import org.joda.time.DateTimeZone;
* </dl>
*/
public class ToDate extends EvalFunc<DateTime> {
-
- private static final Pattern TIMEZONE_PATTERN = Pattern.compile("(Z|(?<=(T[0-9\\.:]{0,12}))((\\+|-)\\d{2}(:?\\d{2})?))$");
+
+ private static final DateTimeFormatter isoDateTimeFormatter = ISODateTimeFormat
+ .dateOptionalTimeParser().withOffsetParsed();
public DateTime exec(Tuple input) throws IOException {
if (input == null || input.size() < 1 || input.get(0) == null) {
@@ -114,18 +115,12 @@ public class ToDate extends EvalFunc<Dat
funcList.add(new FuncSpec(ToDate3ARGS.class.getName(), s));
return funcList;
}
-
+
public static DateTimeZone extractDateTimeZone(String dtStr) {
- Matcher matcher = TIMEZONE_PATTERN.matcher(dtStr);
- if (matcher.find()) {
- String dtzStr = matcher.group();
- if (dtzStr.equals("Z")) {
- return DateTimeZone.forOffsetMillis(DateTimeZone.UTC.getOffset(null));
- } else {
- return DateTimeZone.forOffsetMillis(DateTimeZone.forID(dtzStr).getOffset(null));
- }
- } else {
- return null;
- }
+ return isoDateTimeFormatter.parseDateTime(dtStr).getZone();
+ }
+
+ public static DateTime extractDateTime(String dtStr) {
+ return isoDateTimeFormatter.parseDateTime(dtStr);
}
}
Modified: pig/trunk/src/org/apache/pig/builtin/ToDateISO.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/builtin/ToDateISO.java?rev=1492658&r1=1492657&r2=1492658&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/builtin/ToDateISO.java (original)
+++ pig/trunk/src/org/apache/pig/builtin/ToDateISO.java Thu Jun 13 13:03:20 2013
@@ -24,7 +24,6 @@ import org.apache.pig.EvalFunc;
import org.apache.pig.data.DataType;
import org.apache.pig.data.Tuple;
import org.joda.time.DateTime;
-import org.joda.time.DateTimeZone;
/**
* This method should never be used directly, use {@link ToDate}.
@@ -36,12 +35,7 @@ public class ToDateISO extends EvalFunc<
return null;
}
String dtStr = DataType.toString(input.get(0));
- DateTimeZone dtz = ToDate.extractDateTimeZone(dtStr);
- if (dtz == null) {
- return new DateTime(dtStr);
- } else {
- return new DateTime(dtStr, dtz);
- }
+ return ToDate.extractDateTime(dtStr);
}
}
Modified: pig/trunk/src/org/apache/pig/builtin/ToMilliSeconds.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/builtin/ToMilliSeconds.java?rev=1492658&r1=1492657&r2=1492658&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/builtin/ToMilliSeconds.java (original)
+++ pig/trunk/src/org/apache/pig/builtin/ToMilliSeconds.java Thu Jun 13 13:03:20 2013
@@ -31,7 +31,7 @@ import org.joda.time.DateTime;
/**
* <p>
- * ToUnixTime converts the DateTime to the number of milliseconds that have passed
+ * ToMilliSeconds converts the DateTime to the number of milliseconds that have passed
* since January 1, 1970 00:00:00.000 GMT.
* </p>
* <ul>
Modified: pig/trunk/src/org/apache/pig/builtin/Utf8StorageConverter.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/builtin/Utf8StorageConverter.java?rev=1492658&r1=1492657&r2=1492658&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/builtin/Utf8StorageConverter.java (original)
+++ pig/trunk/src/org/apache/pig/builtin/Utf8StorageConverter.java Thu Jun 13 13:03:20 2013
@@ -43,7 +43,6 @@ import org.apache.pig.data.Tuple;
import org.apache.pig.data.TupleFactory;
import org.apache.pig.impl.util.LogUtils;
import org.joda.time.DateTime;
-import org.joda.time.DateTimeZone;
/**
* This abstract class provides standard conversions between utf8 encoded data
@@ -523,12 +522,7 @@ public class Utf8StorageConverter implem
}
try {
String dtStr = new String(b);
- DateTimeZone dtz = ToDate.extractDateTimeZone(dtStr);
- if (dtz == null) {
- return new DateTime(dtStr);
- } else {
- return new DateTime(dtStr, dtz);
- }
+ return ToDate.extractDateTime(dtStr);
} catch (IllegalArgumentException e) {
LogUtils.warn(this, "Unable to interpret value " + Arrays.toString(b) + " in field being " +
"converted to datetime, caught IllegalArgumentException <" +
Modified: pig/trunk/src/org/apache/pig/data/DataType.java
URL: http://svn.apache.org/viewvc/pig/trunk/src/org/apache/pig/data/DataType.java?rev=1492658&r1=1492657&r2=1492658&view=diff
==============================================================================
--- pig/trunk/src/org/apache/pig/data/DataType.java (original)
+++ pig/trunk/src/org/apache/pig/data/DataType.java Thu Jun 13 13:03:20 2013
@@ -39,8 +39,6 @@ import org.apache.pig.impl.logicalLayer.
import org.apache.pig.impl.logicalLayer.schema.Schema;
import org.apache.pig.impl.logicalLayer.schema.SchemaMergeException;
import org.joda.time.DateTime;
-import org.joda.time.DateTimeZone;
-
/**
@@ -1075,12 +1073,7 @@ public class DataType {
return new DateTime(((DataByteArray) o).toString());
case CHARARRAY:
// the string can contain just date part or date part plus time part
- DateTimeZone dtz = ToDate.extractDateTimeZone((String) o);
- if (dtz == null) {
- return new DateTime((String) o);
- } else {
- return new DateTime((String) o, dtz);
- }
+ return ToDate.extractDateTime((String) o);
case INTEGER:
return new DateTime(((Integer) o).longValue());
case LONG:
Modified: pig/trunk/test/org/apache/pig/test/TestDefaultDateTimeZone.java
URL: http://svn.apache.org/viewvc/pig/trunk/test/org/apache/pig/test/TestDefaultDateTimeZone.java?rev=1492658&r1=1492657&r2=1492658&view=diff
==============================================================================
--- pig/trunk/test/org/apache/pig/test/TestDefaultDateTimeZone.java (original)
+++ pig/trunk/test/org/apache/pig/test/TestDefaultDateTimeZone.java Thu Jun 13 13:03:20 2013
@@ -79,9 +79,9 @@ public class TestDefaultDateTimeZone ext
Tuple actualTuple = actualItr.next();
assertEquals(expectedTuple, actualTuple);
}
- assertEquals(expectedItr.hasNext(), actualItr.hasNext());
+ assertEquals(expectedItr.hasNext(), actualItr.hasNext());
}
-
+
private static Iterator<Tuple> generateExpectedResults(DateTimeZone dtz)
throws Exception {
List<Tuple> expectedResults = new ArrayList<Tuple>();
@@ -96,9 +96,12 @@ public class TestDefaultDateTimeZone ext
return expectedResults.iterator();
}
- // PIG-3316
- public void testTimeZoneExtracting() throws IOException {
- String[] inputs = {
+ public void testTimeZone() throws IOException {
+ // Usually set through "pig.datetime.default.tz"
+ String defaultDTZ = "+03:00";
+ DateTimeZone.setDefault(DateTimeZone.forOffsetMillis(DateTimeZone.forID(defaultDTZ)
+ .getOffset(null)));
+ String[] inputs = {
"1970-01-01T00:00:00.000-08:00",
"1970-01-01T00:00",
"1970-01-01T00",
@@ -109,34 +112,50 @@ public class TestDefaultDateTimeZone ext
"1970-01-01T00-05:00",
"1970-01-01T-08:00",
"1970-01T-08:00",
- "1970T+8:00",
+ //"1970T+8:00", //Invalid format
"1970-01-01",
"1970-01",
"1970",
};
- String[] expectedOutputs = {
- "-08:00",
- "null",
- "null",
- "null",
- "null",
- "null",
- "-08:00",
- "-05:00",
- "-08:00",
- "-08:00",
- "null",
- "null",
- "null",
- "null"
+ String[] expectedDTZOutputs = {
+ "-08:00",
+ defaultDTZ,
+ defaultDTZ,
+ defaultDTZ,
+ defaultDTZ,
+ defaultDTZ,
+ "-08:00",
+ "-05:00",
+ "-08:00",
+ "-08:00",
+ defaultDTZ,
+ defaultDTZ,
+ defaultDTZ
+ };
+ String[] expectedDTOutputs = {
+ "1970-01-01T00:00:00.000-08:00",
+ "1970-01-01T00:00:00.000" + defaultDTZ,
+ "1970-01-01T00:00:00.000" + defaultDTZ,
+ "1970-01-01T00:00:00.000" + defaultDTZ,
+ "1970-01-01T00:00:00.000" + defaultDTZ,
+ "1970-01-01T00:00:00.000" + defaultDTZ,
+ "1970-01-01T00:00:00.000-08:00",
+ "1970-01-01T00:00:00.000-05:00",
+ "1970-01-01T00:00:00.000-08:00",
+ "1970-01-01T00:00:00.000-08:00",
+ "1970-01-01T00:00:00.000" + defaultDTZ,
+ "1970-01-01T00:00:00.000" + defaultDTZ,
+ "1970-01-01T00:00:00.000" + defaultDTZ
};
-
+
for( int i = 0; i < inputs.length; i++ ) {
DateTimeZone dtz = ToDate.extractDateTimeZone( inputs[i] );
- assertEquals( expectedOutputs[i], dtz == null ? "null" : dtz.toString() );
- System.out.println( "\"" + dtz + "\"," );
+ assertEquals( expectedDTZOutputs[i], dtz.toString() );
+ DateTime dt = ToDate.extractDateTime( inputs[i] );
+ assertEquals( expectedDTOutputs[i], dt.toString() );
+ System.out.println( "\"" + dt + "\"," );
}
-
+
}
}