You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@oozie.apache.org by ro...@apache.org on 2014/05/01 19:16:12 UTC
git commit: OOZIE-1709 CoordELFunctions.getCurrentInstance() is expensive (shwethags via rohini)
Repository: oozie
Updated Branches:
refs/heads/master de7e72413 -> ef513d285
OOZIE-1709 CoordELFunctions.getCurrentInstance() is expensive (shwethags via rohini)
Project: http://git-wip-us.apache.org/repos/asf/oozie/repo
Commit: http://git-wip-us.apache.org/repos/asf/oozie/commit/ef513d28
Tree: http://git-wip-us.apache.org/repos/asf/oozie/tree/ef513d28
Diff: http://git-wip-us.apache.org/repos/asf/oozie/diff/ef513d28
Branch: refs/heads/master
Commit: ef513d2853ce943ce3fea6aa6a3f6c9bdec25f63
Parents: de7e724
Author: Rohini Palaniswamy <ro...@yahoo-inc.com>
Authored: Thu May 1 10:16:07 2014 -0700
Committer: Rohini Palaniswamy <ro...@yahoo-inc.com>
Committed: Thu May 1 10:16:07 2014 -0700
----------------------------------------------------------------------
.../apache/oozie/coord/CoordELFunctions.java | 119 ++++++++++++++-----
.../oozie/coord/TestCoordELFunctions.java | 15 ++-
release-log.txt | 1 +
3 files changed, 102 insertions(+), 33 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/oozie/blob/ef513d28/core/src/main/java/org/apache/oozie/coord/CoordELFunctions.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/oozie/coord/CoordELFunctions.java b/core/src/main/java/org/apache/oozie/coord/CoordELFunctions.java
index d73bc7d..db3259b 100644
--- a/core/src/main/java/org/apache/oozie/coord/CoordELFunctions.java
+++ b/core/src/main/java/org/apache/oozie/coord/CoordELFunctions.java
@@ -17,25 +17,28 @@
*/
package org.apache.oozie.coord;
-import java.net.URI;
-import java.util.ArrayList;
-import java.util.Calendar;
-import java.util.Date;
-import java.util.List;
-import java.util.TimeZone;
-
+import com.google.common.collect.Lists;
+import org.apache.commons.lang.StringUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.oozie.ErrorCode;
import org.apache.oozie.client.OozieClient;
import org.apache.oozie.command.CommandException;
-import org.apache.oozie.dependency.URIHandler.Context;
import org.apache.oozie.dependency.URIHandler;
+import org.apache.oozie.dependency.URIHandler.Context;
+import org.apache.oozie.service.Services;
+import org.apache.oozie.service.URIHandlerService;
import org.apache.oozie.util.DateUtils;
import org.apache.oozie.util.ELEvaluator;
import org.apache.oozie.util.ParamChecker;
import org.apache.oozie.util.XLog;
-import org.apache.oozie.service.Services;
-import org.apache.oozie.service.URIHandlerService;
+
+import java.net.URI;
+import java.util.ArrayList;
+import java.util.Calendar;
+import java.util.Date;
+import java.util.GregorianCalendar;
+import java.util.List;
+import java.util.TimeZone;
/**
* This class implements the EL function related to coordinator
@@ -52,6 +55,12 @@ public class CoordELFunctions {
// TODO: in next release, support flexibility
private static String END_OF_OPERATION_INDICATOR_FILE = "_SUCCESS";
+ public static final long MINUTE_MSEC = 60 * 1000L;
+ public static final long HOUR_MSEC = 60 * MINUTE_MSEC;
+ public static final long DAY_MSEC = 24 * HOUR_MSEC;
+ public static final long MONTH_MSEC = 30 * DAY_MSEC;
+ public static final long YEAR_MSEC = 365 * DAY_MSEC;
+
/**
* Used in defining the frequency in 'day' unit. <p/> domain: <code> val > 0</code> and should be integer.
*
@@ -922,7 +931,6 @@ public class CoordELFunctions {
TimeUnit dsTimeUnit = getDSTimeUnit();
int[] instCount = new int[1];// used as pass by ref
Calendar nominalInstanceCal = getCurrentInstance(getActionCreationtime(), instCount);
- StringBuilder instanceList = new StringBuilder();
if (nominalInstanceCal == null) {
LOG.warn("If the initial instance of the dataset is later than the nominal time, an empty string is"
+ " returned. This means that no data is available at the current-instance specified by the user"
@@ -930,33 +938,25 @@ public class CoordELFunctions {
return "";
} else {
Calendar initInstance = getInitialInstanceCal();
- instCount[0] = instCount[0] + end;
// Add in the reverse order - newest instance first.
- for (int i = end; i >= start; i--) {
- // Tried to avoid the clone. But subtracting datasetFrequency gives different results than multiplying
- // and Spring DST transition test in TestCoordELfunctions.testCurrent() fails
- //nominalInstanceCal.add(dsTimeUnit.getCalendarUnit(), -datasetFrequency);
- nominalInstanceCal = (Calendar) initInstance.clone();
- nominalInstanceCal.add(dsTimeUnit.getCalendarUnit(), instCount[0] * datasetFrequency);
- instCount[0]--;
+ nominalInstanceCal = (Calendar) initInstance.clone();
+ nominalInstanceCal.add(dsTimeUnit.getCalendarUnit(), (instCount[0] + start) * datasetFrequency);
+ List<String> instances = new ArrayList<String>();
+ for (int i = start; i <= end; i++) {
if (nominalInstanceCal.compareTo(initInstance) < 0) {
LOG.warn("If the initial instance of the dataset is later than the current-instance specified,"
+ " such as coord:current({0}) in this case, an empty string is returned. This means that"
+ " no data is available at the current-instance specified by the user and the user could"
+ " try modifying his initial-instance to an earlier time.", start);
- break;
}
else {
- instanceList.append(DateUtils.formatDateOozieTZ(nominalInstanceCal));
- instanceList.append(CoordELFunctions.INSTANCE_SEPARATOR);
+ instances.add(DateUtils.formatDateOozieTZ(nominalInstanceCal));
}
+ nominalInstanceCal.add(dsTimeUnit.getCalendarUnit(), datasetFrequency);
}
+ instances = Lists.reverse(instances);
+ return StringUtils.join(instances, CoordELFunctions.INSTANCE_SEPARATOR);
}
-
- if (instanceList.length() > 0) {
- instanceList.setLength(instanceList.length() - CoordELFunctions.INSTANCE_SEPARATOR.length());
- }
- return instanceList.toString();
}
/**
@@ -1223,9 +1223,8 @@ public class CoordELFunctions {
if (ds == null) {
throw new RuntimeException("Associated Dataset should be defined with key " + DATASET);
}
- Calendar effInitTS = Calendar.getInstance();
+ Calendar effInitTS = new GregorianCalendar(ds.getTimeZone());
effInitTS.setTime(ds.getInitInstance());
- effInitTS.setTimeZone(ds.getTimeZone());
// To adjust EOD/EOM
DateUtils.moveToEnd(effInitTS, getDSEndOfFlag(eval));
return effInitTS;
@@ -1298,6 +1297,68 @@ public class CoordELFunctions {
TimeZone dsTZ = getDatasetTZ(eval);
int dsFreq = getDSFrequency(eval);
// Convert Date to Calendar for corresponding TZ
+ Calendar current = Calendar.getInstance(dsTZ);
+ current.setTime(datasetInitialInstance);
+
+ Calendar calEffectiveTime = new GregorianCalendar(dsTZ);
+ calEffectiveTime.setTime(effectiveTime);
+ if (instanceCount == null) { // caller doesn't care about this value
+ instanceCount = new int[1];
+ }
+ instanceCount[0] = 0;
+ if (current.compareTo(calEffectiveTime) > 0) {
+ return null;
+ }
+
+ switch(dsTimeUnit) {
+ case MINUTE:
+ instanceCount[0] = (int) ((effectiveTime.getTime() - datasetInitialInstance.getTime()) / MINUTE_MSEC);
+ break;
+ case HOUR:
+ instanceCount[0] = (int) ((effectiveTime.getTime() - datasetInitialInstance.getTime()) / HOUR_MSEC);
+ break;
+ case DAY:
+ case END_OF_DAY:
+ instanceCount[0] = (int) ((effectiveTime.getTime() - datasetInitialInstance.getTime()) / DAY_MSEC);
+ break;
+ case MONTH:
+ case END_OF_MONTH:
+ instanceCount[0] = (int) ((effectiveTime.getTime() - datasetInitialInstance.getTime()) / MONTH_MSEC);
+ break;
+ case YEAR:
+ instanceCount[0] = (int) ((effectiveTime.getTime() - datasetInitialInstance.getTime()) / YEAR_MSEC);
+ break;
+ default:
+ throw new IllegalArgumentException("Unhandled dataset time unit " + dsTimeUnit);
+ }
+
+ if (instanceCount[0] > 2) {
+ instanceCount[0] = (instanceCount[0] / dsFreq);
+ current.add(dsTimeUnit.getCalendarUnit(), instanceCount[0] * dsFreq);
+ } else {
+ instanceCount[0] = 0;
+ }
+ while (!current.getTime().after(effectiveTime)) {
+ current.add(dsTimeUnit.getCalendarUnit(), dsFreq);
+ instanceCount[0]++;
+ }
+ current.add(dsTimeUnit.getCalendarUnit(), -dsFreq);
+ instanceCount[0]--;
+ return current;
+ }
+
+ /**
+ * Find the current instance based on effectiveTime (i.e Action_Creation_Time or Action_Start_Time)
+ *
+ * @return current instance i.e. current(0) returns null if effectiveTime is earlier than Initial Instance time of
+ * the dataset.
+ */
+ private static Calendar getCurrentInstance_old(Date effectiveTime, int instanceCount[], ELEvaluator eval) {
+ Date datasetInitialInstance = getInitialInstance(eval);
+ TimeUnit dsTimeUnit = getDSTimeUnit(eval);
+ TimeZone dsTZ = getDatasetTZ(eval);
+ int dsFreq = getDSFrequency(eval);
+ // Convert Date to Calendar for corresponding TZ
Calendar current = Calendar.getInstance();
current.setTime(datasetInitialInstance);
current.setTimeZone(dsTZ);
http://git-wip-us.apache.org/repos/asf/oozie/blob/ef513d28/core/src/test/java/org/apache/oozie/coord/TestCoordELFunctions.java
----------------------------------------------------------------------
diff --git a/core/src/test/java/org/apache/oozie/coord/TestCoordELFunctions.java b/core/src/test/java/org/apache/oozie/coord/TestCoordELFunctions.java
index be35ce4..13315b9 100644
--- a/core/src/test/java/org/apache/oozie/coord/TestCoordELFunctions.java
+++ b/core/src/test/java/org/apache/oozie/coord/TestCoordELFunctions.java
@@ -184,7 +184,6 @@ public class TestCoordELFunctions extends XTestCase {
SyncCoordAction appInst = new SyncCoordAction();
SyncCoordDataset ds = new SyncCoordDataset();
- ;
ds.setFrequency(1);
ds.setTimeUnit(TimeUnit.DAY);
ds.setInitInstance(DateUtils.parseDateOozieTZ("2009-01-02T00:00Z"));
@@ -260,7 +259,6 @@ public class TestCoordELFunctions extends XTestCase {
SyncCoordAction appInst = new SyncCoordAction();
SyncCoordDataset ds = new SyncCoordDataset();
- ;
ds.setFrequency(1);
ds.setTimeUnit(TimeUnit.MONTH);
ds.setInitInstance(DateUtils.parseDateOozieTZ("2009-01-02T00:00Z"));
@@ -371,6 +369,16 @@ public class TestCoordELFunctions extends XTestCase {
assertEquals("2010-09-08T23:59Z", CoordELFunctions.evalAndWrap(eval, expr));
}
+ public void testCurrentRange() throws Exception {
+ init("coord-action-create");
+ String expr = "${coord:currentRange(-1, 0)}";
+ assertEquals("2009-09-09T23:59Z#2009-09-08T23:59Z", CoordELFunctions.evalAndWrap(eval, expr));
+
+ //test out of range instances, EL should return partial instances
+ appInst.setNominalTime(DateUtils.parseDateOozieTZ("2009-09-01T23:59Z"));
+ assertEquals("2009-09-01T23:59Z", CoordELFunctions.evalAndWrap(eval, expr));
+ }
+
public void testCurrent() throws Exception {
init("coord-action-create");
String expr = "${coord:current(-1)}";
@@ -395,7 +403,6 @@ public class TestCoordELFunctions extends XTestCase {
SyncCoordAction appInst = new SyncCoordAction();
SyncCoordDataset ds = new SyncCoordDataset();
- ;
ds.setFrequency(1);
ds.setTimeUnit(TimeUnit.DAY);
ds.setInitInstance(DateUtils.parseDateOozieTZ("2009-01-02T00:00Z"));
@@ -1026,7 +1033,7 @@ public class TestCoordELFunctions extends XTestCase {
* public void testDetach() throws Exception { Services.get().destroy(); }
*/
- private void init(String tag) throws Exception {
+ void init(String tag) throws Exception {
init(tag, "hdfs://localhost:9000/user/" + getTestUser() + "/US/${YEAR}/${MONTH}/${DAY}");
}
http://git-wip-us.apache.org/repos/asf/oozie/blob/ef513d28/release-log.txt
----------------------------------------------------------------------
diff --git a/release-log.txt b/release-log.txt
index eb9e40c..030fe98 100644
--- a/release-log.txt
+++ b/release-log.txt
@@ -1,5 +1,6 @@
-- Oozie 4.1.0 release (trunk - unreleased)
+OOZIE-1709 CoordELFunctions.getCurrentInstance() is expensive (shwethags via rohini)
OOZIE-1787 parameterize interval of SLAService updating SlaStatus (ryota)
OOZIE-1777 duplicated log message in Pig launcher's stdout (ryota)
OOZIE-1748 When using cron-like syntax, the "Time Unit" field says "MINUTE"