You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lens.apache.org by am...@apache.org on 2014/11/13 12:49:36 UTC
[09/50] incubator-lens git commit: Added MLTask to run/schedule ML
training and evaluation workflow
Added MLTask to run/schedule ML training and evaluation workflow
Project: http://git-wip-us.apache.org/repos/asf/incubator-lens/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-lens/commit/88b900c3
Tree: http://git-wip-us.apache.org/repos/asf/incubator-lens/tree/88b900c3
Diff: http://git-wip-us.apache.org/repos/asf/incubator-lens/diff/88b900c3
Branch: refs/heads/toapache
Commit: 88b900c35c678f47d80017dc61952ce9daee29ef
Parents: 0acac8e
Author: jdhok <ja...@inmobi.com>
Authored: Fri Nov 7 12:48:06 2014 +0530
Committer: jdhok <ja...@inmobi.com>
Committed: Fri Nov 7 12:48:06 2014 +0530
----------------------------------------------------------------------
.../org/apache/lens/client/LensConnection.java | 10 +-
.../data/naive_bayes/naive_bayes_train.data | 6 +
lens-ml-lib/pom.xml | 6 +-
.../org/apache/lens/client/LensMLClient.java | 50 ++-
.../apache/lens/client/LensMLJerseyClient.java | 58 +++-
.../java/org/apache/lens/ml/ExampleUtils.java | 43 ++-
.../main/java/org/apache/lens/ml/LensML.java | 6 +-
.../java/org/apache/lens/ml/LensMLImpl.java | 43 ++-
.../main/java/org/apache/lens/ml/MLModel.java | 2 +
.../java/org/apache/lens/ml/MLTestReport.java | 2 +
.../main/java/org/apache/lens/ml/MLUtils.java | 27 ++
.../org/apache/lens/ml/TableTestingSpec.java | 76 ++++-
.../java/org/apache/lens/ml/dao/MLDBUtils.java | 105 +++++++
.../lens/ml/spark/ColumnFeatureFunction.java | 2 +-
.../apache/lens/ml/spark/DoubleValueMapper.java | 6 +-
.../org/apache/lens/ml/spark/HiveTableRDD.java | 9 +-
.../apache/lens/ml/spark/TableTrainingSpec.java | 49 ++-
.../ml/spark/trainers/BaseSparkTrainer.java | 9 +-
.../java/org/apache/lens/ml/task/MLTask.java | 306 +++++++++++++++++++
.../java/org/apache/lens/server/ml/MLApp.java | 25 +-
.../apache/lens/server/ml/MLServiceImpl.java | 9 +-
.../lens/server/ml/MLServiceResource.java | 28 +-
.../java/org/apache/lens/ml/TestMLResource.java | 103 ++++++-
lens-ml-lib/testutils/hadoop | 110 +++++++
lens-server/src/test/resources/lens-site.xml | 12 +
25 files changed, 975 insertions(+), 127 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/88b900c3/lens-client/src/main/java/org/apache/lens/client/LensConnection.java
----------------------------------------------------------------------
diff --git a/lens-client/src/main/java/org/apache/lens/client/LensConnection.java b/lens-client/src/main/java/org/apache/lens/client/LensConnection.java
index 48b7221..bf63ee7 100644
--- a/lens-client/src/main/java/org/apache/lens/client/LensConnection.java
+++ b/lens-client/src/main/java/org/apache/lens/client/LensConnection.java
@@ -36,10 +36,14 @@ import javax.ws.rs.client.Entity;
import javax.ws.rs.client.WebTarget;
import javax.ws.rs.core.MediaType;
import javax.ws.rs.core.Response;
+
import java.net.ConnectException;
import java.util.List;
import java.util.concurrent.atomic.AtomicBoolean;
+import lombok.Getter;
+import lombok.Setter;
+
/**
* Top level client connection class which is used to connect to a lens server.
*/
@@ -55,6 +59,8 @@ public class LensConnection {
private AtomicBoolean open = new AtomicBoolean(false);
/** The session handle. */
+ @Getter
+ @Setter
private LensSessionHandle sessionHandle;
/**
@@ -166,10 +172,6 @@ public class LensConnection {
}
- public LensSessionHandle getSessionHandle() {
- return this.sessionHandle;
- }
-
/**
* Close.
*
http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/88b900c3/lens-ml-lib/data/naive_bayes/naive_bayes_train.data
----------------------------------------------------------------------
diff --git a/lens-ml-lib/data/naive_bayes/naive_bayes_train.data b/lens-ml-lib/data/naive_bayes/naive_bayes_train.data
new file mode 100644
index 0000000..dfb9ccd
--- /dev/null
+++ b/lens-ml-lib/data/naive_bayes/naive_bayes_train.data
@@ -0,0 +1,6 @@
+0 1 0 0
+0 2 0 0
+1 0 1 0
+1 0 2 0
+2 0 0 1
+2 0 0 2
http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/88b900c3/lens-ml-lib/pom.xml
----------------------------------------------------------------------
diff --git a/lens-ml-lib/pom.xml b/lens-ml-lib/pom.xml
index ebbadca..4a1697f 100644
--- a/lens-ml-lib/pom.xml
+++ b/lens-ml-lib/pom.xml
@@ -142,10 +142,12 @@
<version>2.15</version>
<configuration>
<systemPropertyVariables>
- <hadoop.bin.path>${pom.basedir}/../lens-driver-hive/testutils/hadoop</hadoop.bin.path>
+ <hadoop.bin.path>${pom.basedir}/testutils/hadoop</hadoop.bin.path>
</systemPropertyVariables>
<environmentVariables>
<MVN_CLASSPATH_FILE>${mvn.classpath.file}</MVN_CLASSPATH_FILE>
+ <!-- We have to include the test classes so that UDF class is available -->
+ <TEST_CLASSPATH>${pom.basedir}/target/classes</TEST_CLASSPATH>
</environmentVariables>
<!-- More memory is required to run Spark in unit tests -->
<argLine>-XX:MaxPermSize=1024m -Xms1024m -Xmx2000m</argLine>
@@ -158,7 +160,7 @@
<executions>
<execution>
<id>build-classpath</id>
- <phase>compile</phase>
+ <phase>test-compile</phase>
<goals>
<goal>build-classpath</goal>
</goals>
http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/88b900c3/lens-ml-lib/src/main/java/org/apache/lens/client/LensMLClient.java
----------------------------------------------------------------------
diff --git a/lens-ml-lib/src/main/java/org/apache/lens/client/LensMLClient.java b/lens-ml-lib/src/main/java/org/apache/lens/client/LensMLClient.java
index a3c8a84..f880ead 100644
--- a/lens-ml-lib/src/main/java/org/apache/lens/client/LensMLClient.java
+++ b/lens-ml-lib/src/main/java/org/apache/lens/client/LensMLClient.java
@@ -19,6 +19,8 @@
package org.apache.lens.client;
import org.apache.commons.lang.StringUtils;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.lens.api.LensException;
@@ -30,6 +32,7 @@ import org.apache.lens.ml.MLModel;
import org.apache.lens.ml.MLTestReport;
import org.apache.lens.ml.MLTrainer;
+import java.io.Closeable;
import java.io.IOException;
import java.io.ObjectInputStream;
import java.net.URI;
@@ -39,10 +42,13 @@ import java.util.LinkedHashMap;
import java.util.List;
import java.util.Map;
+import javax.ws.rs.core.Form;
+
/**
- * The Class LensMLClient.
+ * Client side implementation of LensML
*/
-public class LensMLClient implements LensML {
+public class LensMLClient implements LensML, Closeable {
+ private static final Log LOG = LogFactory.getLog(LensMLClient.class);
/** The client. */
private LensMLJerseyClient client;
@@ -53,8 +59,20 @@ public class LensMLClient implements LensML {
* @param clientConf
* the client conf
*/
- public LensMLClient(LensConnectionParams clientConf) {
- client = new LensMLJerseyClient(new LensConnection(clientConf));
+ public LensMLClient(LensConnectionParams clientConf, String password) {
+ client = new LensMLJerseyClient(new LensConnection(clientConf), password);
+ LOG.info("Client created with new session");
+ }
+
+ /**
+ * Instantiates a new lens ml client.
+ *
+ * @param clientConf
+ * the client conf
+ */
+ public LensMLClient(LensConnectionParams clientConf, LensSessionHandle sessionHandle) {
+ client = new LensMLJerseyClient(new LensConnection(clientConf), sessionHandle);
+ LOG.info("Client created with existing session");
}
/**
@@ -116,10 +134,10 @@ public class LensMLClient implements LensML {
*/
@Override
public String train(String table, String algorithm, String[] args) throws LensException {
- Map<String, String> trainParams = new LinkedHashMap<String, String>();
- trainParams.put("table", table);
+ Form trainParams = new Form();
+ trainParams.param("table", table);
for (int i = 0; i < args.length; i += 2) {
- trainParams.put(args[i], args[i + 1]);
+ trainParams.param(args[i], args[i + 1]);
}
return client.trainModel(algorithm, trainParams);
}
@@ -211,9 +229,9 @@ public class LensMLClient implements LensML {
* the lens exception
*/
@Override
- public MLTestReport testModel(LensSessionHandle session, String table, String algorithm, String modelID)
- throws LensException {
- String reportID = client.testModel(table, algorithm, modelID);
+ public MLTestReport testModel(LensSessionHandle session, String table, String algorithm, String modelID,
+ String outputTable) throws LensException {
+ String reportID = client.testModel(table, algorithm, modelID, outputTable);
return getTestReport(algorithm, reportID);
}
@@ -306,4 +324,16 @@ public class LensMLClient implements LensML {
public void deleteTestReport(String algorithm, String reportID) throws LensException {
client.deleteTestReport(algorithm, reportID);
}
+
+ /**
+ * Close connection
+ */
+ @Override
+ public void close() throws IOException {
+ client.close();
+ }
+
+ public LensSessionHandle getSessionHandle() {
+ return client.getSessionHandle();
+ }
}
http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/88b900c3/lens-ml-lib/src/main/java/org/apache/lens/client/LensMLJerseyClient.java
----------------------------------------------------------------------
diff --git a/lens-ml-lib/src/main/java/org/apache/lens/client/LensMLJerseyClient.java b/lens-ml-lib/src/main/java/org/apache/lens/client/LensMLJerseyClient.java
index a71e678..3e41f52 100644
--- a/lens-ml-lib/src/main/java/org/apache/lens/client/LensMLJerseyClient.java
+++ b/lens-ml-lib/src/main/java/org/apache/lens/client/LensMLJerseyClient.java
@@ -18,6 +18,7 @@
*/
package org.apache.lens.client;
+import org.apache.lens.api.LensSessionHandle;
import org.apache.lens.api.StringList;
import org.apache.lens.api.ml.ModelMetadata;
import org.apache.lens.api.ml.TestReport;
@@ -33,6 +34,7 @@ import javax.ws.rs.NotFoundException;
import javax.ws.rs.client.*;
import javax.ws.rs.core.Form;
import javax.ws.rs.core.MediaType;
+
import java.util.List;
import java.util.Map;
@@ -43,7 +45,6 @@ import java.util.Map;
* The Class LensMLJerseyClient.
*/
public class LensMLJerseyClient {
-
/** The Constant LENS_ML_RESOURCE_PATH. */
public static final String LENS_ML_RESOURCE_PATH = "lens.ml.resource.path";
@@ -56,14 +57,30 @@ public class LensMLJerseyClient {
/** The connection. */
private final LensConnection connection;
+ private final LensSessionHandle sessionHandle;
+
+ /**
+ * Instantiates a new lens ml jersey client.
+ *
+ * @param connection
+ * the connection
+ */
+ public LensMLJerseyClient(LensConnection connection, String password) {
+ this.connection = connection;
+ connection.open(password);
+ this.sessionHandle = null;
+ }
+
/**
* Instantiates a new lens ml jersey client.
*
* @param connection
* the connection
*/
- public LensMLJerseyClient(LensConnection connection) {
+ public LensMLJerseyClient(LensConnection connection, LensSessionHandle sessionHandle) {
this.connection = connection;
+ this.sessionHandle = sessionHandle;
+ connection.setSessionHandle(sessionHandle);
}
protected WebTarget getMLWebTarget() {
@@ -133,15 +150,9 @@ public class LensMLJerseyClient {
* the params
* @return the string
*/
- public String trainModel(String algorithm, Map<String, String> params) {
- Form form = new Form();
-
- for (Map.Entry<String, String> entry : params.entrySet()) {
- form.param(entry.getKey(), entry.getValue());
- }
-
+ public String trainModel(String algorithm, Form params) {
return getMLWebTarget().path(algorithm).path("train").request(MediaType.APPLICATION_JSON_TYPE)
- .post(Entity.entity(form, MediaType.APPLICATION_FORM_URLENCODED_TYPE), String.class);
+ .post(Entity.entity(params, MediaType.APPLICATION_FORM_URLENCODED_TYPE), String.class);
}
/**
@@ -153,15 +164,21 @@ public class LensMLJerseyClient {
* the algorithm
* @param modelID
* the model id
+ * @param outputTable
+ * the output table name
* @return the string
*/
- public String testModel(String table, String algorithm, String modelID) {
+ public String testModel(String table, String algorithm, String modelID, String outputTable) {
WebTarget modelTestTarget = getMLWebTarget().path("test").path(table).path(algorithm).path(modelID);
FormDataMultiPart mp = new FormDataMultiPart();
- mp.bodyPart(new FormDataBodyPart(FormDataContentDisposition.name("sessionid").build(), connection
- .getSessionHandle(), MediaType.APPLICATION_XML_TYPE));
+ LensSessionHandle sessionHandle = this.sessionHandle == null ? connection.getSessionHandle() : this.sessionHandle;
+
+ mp.bodyPart(new FormDataBodyPart(FormDataContentDisposition.name("sessionid").build(), sessionHandle,
+ MediaType.APPLICATION_XML_TYPE));
+
+ mp.bodyPart(new FormDataBodyPart(FormDataContentDisposition.name("outputTable").build(), outputTable));
return modelTestTarget.request().post(Entity.entity(mp, MediaType.MULTIPART_FORM_DATA_TYPE), String.class);
}
@@ -252,4 +269,19 @@ public class LensMLJerseyClient {
public Configuration getConf() {
return connection.getLensConnectionParams().getConf();
}
+
+ public void close() {
+ try {
+ connection.close();
+ } catch (Exception exc) {
+ LOG.error("Error closing connection", exc);
+ }
+ }
+
+ public LensSessionHandle getSessionHandle() {
+ if (sessionHandle != null) {
+ return sessionHandle;
+ }
+ return connection.getSessionHandle();
+ }
}
http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/88b900c3/lens-ml-lib/src/main/java/org/apache/lens/ml/ExampleUtils.java
----------------------------------------------------------------------
diff --git a/lens-ml-lib/src/main/java/org/apache/lens/ml/ExampleUtils.java b/lens-ml-lib/src/main/java/org/apache/lens/ml/ExampleUtils.java
index a2ca853..a48dcc2 100644
--- a/lens-ml-lib/src/main/java/org/apache/lens/ml/ExampleUtils.java
+++ b/lens-ml-lib/src/main/java/org/apache/lens/ml/ExampleUtils.java
@@ -18,23 +18,30 @@
*/
package org.apache.lens.ml;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.metastore.TableType;
import org.apache.hadoop.hive.metastore.api.FieldSchema;
import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.hive.ql.plan.AddPartitionDesc;
-
-import java.io.*;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
+import org.apache.hadoop.hive.serde.serdeConstants;
+import org.apache.hadoop.mapred.TextInputFormat;
+import org.apache.hadoop.mapred.TextOutputFormat;
/**
* The Class ExampleUtils.
*/
public class ExampleUtils {
+ private static final Log LOG = LogFactory.getLog(ExampleUtils.class);
/**
* Creates the example table.
@@ -54,10 +61,12 @@ public class ExampleUtils {
* @throws HiveException
* the hive exception
*/
- public static void createExampleTable(HiveConf conf, String database, String tableName, String sampleDataFile,
- String labelColumn, String... features) throws HiveException {
- File dataFile = new File(sampleDataFile);
- File partDir = dataFile.getParentFile();
+ public static void createTable(HiveConf conf, String database, String tableName, String sampleDataFile,
+ String labelColumn, Map<String, String> tableParams, String... features) throws HiveException {
+
+ Path dataFilePath = new Path(sampleDataFile);
+ Path partDir = dataFilePath.getParent();
+
// Create table
List<FieldSchema> columns = new ArrayList<FieldSchema>();
@@ -72,16 +81,26 @@ public class ExampleUtils {
}
Table tbl = Hive.get(conf).newTable(database + "." + tableName);
- tbl.getCols().addAll(columns);
+ tbl.setTableType(TableType.MANAGED_TABLE);
+ tbl.getTTable().getSd().setCols(columns);
+ tbl.getTTable().getParameters().putAll(tableParams);
+ tbl.setInputFormatClass(TextInputFormat.class);
+ tbl.setSerdeParam(serdeConstants.LINE_DELIM, "\n");
+ tbl.setSerdeParam(serdeConstants.FIELD_DELIM, " ");
List<FieldSchema> partCols = new ArrayList<FieldSchema>(1);
partCols.add(new FieldSchema("dummy_partition_col", "string", ""));
tbl.setPartCols(partCols);
+
Hive.get(conf).createTable(tbl);
+ LOG.info("Created table " + tableName);
+
+ // Add partition for the data file
AddPartitionDesc partitionDesc = new AddPartitionDesc(database, tableName, false);
Map<String, String> partSpec = new HashMap<String, String>();
partSpec.put("dummy_partition_col", "dummy_val");
- partitionDesc.addPartition(partSpec, partDir.toURI().toString());
+ partitionDesc.addPartition(partSpec, partDir.toUri().toString());
Hive.get(conf).createPartitions(partitionDesc);
+ LOG.info(tableName + ": Added partition " + partDir.toUri().toString());
}
}
http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/88b900c3/lens-ml-lib/src/main/java/org/apache/lens/ml/LensML.java
----------------------------------------------------------------------
diff --git a/lens-ml-lib/src/main/java/org/apache/lens/ml/LensML.java b/lens-ml-lib/src/main/java/org/apache/lens/ml/LensML.java
index 7c4fc7e..a09a93f 100644
--- a/lens-ml-lib/src/main/java/org/apache/lens/ml/LensML.java
+++ b/lens-ml-lib/src/main/java/org/apache/lens/ml/LensML.java
@@ -34,7 +34,7 @@ public interface LensML {
/**
* Get list of available machine learning algorithms
- *
+ *
* @return
*/
public List<String> getAlgorithms();
@@ -125,8 +125,8 @@ public interface LensML {
* @throws LensException
* the lens exception
*/
- public MLTestReport testModel(LensSessionHandle session, String table, String algorithm, String modelID)
- throws LensException;
+ public MLTestReport testModel(LensSessionHandle session, String table, String algorithm, String modelID,
+ String outputTable) throws LensException;
/**
* Get test reports for an algorithm.
http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/88b900c3/lens-ml-lib/src/main/java/org/apache/lens/ml/LensMLImpl.java
----------------------------------------------------------------------
diff --git a/lens-ml-lib/src/main/java/org/apache/lens/ml/LensMLImpl.java b/lens-ml-lib/src/main/java/org/apache/lens/ml/LensMLImpl.java
index 0073e57..2233a8c 100644
--- a/lens-ml-lib/src/main/java/org/apache/lens/ml/LensMLImpl.java
+++ b/lens-ml-lib/src/main/java/org/apache/lens/ml/LensMLImpl.java
@@ -349,8 +349,8 @@ public class LensMLImpl implements LensML {
* java.lang.String)
*/
@Override
- public MLTestReport testModel(LensSessionHandle session, String table, String algorithm, String modelID)
- throws LensException {
+ public MLTestReport testModel(LensSessionHandle session, String table, String algorithm, String modelID,
+ String outputTable) throws LensException {
return null;
}
@@ -372,12 +372,20 @@ public class LensMLImpl implements LensML {
* the lens exception
*/
public MLTestReport testModelRemote(LensSessionHandle sessionHandle, String table, String algorithm, String modelID,
- String queryApiUrl) throws LensException {
- return testModel(sessionHandle, table, algorithm, modelID, new RemoteQueryRunner(sessionHandle, queryApiUrl));
+ String queryApiUrl, String outputTable) throws LensException {
+ return testModel(sessionHandle, table, algorithm, modelID, new RemoteQueryRunner(sessionHandle, queryApiUrl),
+ outputTable);
}
/**
- * Test model.
+ * Evaluate a model. Evaluation is done on data selected from an input table. The model is run as a UDF and its
+ * output is inserted into a table with a partition. Each evaluation is given a unique ID. The partition label is
+ * associated with this unique ID.
+ *
+ * <p>
+ * This call also requires a query runner. Query runner is responsible for executing the evaluation query against Lens
+ * server.
+ * </p>
*
* @param sessionHandle
* the session handle
@@ -389,18 +397,20 @@ public class LensMLImpl implements LensML {
* the model id
* @param queryRunner
* the query runner
+ * @param outputTable
+ * table where test output will be written
* @return the ML test report
* @throws LensException
* the lens exception
*/
public MLTestReport testModel(LensSessionHandle sessionHandle, String table, String algorithm, String modelID,
- TestQueryRunner queryRunner) throws LensException {
+ TestQueryRunner queryRunner, String outputTable) throws LensException {
// check if algorithm exists
if (!getAlgorithms().contains(algorithm)) {
throw new LensException("No such algorithm " + algorithm);
}
- MLModel model;
+ MLModel<?> model;
try {
model = ModelLoader.loadModel(conf, algorithm, modelID);
} catch (IOException e) {
@@ -418,22 +428,31 @@ public class LensMLImpl implements LensML {
}
String testID = UUID.randomUUID().toString().replace("-", "_");
- final String testTable = "ml_test_" + testID;
+ final String testTable = outputTable;
final String testResultColumn = "prediction_result";
// TODO support error metric UDAFs
TableTestingSpec spec = TableTestingSpec.newBuilder().hiveConf(conf)
- .database(database == null ? "default" : database).table(table).featureColumns(model.getFeatureColumns())
+ .database(database == null ? "default" : database).inputTable(table).featureColumns(model.getFeatureColumns())
.outputColumn(testResultColumn).labeColumn(model.getLabelColumn()).algorithm(algorithm).modelID(modelID)
- .outputTable(testTable).build();
- String testQuery = spec.getTestQuery();
+ .outputTable(testTable).testID(testID).build();
+ String testQuery = spec.getTestQuery();
if (testQuery == null) {
throw new LensException("Invalid test spec. " + "table=" + table + " algorithm=" + algorithm + " modelID="
+ modelID);
}
- LOG.info("Running test query " + testQuery);
+ if (!spec.isOutputTableExists()) {
+ LOG.info("Output table '" + testTable + "' does not exist for test algorithm = " + algorithm + " modelid="
+ + modelID + ", Creating table using query: " + spec.getCreateOutputTableQuery());
+ // create the output table
+ String createOutputTableQuery = spec.getCreateOutputTableQuery();
+ queryRunner.runQuery(createOutputTableQuery);
+ LOG.info("Table created " + testTable);
+ }
+
+ LOG.info("Running evaluation query " + testQuery);
QueryHandle testQueryHandle = queryRunner.runQuery(testQuery);
MLTestReport testReport = new MLTestReport();
http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/88b900c3/lens-ml-lib/src/main/java/org/apache/lens/ml/MLModel.java
----------------------------------------------------------------------
diff --git a/lens-ml-lib/src/main/java/org/apache/lens/ml/MLModel.java b/lens-ml-lib/src/main/java/org/apache/lens/ml/MLModel.java
index 1b4b0ad..a9ba0fa 100644
--- a/lens-ml-lib/src/main/java/org/apache/lens/ml/MLModel.java
+++ b/lens-ml-lib/src/main/java/org/apache/lens/ml/MLModel.java
@@ -21,6 +21,7 @@ package org.apache.lens.ml;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
+import lombok.ToString;
import java.io.Serializable;
import java.util.Date;
@@ -30,6 +31,7 @@ import java.util.List;
* Instantiates a new ML model.
*/
@NoArgsConstructor
+@ToString
public abstract class MLModel<PREDICTION> implements Serializable {
/** The id. */
http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/88b900c3/lens-ml-lib/src/main/java/org/apache/lens/ml/MLTestReport.java
----------------------------------------------------------------------
diff --git a/lens-ml-lib/src/main/java/org/apache/lens/ml/MLTestReport.java b/lens-ml-lib/src/main/java/org/apache/lens/ml/MLTestReport.java
index 40b041b..0c70a8c 100644
--- a/lens-ml-lib/src/main/java/org/apache/lens/ml/MLTestReport.java
+++ b/lens-ml-lib/src/main/java/org/apache/lens/ml/MLTestReport.java
@@ -21,6 +21,7 @@ package org.apache.lens.ml;
import lombok.Getter;
import lombok.NoArgsConstructor;
import lombok.Setter;
+import lombok.ToString;
import java.io.Serializable;
import java.util.List;
@@ -29,6 +30,7 @@ import java.util.List;
* Instantiates a new ML test report.
*/
@NoArgsConstructor
+@ToString
public class MLTestReport implements Serializable {
/** The test table. */
http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/88b900c3/lens-ml-lib/src/main/java/org/apache/lens/ml/MLUtils.java
----------------------------------------------------------------------
diff --git a/lens-ml-lib/src/main/java/org/apache/lens/ml/MLUtils.java b/lens-ml-lib/src/main/java/org/apache/lens/ml/MLUtils.java
index aed79d0..ce21d24 100644
--- a/lens-ml-lib/src/main/java/org/apache/lens/ml/MLUtils.java
+++ b/lens-ml-lib/src/main/java/org/apache/lens/ml/MLUtils.java
@@ -18,7 +18,23 @@
*/
package org.apache.lens.ml;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.lens.server.api.LensConfConstants;
+import org.apache.lens.server.api.ServiceProvider;
+import org.apache.lens.server.api.ServiceProviderFactory;
+import org.apache.lens.server.ml.MLService;
+import org.apache.lens.server.ml.MLServiceImpl;
+
public class MLUtils {
+
+ private static final HiveConf hiveConf;
+ static {
+ hiveConf = new HiveConf();
+ // Add default config so that we know the service provider implementation
+ hiveConf.addResource("lensserver-default.xml");
+ hiveConf.addResource("lens-site.xml");
+ }
+
public static String getTrainerName(Class<? extends MLTrainer> trainerClass) {
Algorithm annotation = trainerClass.getAnnotation(Algorithm.class);
if (annotation != null) {
@@ -26,4 +42,15 @@ public class MLUtils {
}
throw new IllegalArgumentException("Trainer should be decorated with annotation - " + Algorithm.class.getName());
}
+
+ public static MLServiceImpl getMLService() throws Exception {
+ return getServiceProvider().getService(MLService.NAME);
+ }
+
+ public static ServiceProvider getServiceProvider() throws Exception {
+ Class<? extends ServiceProviderFactory> spfClass = hiveConf.getClass(LensConfConstants.SERVICE_PROVIDER_FACTORY,
+ null, ServiceProviderFactory.class);
+ ServiceProviderFactory spf = spfClass.newInstance();
+ return spf.getServiceProvider();
+ }
}
http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/88b900c3/lens-ml-lib/src/main/java/org/apache/lens/ml/TableTestingSpec.java
----------------------------------------------------------------------
diff --git a/lens-ml-lib/src/main/java/org/apache/lens/ml/TableTestingSpec.java b/lens-ml-lib/src/main/java/org/apache/lens/ml/TableTestingSpec.java
index d4feeec..e73e989 100644
--- a/lens-ml-lib/src/main/java/org/apache/lens/ml/TableTestingSpec.java
+++ b/lens-ml-lib/src/main/java/org/apache/lens/ml/TableTestingSpec.java
@@ -18,6 +18,12 @@
*/
package org.apache.lens.ml;
+import java.util.ArrayList;
+import java.util.HashMap;
+import java.util.List;
+
+import lombok.Getter;
+
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
@@ -27,9 +33,6 @@ import org.apache.hadoop.hive.ql.metadata.Hive;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.metadata.Table;
-import java.util.ArrayList;
-import java.util.List;
-
/**
* Table specification for running test on a table.
*/
@@ -41,8 +44,9 @@ public class TableTestingSpec {
/** The db. */
private String db;
- /** The table. */
- private String table;
+ /** The table containing input data. */
+ private String inputTable;
+
// TODO use partition condition
/** The partition filter. */
private String partitionFilter;
@@ -68,6 +72,13 @@ public class TableTestingSpec {
/** The model id. */
private String modelID;
+ @Getter
+ private boolean outputTableExists;
+
+ public String testID;
+
+ private HashMap<String, FieldSchema> columnNameToFieldSchema;
+
/**
* The Class TableTestingSpecBuilder.
*/
@@ -96,19 +107,19 @@ public class TableTestingSpec {
}
/**
- * Table.
+ * Set the input table
*
* @param table
* the table
* @return the table testing spec builder
*/
- public TableTestingSpecBuilder table(String table) {
- spec.table = table;
+ public TableTestingSpecBuilder inputTable(String table) {
+ spec.inputTable = table;
return this;
}
/**
- * Partition filter.
+ * Partition filter for input table
*
* @param partFilter
* the part filter
@@ -211,6 +222,17 @@ public class TableTestingSpec {
public TableTestingSpec build() {
return spec;
}
+
+ /**
+ * Set the unique test id
+ *
+ * @param testID
+ * @return
+ */
+ public TableTestingSpecBuilder testID(String testID) {
+ spec.testID = testID;
+ return this;
+ }
}
/**
@@ -231,8 +253,17 @@ public class TableTestingSpec {
List<FieldSchema> columns;
try {
Hive metastoreClient = Hive.get(conf);
- Table tbl = (db == null) ? metastoreClient.getTable(table) : metastoreClient.getTable(db, table);
+ Table tbl = (db == null) ? metastoreClient.getTable(inputTable) : metastoreClient.getTable(db, inputTable);
columns = tbl.getAllCols();
+ columnNameToFieldSchema = new HashMap<String, FieldSchema>();
+
+ for (FieldSchema fieldSchema : columns) {
+ columnNameToFieldSchema.put(fieldSchema.getName(), fieldSchema);
+ }
+
+ // Check if output table exists
+ Table outTbl = metastoreClient.getTable(db == null ? "default" : db, outputTable, false);
+ outputTableExists = (outTbl != null);
} catch (HiveException exc) {
LOG.error("Error getting table info " + toString(), exc);
return false;
@@ -271,12 +302,33 @@ public class TableTestingSpec {
return null;
}
- StringBuilder q = new StringBuilder("CREATE TABLE " + outputTable + " AS SELECT ");
+ // We always insert into the partition identified by the test ID
+ StringBuilder q = new StringBuilder("INSERT OVERWRITE TABLE " + outputTable + " PARTITION (part_testid='" + testID
+ + "') SELECT ");
String featureCols = StringUtils.join(featureColumns, ",");
q.append(featureCols).append(",").append(labelColumn).append(", ").append("predict(").append("'").append(algorithm)
.append("', ").append("'").append(modelID).append("', ").append(featureCols).append(") ").append(outputColumn)
- .append(" FROM ").append(table);
+ .append(" FROM ").append(inputTable);
return q.toString();
}
+
+ public String getCreateOutputTableQuery() {
+ StringBuilder createTableQuery = new StringBuilder("CREATE TABLE IF NOT EXISTS ").append(outputTable).append("(");
+ // Output table contains feature columns, label column, output column
+ List<String> outputTableColumns = new ArrayList<String>();
+ for (String featureCol : featureColumns) {
+ outputTableColumns.add(featureCol + " " + columnNameToFieldSchema.get(featureCol).getType());
+ }
+
+ outputTableColumns.add(labelColumn + " " + columnNameToFieldSchema.get(labelColumn).getType());
+ outputTableColumns.add(outputColumn + " string");
+
+ createTableQuery.append(StringUtils.join(outputTableColumns, ", "));
+
+ // Append partition column
+ createTableQuery.append(") PARTITIONED BY (part_testid string)");
+
+ return createTableQuery.toString();
+ }
}
http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/88b900c3/lens-ml-lib/src/main/java/org/apache/lens/ml/dao/MLDBUtils.java
----------------------------------------------------------------------
diff --git a/lens-ml-lib/src/main/java/org/apache/lens/ml/dao/MLDBUtils.java b/lens-ml-lib/src/main/java/org/apache/lens/ml/dao/MLDBUtils.java
new file mode 100644
index 0000000..4aecae4
--- /dev/null
+++ b/lens-ml-lib/src/main/java/org/apache/lens/ml/dao/MLDBUtils.java
@@ -0,0 +1,105 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.lens.ml.dao;
+
+import org.apache.lens.ml.MLModel;
+import org.apache.lens.ml.MLTestReport;
+import org.apache.lens.ml.task.MLTask;
+
+/**
+ * Data-access helper for ML metadata: test reports, ML tasks and ML models.
+ *
+ * Every method in this class is currently an unimplemented placeholder: the create/save methods have empty bodies and
+ * the get methods always return null.
+ */
+public class MLDBUtils {
+
+ /**
+ * Create table to store test report data.
+ *
+ * Placeholder: the body is empty, so no table is created yet.
+ */
+ public void createTestReportTable() {
+
+ }
+
+ /**
+ * Create table to store ML task workflow data.
+ *
+ * Placeholder: the body is empty, so no table is created yet.
+ */
+ public void createMLTaskTable() {
+
+ }
+
+ /**
+ * Create table to save ML models.
+ *
+ * Placeholder: the body is empty, so no table is created yet.
+ */
+ public void createMLModelTable() {
+
+ }
+
+ /**
+ * Insert an ML task into the ML task table.
+ *
+ * Placeholder: the body is empty, so nothing is stored yet.
+ *
+ * @param task
+ *          the task to save
+ */
+ public void saveMLTask(MLTask task) {
+
+ }
+
+ /**
+ * Get an ML task given its ID.
+ *
+ * @param taskID
+ *          ID of the task to look up
+ * @return the matching task; currently always null because the lookup is not implemented
+ */
+ public MLTask getMLTask(String taskID) {
+ return null;
+ }
+
+ /**
+ * Insert a test report into the test report table.
+ *
+ * Placeholder: the body is empty, so nothing is stored yet.
+ *
+ * @param testReport
+ *          the test report to save
+ */
+ public void saveTestReport(MLTestReport testReport) {
+
+ }
+
+ /**
+ * Get a test report given its ID.
+ *
+ * @param testReportID
+ *          ID of the test report to look up
+ * @return the matching test report; currently always null because the lookup is not implemented
+ */
+ public MLTestReport getTestReport(String testReportID) {
+ return null;
+ }
+
+ /**
+ * Insert model metadata into the model table.
+ *
+ * Placeholder: the body is empty, so nothing is stored yet.
+ *
+ * @param mlModel
+ *          the model whose metadata should be saved
+ */
+ public void saveMLModel(MLModel<?> mlModel) {
+
+ }
+
+ /**
+ * Get model metadata given the model ID.
+ *
+ * @param modelID
+ *          ID of the model to look up
+ * @return the matching model; currently always null because the lookup is not implemented
+ */
+ public MLModel<?> getMLModel(String modelID) {
+ return null;
+ }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/88b900c3/lens-ml-lib/src/main/java/org/apache/lens/ml/spark/ColumnFeatureFunction.java
----------------------------------------------------------------------
diff --git a/lens-ml-lib/src/main/java/org/apache/lens/ml/spark/ColumnFeatureFunction.java b/lens-ml-lib/src/main/java/org/apache/lens/ml/spark/ColumnFeatureFunction.java
index 1a75af6..0a9fa09 100644
--- a/lens-ml-lib/src/main/java/org/apache/lens/ml/spark/ColumnFeatureFunction.java
+++ b/lens-ml-lib/src/main/java/org/apache/lens/ml/spark/ColumnFeatureFunction.java
@@ -55,7 +55,7 @@ public class ColumnFeatureFunction extends FeatureFunction {
/**
* Feature positions and value mappers are parallel arrays. featurePositions[i] gives the position of ith feature in
* the HCatRecord, and valueMappers[i] gives the value mapper used to map that feature to a Double value
- *
+ *
* @param featurePositions
* position number of feature column in the HCatRecord
* @param valueMappers
http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/88b900c3/lens-ml-lib/src/main/java/org/apache/lens/ml/spark/DoubleValueMapper.java
----------------------------------------------------------------------
diff --git a/lens-ml-lib/src/main/java/org/apache/lens/ml/spark/DoubleValueMapper.java b/lens-ml-lib/src/main/java/org/apache/lens/ml/spark/DoubleValueMapper.java
index 5798a10..f6a2c0b 100644
--- a/lens-ml-lib/src/main/java/org/apache/lens/ml/spark/DoubleValueMapper.java
+++ b/lens-ml-lib/src/main/java/org/apache/lens/ml/spark/DoubleValueMapper.java
@@ -32,6 +32,10 @@ public class DoubleValueMapper extends FeatureValueMapper {
*/
@Override
public final Double call(Object input) {
- return input == null ? 0d : (Double) input;
+ if (input instanceof Double || input == null) {
+ return input == null ? Double.valueOf(0d) : (Double) input;
+ }
+
+ throw new IllegalArgumentException("Invalid input expecting only doubles, but got " + input);
}
}
http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/88b900c3/lens-ml-lib/src/main/java/org/apache/lens/ml/spark/HiveTableRDD.java
----------------------------------------------------------------------
diff --git a/lens-ml-lib/src/main/java/org/apache/lens/ml/spark/HiveTableRDD.java b/lens-ml-lib/src/main/java/org/apache/lens/ml/spark/HiveTableRDD.java
index d819258..fc4456d 100644
--- a/lens-ml-lib/src/main/java/org/apache/lens/ml/spark/HiveTableRDD.java
+++ b/lens-ml-lib/src/main/java/org/apache/lens/ml/spark/HiveTableRDD.java
@@ -18,9 +18,13 @@
*/
package org.apache.lens.ml.spark;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hive.hcatalog.data.HCatRecord;
+import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
+import org.apache.hive.hcatalog.data.schema.HCatSchema;
import org.apache.hive.hcatalog.mapreduce.HCatInputFormat;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaSparkContext;
@@ -31,6 +35,7 @@ import java.io.IOException;
* Create a JavaRDD based on a Hive table using HCatInputFormat.
*/
public class HiveTableRDD {
+ public static final Log LOG = LogFactory.getLog(HiveTableRDD.class);
/**
* Creates the hive table rdd.
@@ -53,12 +58,10 @@ public class HiveTableRDD {
Configuration conf, String db, String table, String partitionFilter) throws IOException {
HCatInputFormat.setInput(conf, db, table, partitionFilter);
+
JavaPairRDD<WritableComparable, HCatRecord> rdd = javaSparkContext.newAPIHadoopRDD(conf, HCatInputFormat.class, // Input
- // format
- // class
WritableComparable.class, // input key class
HCatRecord.class); // input value class
-
return rdd;
}
}
http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/88b900c3/lens-ml-lib/src/main/java/org/apache/lens/ml/spark/TableTrainingSpec.java
----------------------------------------------------------------------
diff --git a/lens-ml-lib/src/main/java/org/apache/lens/ml/spark/TableTrainingSpec.java b/lens-ml-lib/src/main/java/org/apache/lens/ml/spark/TableTrainingSpec.java
index d301961..7be4c08 100644
--- a/lens-ml-lib/src/main/java/org/apache/lens/ml/spark/TableTrainingSpec.java
+++ b/lens-ml-lib/src/main/java/org/apache/lens/ml/spark/TableTrainingSpec.java
@@ -18,19 +18,23 @@
*/
package org.apache.lens.ml.spark;
-import com.google.common.base.Preconditions;
-import org.apache.lens.api.LensException;
+import java.io.IOException;
+import java.io.Serializable;
+import java.util.ArrayList;
+import java.util.List;
+
import lombok.Getter;
-import org.apache.commons.lang.StringUtils;
+import lombok.ToString;
+
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.conf.HiveConf;
-import org.apache.hadoop.hive.metastore.api.FieldSchema;
-import org.apache.hadoop.hive.ql.metadata.Hive;
-import org.apache.hadoop.hive.ql.metadata.HiveException;
-import org.apache.hadoop.hive.ql.metadata.Table;
import org.apache.hadoop.io.WritableComparable;
import org.apache.hive.hcatalog.data.HCatRecord;
+import org.apache.hive.hcatalog.data.schema.HCatFieldSchema;
+import org.apache.hive.hcatalog.data.schema.HCatSchema;
+import org.apache.hive.hcatalog.mapreduce.HCatInputFormat;
+import org.apache.lens.api.LensException;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
@@ -38,14 +42,12 @@ import org.apache.spark.api.java.function.Function;
import org.apache.spark.mllib.regression.LabeledPoint;
import org.apache.spark.rdd.RDD;
-import java.io.IOException;
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.List;
+import com.google.common.base.Preconditions;
/**
* The Class TableTrainingSpec.
*/
+@ToString
public class TableTrainingSpec implements Serializable {
/** The Constant LOG. */
@@ -326,21 +328,23 @@ public class TableTrainingSpec implements Serializable {
* @return true, if successful
*/
boolean validate() {
- List<FieldSchema> columns;
+ List<HCatFieldSchema> columns;
try {
- Hive metastoreClient = Hive.get(conf);
- Table tbl = (database == null) ? metastoreClient.getTable(table) : metastoreClient.getTable(database, table);
- columns = tbl.getAllCols();
- } catch (HiveException exc) {
+ HCatInputFormat.setInput(conf, database == null ? "default" : database, table, partitionFilter);
+ HCatSchema tableSchema = HCatInputFormat.getTableSchema(conf);
+ columns = tableSchema.getFields();
+ } catch (IOException exc) {
LOG.error("Error getting table info " + toString(), exc);
return false;
}
+ LOG.info(table + " columns " + columns.toString());
+
boolean valid = false;
if (columns != null && !columns.isEmpty()) {
// Check labeled column
List<String> columnNames = new ArrayList<String>();
- for (FieldSchema col : columns) {
+ for (HCatFieldSchema col : columns) {
columnNames.add(col.getName());
}
@@ -396,6 +400,8 @@ public class TableTrainingSpec implements Serializable {
throw new LensException("Table spec not valid: " + toString());
}
+ LOG.info("Creating RDDs with spec " + toString());
+
// Get the RDD for table
JavaPairRDD<WritableComparable, HCatRecord> tableRDD;
try {
@@ -435,13 +441,4 @@ public class TableTrainingSpec implements Serializable {
LOG.info("Generated RDDs");
}
- /*
- * (non-Javadoc)
- *
- * @see java.lang.Object#toString()
- */
- @Override
- public String toString() {
- return StringUtils.join(new String[] { database, table, partitionFilter, labelColumn }, ",");
- }
}
http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/88b900c3/lens-ml-lib/src/main/java/org/apache/lens/ml/spark/trainers/BaseSparkTrainer.java
----------------------------------------------------------------------
diff --git a/lens-ml-lib/src/main/java/org/apache/lens/ml/spark/trainers/BaseSparkTrainer.java b/lens-ml-lib/src/main/java/org/apache/lens/ml/spark/trainers/BaseSparkTrainer.java
index 92377eb..115fef3 100644
--- a/lens-ml-lib/src/main/java/org/apache/lens/ml/spark/trainers/BaseSparkTrainer.java
+++ b/lens-ml-lib/src/main/java/org/apache/lens/ml/spark/trainers/BaseSparkTrainer.java
@@ -117,9 +117,10 @@ public abstract class BaseSparkTrainer implements MLTrainer {
* java.lang.String, java.lang.String[])
*/
@Override
- public MLModel train(LensConf conf, String db, String table, String modelId, String... params) throws LensException {
+ public MLModel<?> train(LensConf conf, String db, String table, String modelId, String... params)
+ throws LensException {
parseParams(params);
- LOG.info("Training " + " with " + features.size() + " features");
+
TableTrainingSpec.TableTrainingSpecBuilder builder = TableTrainingSpec.newBuilder().hiveConf(toHiveConf(conf))
.database(db).table(table).partitionFilter(partitionFilter).featureColumns(features).labelColumn(label);
@@ -128,10 +129,12 @@ public abstract class BaseSparkTrainer implements MLTrainer {
}
TableTrainingSpec spec = builder.build();
+ LOG.info("Training " + " with " + features.size() + " features");
+
spec.createRDDs(sparkContext);
RDD<LabeledPoint> trainingRDD = spec.getTrainingRDD();
- BaseSparkClassificationModel model = trainInternal(modelId, trainingRDD);
+ BaseSparkClassificationModel<?> model = trainInternal(modelId, trainingRDD);
model.setTable(table);
model.setParams(Arrays.asList(params));
model.setLabelColumn(label);
http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/88b900c3/lens-ml-lib/src/main/java/org/apache/lens/ml/task/MLTask.java
----------------------------------------------------------------------
diff --git a/lens-ml-lib/src/main/java/org/apache/lens/ml/task/MLTask.java b/lens-ml-lib/src/main/java/org/apache/lens/ml/task/MLTask.java
new file mode 100644
index 0000000..85fd01d
--- /dev/null
+++ b/lens-ml-lib/src/main/java/org/apache/lens/ml/task/MLTask.java
@@ -0,0 +1,306 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.lens.ml.task;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.UUID;
+
+import lombok.Getter;
+import lombok.ToString;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.lens.api.LensSessionHandle;
+import org.apache.lens.client.LensConnectionParams;
+import org.apache.lens.client.LensMLClient;
+import org.apache.lens.ml.LensML;
+import org.apache.lens.ml.MLTestReport;
+import org.apache.lens.ml.MLUtils;
+
+/**
+ * Runs one complete train-and-test (evaluation) cycle for an ML algorithm.
+ *
+ * Instances are created through {@link Builder}. {@link #run()} trains a model on the training table, tests the model
+ * against the same table and records the outcome in {@link #getTaskState()}.
+ *
+ * The password is excluded from the generated toString() so that logging a task does not leak the credential.
+ */
+@ToString(exclude = "password")
+public class MLTask implements Runnable {
+  private static final Log LOG = LogFactory.getLog(MLTask.class);
+
+  /**
+   * Life-cycle states of a task.
+   */
+  public enum State {
+    RUNNING, SUCCESSFUL, FAILED
+  }
+
+  /**
+   * Current state of the task; null until {@link #run()} has been invoked.
+   */
+  @Getter
+  private State taskState;
+
+  /**
+   * Name of the trainer/algorithm.
+   */
+  @Getter
+  private String algorithm;
+
+  /**
+   * Name of the table containing training data.
+   */
+  @Getter
+  private String trainingTable;
+
+  /**
+   * Training table partition spec
+   */
+  @Getter
+  private String partitionSpec;
+
+  /**
+   * Name of the column which is a label for supervised algorithms.
+   */
+  @Getter
+  private String labelColumn;
+
+  /**
+   * Names of columns which are features in the training data. Never null; empty until features are added.
+   */
+  @Getter
+  private List<String> featureColumns;
+
+  /**
+   * Configuration for the task.
+   */
+  @Getter
+  private HiveConf configuration;
+
+  /**
+   * Lens Server base URL, when running the task as a client. When null the task runs inside the Lens server.
+   */
+  @Getter
+  private String serverLocation;
+
+  /** ML service used by the task; resolved at the start of every run. */
+  private LensML ml;
+
+  /** Random unique ID generated when the task is constructed. */
+  private String taskID;
+
+  /**
+   * Output table name
+   */
+  @Getter
+  private String outputTable;
+
+  /**
+   * Session handle
+   */
+  @Getter
+  private LensSessionHandle sessionHandle;
+
+  /**
+   * Extra params passed to the training algorithm
+   */
+  @Getter
+  private Map<String, String> extraParams;
+
+  /**
+   * User name to connect to Lens server
+   */
+  // NOTE(review): kept public for backward compatibility; prefer the generated getter and consider making it private
+  @Getter
+  public String userName;
+
+  /**
+   * Password to connect to Lens server
+   */
+  // NOTE(review): kept public for backward compatibility; prefer the generated getter and consider making it private
+  @Getter
+  public String password;
+
+  /**
+   * Use MLTask.Builder to create an instance
+   */
+  private MLTask() {
+    // Collections start out empty so that getters and argument building never have to deal with null
+    extraParams = new HashMap<String, String>();
+    featureColumns = new ArrayList<String>();
+    taskID = UUID.randomUUID().toString();
+  }
+
+  /**
+   * Builder to create an ML task.
+   *
+   * A builder is single-use: {@link #build()} hands over the task and drops the builder's reference to it, so a second
+   * call to build() returns null and any further setter call fails with a NullPointerException.
+   */
+  public static class Builder {
+    private MLTask task;
+
+    public Builder() {
+      task = new MLTask();
+    }
+
+    public Builder trainingTable(String trainingTable) {
+      task.trainingTable = trainingTable;
+      return this;
+    }
+
+    public Builder algorithm(String algorithm) {
+      task.algorithm = algorithm;
+      return this;
+    }
+
+    public Builder labelColumn(String labelColumn) {
+      task.labelColumn = labelColumn;
+      return this;
+    }
+
+    public Builder addFeatureColumn(String featureColumn) {
+      task.featureColumns.add(featureColumn);
+      return this;
+    }
+
+    public Builder hiveConf(HiveConf hiveConf) {
+      task.configuration = hiveConf;
+      return this;
+    }
+
+    public Builder serverLocation(String serverLocation) {
+      task.serverLocation = serverLocation;
+      return this;
+    }
+
+    public Builder sessionHandle(LensSessionHandle sessionHandle) {
+      task.sessionHandle = sessionHandle;
+      return this;
+    }
+
+    public Builder extraParam(String param, String value) {
+      task.extraParams.put(param, value);
+      return this;
+    }
+
+    public Builder partitionSpec(String partitionSpec) {
+      task.partitionSpec = partitionSpec;
+      return this;
+    }
+
+    public Builder outputTable(String outputTable) {
+      task.outputTable = outputTable;
+      return this;
+    }
+
+    /**
+     * Returns the configured task and releases the builder's reference to it.
+     *
+     * @return the built task
+     */
+    public MLTask build() {
+      MLTask builtTask = task;
+      task = null;
+      return builtTask;
+    }
+
+    public Builder userName(String userName) {
+      task.userName = userName;
+      return this;
+    }
+
+    public Builder password(String password) {
+      task.password = password;
+      return this;
+    }
+  }
+
+  /**
+   * Runs the task and records the result in the task state. Exceptions are not propagated: a failure is logged and the
+   * state is set to {@link State#FAILED}.
+   */
+  @Override
+  public void run() {
+    taskState = State.RUNNING;
+    LOG.info("Starting " + taskID);
+    try {
+      runTask();
+      taskState = State.SUCCESSFUL;
+      LOG.info("Complete " + taskID);
+    } catch (Exception e) {
+      // Boundary of the task: record the failure instead of letting it escape to the executing thread
+      taskState = State.FAILED;
+      LOG.error("Error running task " + taskID, e);
+    }
+  }
+
+  /**
+   * Train an ML model, with specified algorithm and input data. Do model evaluation using the evaluation data and print
+   * evaluation result
+   *
+   * @throws Exception
+   *           if connecting, training or testing fails
+   */
+  private void runTask() throws Exception {
+    if (serverLocation != null) {
+      // Fail fast with a clear message; the handle is dereferenced below and passed to the test call
+      if (sessionHandle == null) {
+        throw new IllegalStateException("Session handle is required to run task " + taskID + " in client mode");
+      }
+
+      // Connect to a remote Lens server
+      LensConnectionParams connectionParams = new LensConnectionParams();
+      connectionParams.setBaseUrl(serverLocation);
+      connectionParams.getConf().setUser(userName);
+      // NOTE(review): this client is never closed; confirm whether closing it would also end the caller-supplied
+      // session before adding cleanup here
+      LensMLClient mlClient = new LensMLClient(connectionParams, sessionHandle);
+      ml = mlClient;
+      LOG.info("Working in client mode. Lens session handle " + sessionHandle.getPublicId());
+    } else {
+      // In server mode session handle has to be passed by the user as a request parameter
+      ml = MLUtils.getMLService();
+      LOG.info("Working in Lens server");
+    }
+
+    String[] trainerArgs = buildTrainingArgs();
+    LOG.info("Starting task " + taskID + " trainer args: " + Arrays.toString(trainerArgs));
+
+    String modelID = ml.train(trainingTable, algorithm, trainerArgs);
+    printModelMetadata(taskID, modelID);
+
+    LOG.info("Starting test " + taskID);
+    MLTestReport testReport = ml.testModel(sessionHandle, trainingTable, algorithm, modelID, outputTable);
+    printTestReport(taskID, testReport);
+    saveTask();
+  }
+
+  // Save task metadata to DB. Currently only logs; nothing is persisted yet.
+  private void saveTask() {
+    LOG.info("Saving task details to DB");
+  }
+
+  // Print the evaluation report of the given task to stdout
+  private void printTestReport(String exampleID, MLTestReport testReport) {
+    StringBuilder builder = new StringBuilder("Example: ").append(exampleID);
+    builder.append("\n\t");
+    builder.append("EvaluationReport: ").append(testReport.toString());
+    System.out.println(builder.toString());
+  }
+
+  /**
+   * Builds the trainer argument array as alternating name/value entries: the label column first, then one "feature"
+   * entry per feature column, then every extra param.
+   *
+   * @return the arguments passed to the trainer
+   */
+  private String[] buildTrainingArgs() {
+    List<String> argList = new ArrayList<String>();
+    argList.add("label");
+    argList.add(labelColumn);
+
+    // Add all the features
+    for (String featureCol : featureColumns) {
+      argList.add("feature");
+      argList.add(featureCol);
+    }
+
+    // Add extra params
+    for (Map.Entry<String, String> param : extraParams.entrySet()) {
+      argList.add(param.getKey());
+      argList.add(param.getValue());
+    }
+
+    return argList.toArray(new String[argList.size()]);
+  }
+
+  // Get the model instance and print its metadata to stdout
+  private void printModelMetadata(String exampleID, String modelID) throws Exception {
+    StringBuilder builder = new StringBuilder("Example: ").append(exampleID);
+    builder.append("\n\t");
+    builder.append("Model: ");
+    builder.append(ml.getModel(algorithm, modelID).toString());
+    System.out.println(builder.toString());
+  }
+}
http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/88b900c3/lens-ml-lib/src/main/java/org/apache/lens/server/ml/MLApp.java
----------------------------------------------------------------------
diff --git a/lens-ml-lib/src/main/java/org/apache/lens/server/ml/MLApp.java b/lens-ml-lib/src/main/java/org/apache/lens/server/ml/MLApp.java
index 4388d23..75d4f03 100644
--- a/lens-ml-lib/src/main/java/org/apache/lens/server/ml/MLApp.java
+++ b/lens-ml-lib/src/main/java/org/apache/lens/server/ml/MLApp.java
@@ -29,13 +29,32 @@ import org.glassfish.jersey.media.multipart.MultiPartFeature;
@ApplicationPath("/ml")
public class MLApp extends Application {
- @Override
- public Set<Class<?>> getClasses() {
- final Set<Class<?>> classes = new HashSet<Class<?>>();
+
+ private final Set<Class<?>> classes;
+
+ /**
+ * Pass additional classes when running in test mode
+ *
+ * @param additionalClasses
+ */
+ public MLApp(Class<?>... additionalClasses) {
+ classes = new HashSet<Class<?>>();
+
// register root resource
classes.add(MLServiceResource.class);
classes.add(MultiPartFeature.class);
classes.add(LoggingFilter.class);
+ for (Class<?> cls : additionalClasses) {
+ classes.add(cls);
+ }
+
+ }
+
+ /**
+ * Get classes for this resource
+ */
+ @Override
+ public Set<Class<?>> getClasses() {
return classes;
}
}
http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/88b900c3/lens-ml-lib/src/main/java/org/apache/lens/server/ml/MLServiceImpl.java
----------------------------------------------------------------------
diff --git a/lens-ml-lib/src/main/java/org/apache/lens/server/ml/MLServiceImpl.java b/lens-ml-lib/src/main/java/org/apache/lens/server/ml/MLServiceImpl.java
index d1b5ff3..be7cd27 100644
--- a/lens-ml-lib/src/main/java/org/apache/lens/server/ml/MLServiceImpl.java
+++ b/lens-ml-lib/src/main/java/org/apache/lens/server/ml/MLServiceImpl.java
@@ -32,6 +32,7 @@ import org.apache.lens.server.api.query.QueryExecutionService;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.conf.HiveConf;
+import org.apache.hadoop.hive.ql.exec.FunctionRegistry;
import org.apache.hive.service.CompositeService;
import org.apache.lens.server.ml.MLService;
@@ -203,10 +204,10 @@ public class MLServiceImpl extends CompositeService implements MLService {
* java.lang.String)
*/
@Override
- public MLTestReport testModel(LensSessionHandle sessionHandle, String table, String algorithm, String modelID)
- throws LensException {
+ public MLTestReport testModel(LensSessionHandle sessionHandle, String table, String algorithm, String modelID,
+ String outputTable) throws LensException {
- return ml.testModel(sessionHandle, table, algorithm, modelID, new DirectQueryRunner(sessionHandle));
+ return ml.testModel(sessionHandle, table, algorithm, modelID, new DirectQueryRunner(sessionHandle), outputTable);
}
/*
@@ -281,6 +282,8 @@ public class MLServiceImpl extends CompositeService implements MLService {
*/
@Override
public QueryHandle runQuery(String testQuery) throws LensException {
+ FunctionRegistry.registerTemporaryFunction("predict", HiveMLUDF.class);
+ LOG.info("Registered predict UDF");
// Run the query in query executions service
QueryExecutionService queryService = (QueryExecutionService) getServiceProvider().getService("query");
http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/88b900c3/lens-ml-lib/src/main/java/org/apache/lens/server/ml/MLServiceResource.java
----------------------------------------------------------------------
diff --git a/lens-ml-lib/src/main/java/org/apache/lens/server/ml/MLServiceResource.java b/lens-ml-lib/src/main/java/org/apache/lens/server/ml/MLServiceResource.java
index 2e9bbaf..08fc64b 100644
--- a/lens-ml-lib/src/main/java/org/apache/lens/server/ml/MLServiceResource.java
+++ b/lens-ml-lib/src/main/java/org/apache/lens/server/ml/MLServiceResource.java
@@ -37,6 +37,7 @@ import org.glassfish.jersey.media.multipart.FormDataParam;
import javax.ws.rs.*;
import javax.ws.rs.core.*;
+
import java.util.*;
import static org.apache.commons.lang.StringUtils.isBlank;
@@ -62,6 +63,11 @@ public class MLServiceResource {
private static final HiveConf hiveConf;
+ /**
+ * Message indicating if ML service is up
+ */
+ public static final String ML_UP_MESSAGE = "ML service is up";
+
static {
hiveConf = new HiveConf();
// Add default config so that we know the service provider implementation
@@ -109,6 +115,16 @@ public class MLServiceResource {
}
/**
+ * Indicates if ML resource is up
+ *
+ * @return
+ */
+ @GET
+ public String mlResourceUp() {
+ return ML_UP_MESSAGE;
+ }
+
+ /**
* Get a list of trainers available
*
* @return
@@ -122,7 +138,7 @@ public class MLServiceResource {
}
/**
- * Gets the param description.
+ * Gets the human readable param description of an algorithm
*
* @param algorithm
* the algorithm
@@ -277,10 +293,9 @@ public class MLServiceResource {
trainerArgs.add(values.get(0));
}
}
-
+ LOG.info("Training table " + table + " with algo " + algorithm + " params=" + trainerArgs.toString());
String modelId = getMlService().train(table, algorithm, trainerArgs.toArray(new String[] {}));
- LOG.info("Trained table " + table + " with algo " + algorithm + " params=" + trainerArgs.toString() + ", modelID="
- + modelId);
+ LOG.info("Done training " + table + " modelid = " + modelId);
return modelId;
}
@@ -317,8 +332,9 @@ public class MLServiceResource {
@Path("test/{table}/{algorithm}/{modelID}")
@Consumes(MediaType.MULTIPART_FORM_DATA)
public String test(@PathParam("algorithm") String algorithm, @PathParam("modelID") String modelID,
- @PathParam("table") String table, @FormDataParam("sessionid") LensSessionHandle session) throws LensException {
- MLTestReport testReport = getMlService().testModel(session, table, algorithm, modelID);
+ @PathParam("table") String table, @FormDataParam("sessionid") LensSessionHandle session,
+ @FormDataParam("outputTable") String outputTable) throws LensException {
+ MLTestReport testReport = getMlService().testModel(session, table, algorithm, modelID, outputTable);
return testReport.getReportID();
}
http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/88b900c3/lens-ml-lib/src/test/java/org/apache/lens/ml/TestMLResource.java
----------------------------------------------------------------------
diff --git a/lens-ml-lib/src/test/java/org/apache/lens/ml/TestMLResource.java b/lens-ml-lib/src/test/java/org/apache/lens/ml/TestMLResource.java
index 3fe0225..4abc010 100644
--- a/lens-ml-lib/src/test/java/org/apache/lens/ml/TestMLResource.java
+++ b/lens-ml-lib/src/test/java/org/apache/lens/ml/TestMLResource.java
@@ -18,21 +18,27 @@
*/
package org.apache.lens.ml;
+import java.io.File;
+import java.net.URI;
+import java.util.HashMap;
import java.util.List;
+import java.util.Map;
import javax.ws.rs.client.WebTarget;
import javax.ws.rs.core.Application;
-import javax.ws.rs.core.MediaType;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.hadoop.hive.conf.HiveConf;
import org.apache.hive.service.Service;
-import org.apache.lens.api.StringList;
+import org.apache.lens.api.LensSessionHandle;
+import org.apache.lens.client.LensConnectionParams;
+import org.apache.lens.client.LensMLClient;
import org.apache.lens.ml.spark.trainers.DecisionTreeTrainer;
import org.apache.lens.ml.spark.trainers.LogisticRegressionTrainer;
import org.apache.lens.ml.spark.trainers.NaiveBayesTrainer;
import org.apache.lens.ml.spark.trainers.SVMTrainer;
+import org.apache.lens.ml.task.MLTask;
import org.apache.lens.server.LensJerseyTest;
import org.apache.lens.server.LensServerConf;
import org.apache.lens.server.api.LensConfConstants;
@@ -41,8 +47,13 @@ import org.apache.lens.server.api.ServiceProviderFactory;
import org.apache.lens.server.ml.MLApp;
import org.apache.lens.server.ml.MLService;
import org.apache.lens.server.ml.MLServiceImpl;
+import org.apache.lens.server.ml.MLServiceResource;
+import org.apache.lens.server.query.QueryServiceResource;
+import org.apache.lens.server.session.HiveSessionService;
+import org.apache.lens.server.session.SessionResource;
import org.glassfish.jersey.client.ClientConfig;
import org.glassfish.jersey.media.multipart.MultiPartFeature;
+import org.glassfish.jersey.server.ResourceConfig;
import org.testng.Assert;
import org.testng.annotations.AfterTest;
import org.testng.annotations.BeforeMethod;
@@ -52,14 +63,25 @@ import org.testng.annotations.Test;
@Test
public class TestMLResource extends LensJerseyTest {
private static final Log LOG = LogFactory.getLog(TestMLResource.class);
+ private static final String TEST_CONN_URL = "http://localhost:8089/lens-server";
+ private static final LensConnectionParams connectionParams = new LensConnectionParams();
+
+ static {
+ connectionParams.setBaseUrl(TEST_CONN_URL);
+ connectionParams.getConf().setUser("foo@localhost");
+ }
+
private WebTarget mlTarget;
+ private LensMLClient mlClient;
+ private ServiceProvider serviceProvider;
+ private LensSessionHandle sessionHandle;
- public static ServiceProvider getServiceProvider() throws Exception {
+ public void setServiceProvider() throws Exception {
HiveConf conf = LensServerConf.get();
Class<? extends ServiceProviderFactory> spfClass = conf.getClass(LensConfConstants.SERVICE_PROVIDER_FACTORY, null,
ServiceProviderFactory.class);
ServiceProviderFactory spf = spfClass.newInstance();
- return spf.getServiceProvider();
+ this.serviceProvider = spf.getServiceProvider();
}
@Override
@@ -69,7 +91,7 @@ public class TestMLResource extends LensJerseyTest {
@Override
protected Application configure() {
- return new MLApp();
+ return new MLApp(SessionResource.class, QueryServiceResource.class);
}
@Override
@@ -80,35 +102,39 @@ public class TestMLResource extends LensJerseyTest {
@BeforeTest
public void setUp() throws Exception {
super.setUp();
+ setServiceProvider();
+ HiveSessionService sessionService = serviceProvider.getService(HiveSessionService.NAME);
+ this.sessionHandle = sessionService.openSession("foo@localhost", "bar", new HashMap<String, String>());
+ mlClient = new LensMLClient(connectionParams, sessionHandle);
}
@AfterTest
public void tearDown() throws Exception {
super.tearDown();
+ mlClient.close();
}
@BeforeMethod
public void setMLTarget() {
mlTarget = target().path("ml");
- LOG.info("## setMLTarget");
}
@Test
public void testStartMLServiceStarted() throws Exception {
LOG.info("## testStartMLServiceStarted");
- ServiceProvider serviceProvider = getServiceProvider();
MLServiceImpl svcImpl = serviceProvider.getService(MLService.NAME);
Assert.assertEquals(svcImpl.getServiceState(), Service.STATE.STARTED);
}
@Test
- public void testGetTrainers() throws Exception {
- final WebTarget trainerTarget = mlTarget.path("trainers");
- LOG.info("## testGetTrainers: " + trainerTarget.getUri());
- StringList trainerList = trainerTarget.request(MediaType.APPLICATION_XML).get(StringList.class);
- Assert.assertNotNull(trainerList);
+ public void testMLResourceUp() throws Exception {
+ String mlUpMsg = mlTarget.request().get(String.class);
+ Assert.assertEquals(mlUpMsg, MLServiceResource.ML_UP_MESSAGE);
+ }
- List<String> trainerNames = trainerList.getElements();
+ @Test
+ public void testGetTrainers() throws Exception {
+ List<String> trainerNames = mlClient.getAlgorithms();
Assert.assertNotNull(trainerNames);
Assert.assertTrue(trainerNames.contains(MLUtils.getTrainerName(NaiveBayesTrainer.class)),
@@ -124,4 +150,55 @@ public class TestMLResource extends LensJerseyTest {
MLUtils.getTrainerName(DecisionTreeTrainer.class));
}
+ @Test
+ public void testGetTrainerParams() throws Exception {
+ Map<String, String> params = mlClient.getAlgoParamDescription(MLUtils.getTrainerName(DecisionTreeTrainer.class));
+ Assert.assertNotNull(params);
+ Assert.assertFalse(params.isEmpty());
+
+ for (String key : params.keySet()) {
+ LOG.info("## Param " + key + " help = " + params.get(key));
+ }
+ }
+
+ @Test
+ public void trainAndEval() throws Exception {
+ LOG.info("Starting train & eval");
+ final String algoName = MLUtils.getTrainerName(NaiveBayesTrainer.class);
+ HiveConf conf = new HiveConf();
+ String database = "default";
+ String tableName = "naivebayes_training_table";
+ String sampleDataFilePath = "data/naive_bayes/naive_bayes_train.data";
+
+ File sampleDataFile = new File(sampleDataFilePath);
+ URI sampleDataFileURI = sampleDataFile.toURI();
+
+ String labelColumn = "label";
+ String features[] = { "feature_1", "feature_2", "feature_3" };
+ String outputTable = "naivebayes_eval_table";
+
+ LOG.info("Creating training table from file " + sampleDataFileURI.toString());
+
+ Map<String, String> tableParams = new HashMap<String, String>();
+ ExampleUtils.createTable(conf, database, tableName, sampleDataFileURI.toString(), labelColumn, tableParams,
+ features);
+ MLTask.Builder taskBuilder = new MLTask.Builder();
+
+ taskBuilder.algorithm(algoName).hiveConf(conf).labelColumn(labelColumn).outputTable(outputTable)
+ .serverLocation(getBaseUri().toString()).sessionHandle(mlClient.getSessionHandle()).trainingTable(tableName)
+ .userName("foo@localhost").password("bar");
+
+ // Add features
+ taskBuilder.addFeatureColumn("feature_1").addFeatureColumn("feature_2").addFeatureColumn("feature_3");
+
+ MLTask task = taskBuilder.build();
+
+ LOG.info("Created task " + task.toString());
+ task.run();
+ Assert.assertEquals(task.getTaskState(), MLTask.State.SUCCESSFUL);
+
+ LOG.info("Completed task run");
+
+ }
+
}
http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/88b900c3/lens-ml-lib/testutils/hadoop
----------------------------------------------------------------------
diff --git a/lens-ml-lib/testutils/hadoop b/lens-ml-lib/testutils/hadoop
new file mode 100755
index 0000000..415561f
--- /dev/null
+++ b/lens-ml-lib/testutils/hadoop
@@ -0,0 +1,110 @@
+#!/usr/bin/env bash
+
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# This script satisfies Hive's bin/hadoop dependency when running tests.
+# It's a quick and dirty hack, and should eventually be made obsolete by
+# completely eliminating Hive's dependency on bin/hadoop.
+
+bin=`which $0`
+bin=`dirname ${bin}`
+bin=`cd "$bin"; pwd`
+
+# Newer versions of glibc use an arena memory allocator that causes virtual
+# memory usage to explode. This interacts badly with the many threads that
+# we use in Hadoop. Tune the variable down to prevent vmem explosion.
+export MALLOC_ARENA_MAX=${MALLOC_ARENA_MAX:-4}
+
+JAVA=$JAVA_HOME/bin/java
+JAVA_HEAP_MAX=-Xmx1000m
+
+# check envvars which might override default args
+if [ "$HADOOP_HEAPSIZE" != "" ]; then
+ JAVA_HEAP_MAX="-Xmx""$HADOOP_HEAPSIZE""m"
+fi
+
+# CLASSPATH initially contains $HADOOP_CONF_DIR
+CLASSPATH="${HADOOP_CONF_DIR}"
+
+# so that filenames w/ spaces are handled correctly in loops below
+IFS=
+
+BUILD_ROOT=./target/hadoop_logs
+
+#if [ "$HIVE_HADOOP_TEST_CLASSPATH" != "" ]; then
+# CLASSPATH=${CLASSPATH}:${HIVE_HADOOP_TEST_CLASSPATH}
+#else
+# echo "Error: HIVE_HADOOP_TEST_CLASSPATH not defined."
+# exit 1
+#fi
+
+# restore ordinary behaviour
+unset IFS
+
+mkdir -p $BUILD_ROOT/test/hadoop/logs
+HADOOP_LOG_DIR=$BUILD_ROOT/test/hadoop/logs
+HADOOP_LOGFILE=hadoop.log
+
+HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.dir=$HADOOP_LOG_DIR"
+HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.log.file=$HADOOP_LOGFILE"
+HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.home.dir=$HADOOP_PREFIX"
+HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.id.str=$HADOOP_IDENT_STRING"
+HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.root.logger=${HADOOP_ROOT_LOGGER:-INFO,console}"
+if [ "x$JAVA_LIBRARY_PATH" != "x" ]; then
+ HADOOP_OPTS="$HADOOP_OPTS -Djava.library.path=$JAVA_LIBRARY_PATH"
+fi
+HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.policy.file=$HADOOP_POLICYFILE"
+
+# Disable ipv6 as it can cause issues
+HADOOP_OPTS="$HADOOP_OPTS -Djava.net.preferIPv4Stack=true"
+
+COMMAND=$1
+# the core commands
+if [ "$COMMAND" = "fs" ] ; then
+ CLASS=org.apache.hadoop.fs.FsShell
+ HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
+elif [ "$COMMAND" = "version" ] ; then
+ CLASS=org.apache.hadoop.util.VersionInfo
+ HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
+elif [ "$COMMAND" = "jar" ] ; then
+ CLASS=org.apache.hadoop.util.RunJar
+elif [ "$COMMAND" = "distcp" ] ; then
+ CLASS=org.apache.hadoop.tools.DistCp
+ CLASSPATH=${CLASSPATH}:${TOOL_PATH}
+ HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
+elif [ "$COMMAND" = "daemonlog" ] ; then
+ CLASS=org.apache.hadoop.log.LogLevel
+ HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
+elif [ "$COMMAND" = "archive" ] ; then
+ CLASS=org.apache.hadoop.tools.HadoopArchives
+ CLASSPATH=${CLASSPATH}:${TOOL_PATH}
+ HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
+elif [[ "$COMMAND" = -* ]] ; then
+ # class and package names cannot begin with a -
+ echo "Error: No command named \`$COMMAND' was found. Perhaps you meant \`hadoop ${COMMAND#-}'"
+ exit 1
+else
+ HADOOP_OPTS="$HADOOP_OPTS $HADOOP_CLIENT_OPTS"
+ CLASS=$COMMAND
+fi
+shift
+
+#make sure security appender is turned off
+HADOOP_OPTS="$HADOOP_OPTS -Dhadoop.security.logger=${HADOOP_SECURITY_LOGGER:-INFO,NullAppender}"
+
+export CLASSPATH=$TEST_CLASSPATH:$CLASSPATH:`cat $MVN_CLASSPATH_FILE`
+echo "Classpath file -->" $MVN_CLASSPATH_FILE
+exec "$JAVA" $JAVA_HEAP_MAX $HADOOP_OPTS $CLASS "$@"
http://git-wip-us.apache.org/repos/asf/incubator-lens/blob/88b900c3/lens-server/src/test/resources/lens-site.xml
----------------------------------------------------------------------
diff --git a/lens-server/src/test/resources/lens-site.xml b/lens-server/src/test/resources/lens-site.xml
index e0a47bf..1dfb0b6 100644
--- a/lens-server/src/test/resources/lens-site.xml
+++ b/lens-server/src/test/resources/lens-site.xml
@@ -137,4 +137,16 @@
<description>Implementation class for session service</description>
</property>
+<property>
+ <name>lens.server.ws.resourcenames</name>
+ <value>session,metastore,query,quota,scheduler,index,ml</value>
+ <description>These JAX-RS resources are started in the specified order when lens-server starts up</description>
+</property>
+
+<property>
+ <name>lens.server.ml.ws.resource.impl</name>
+ <value>org.apache.lens.server.ml.MLServiceResource</value>
+ <description>Implementation class for ML Service Resource</description>
+</property>
+
</configuration>