You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sdap.apache.org by le...@apache.org on 2017/12/19 14:13:12 UTC

[11/17] incubator-sdap-mudrod git commit: SDAP-7 Change all package namespaces to org.apache.sdap

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/weblog/process/UserHistoryAnalyzer.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/weblog/process/UserHistoryAnalyzer.java b/core/src/main/java/gov/nasa/jpl/mudrod/weblog/process/UserHistoryAnalyzer.java
deleted file mode 100644
index 248756b..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/weblog/process/UserHistoryAnalyzer.java
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you 
- * may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package gov.nasa.jpl.mudrod.weblog.process;
-
-import gov.nasa.jpl.mudrod.discoveryengine.DiscoveryStepAbstract;
-import gov.nasa.jpl.mudrod.driver.ESDriver;
-import gov.nasa.jpl.mudrod.driver.SparkDriver;
-import gov.nasa.jpl.mudrod.main.MudrodConstants;
-import gov.nasa.jpl.mudrod.semantics.SemanticAnalyzer;
-import gov.nasa.jpl.mudrod.utils.LinkageTriple;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.util.List;
-import java.util.Properties;
-
-/**
- * Supports ability to calculate term similarity based on user history
- */
-public class UserHistoryAnalyzer extends DiscoveryStepAbstract {
-
-  /**
-   *
-   */
-  private static final long serialVersionUID = 1L;
-  private static final Logger LOG = LoggerFactory.getLogger(UserHistoryAnalyzer.class);
-
-  public UserHistoryAnalyzer(Properties props, ESDriver es, SparkDriver spark) {
-    super(props, es, spark);
-  }
-
-  /**
-   * Method of executing user history analyzer
-   */
-  @Override
-  public Object execute() {
-    LOG.info("Starting UserHistoryAnalyzer...");
-    startTime = System.currentTimeMillis();
-
-    SemanticAnalyzer sa = new SemanticAnalyzer(props, es, spark);
-    List<LinkageTriple> tripleList = sa.calTermSimfromMatrix(props.getProperty("userHistoryMatrix"));
-    sa.saveToES(tripleList, props.getProperty(MudrodConstants.ES_INDEX_NAME), props.getProperty(MudrodConstants.USE_HISTORY_LINKAGE_TYPE));
-
-    endTime = System.currentTimeMillis();
-    es.refreshIndex();
-    LOG.info("UserHistoryAnalyzer complete. Time elapsed: {}s", (endTime - startTime) / 1000);
-    return null;
-  }
-
-  @Override
-  public Object execute(Object o) {
-    return null;
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/weblog/process/package-info.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/weblog/process/package-info.java b/core/src/main/java/gov/nasa/jpl/mudrod/weblog/process/package-info.java
deleted file mode 100644
index e96fd3c..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/weblog/process/package-info.java
+++ /dev/null
@@ -1,17 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you 
- * may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/**
- * This package includes web log processing classes.
- */
-package gov.nasa.jpl.mudrod.weblog.process;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/ApacheAccessLog.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/ApacheAccessLog.java b/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/ApacheAccessLog.java
deleted file mode 100644
index 1051384..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/ApacheAccessLog.java
+++ /dev/null
@@ -1,129 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you 
- * may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package gov.nasa.jpl.mudrod.weblog.structure;
-
-import com.google.gson.Gson;
-import gov.nasa.jpl.mudrod.weblog.pre.CrawlerDetection;
-
-import java.io.IOException;
-import java.io.Serializable;
-import java.text.ParseException;
-import java.text.SimpleDateFormat;
-import java.util.Date;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-/**
- * This class represents an Apache access log line. See
- * http://httpd.apache.org/docs/2.2/logs.html for more details.
- */
-public class ApacheAccessLog extends WebLog implements Serializable {
-
-  // double Bytes;
-  String Response;
-  String Referer;
-  String Browser;
-
-  @Override
-  public double getBytes() {
-    return this.Bytes;
-  }
-
-  public String getBrowser() {
-    return this.Browser;
-  }
-
-  public String getResponse() {
-    return this.Response;
-  }
-
-  public String getReferer() {
-    return this.Referer;
-  }
-
-  public ApacheAccessLog() {
-
-  }
-
-  public static String parseFromLogLine(String log) throws IOException, ParseException {
-
-    String logEntryPattern = "^([\\d.]+) (\\S+) (\\S+) \\[([\\w:/]+\\s[+\\-]\\d{4})\\] \"(.+?)\" (\\d{3}) (\\d+|-) \"((?:[^\"]|\")+)\" \"([^\"]+)\"";
-    final int NUM_FIELDS = 9;
-    Pattern p = Pattern.compile(logEntryPattern);
-    Matcher matcher;
-
-    String lineJson = "{}";
-    matcher = p.matcher(log);
-    if (!matcher.matches() || NUM_FIELDS != matcher.groupCount()) {
-      return lineJson;
-    }
-
-    String time = matcher.group(4);
-    time = SwithtoNum(time);
-    SimpleDateFormat formatter = new SimpleDateFormat("dd/MM/yyyy:HH:mm:ss");
-    Date date = formatter.parse(time);
-
-    String bytes = matcher.group(7);
-
-    if (bytes.equals("-")) {
-      bytes = "0";
-    }
-
-    String request = matcher.group(5).toLowerCase();
-    String agent = matcher.group(9);
-    CrawlerDetection crawlerDe = new CrawlerDetection();
-    if (crawlerDe.checkKnownCrawler(agent)) {
-      return lineJson;
-    } else {
-
-      boolean tag = false;
-      String[] mimeTypes = { ".js", ".css", ".jpg", ".png", ".ico", "image_captcha", "autocomplete", ".gif", "/alldata/", "/api/", "get / http/1.1", ".jpeg", "/ws/" };
-      for (int i = 0; i < mimeTypes.length; i++) {
-        if (request.contains(mimeTypes[i])) {
-          tag = true;
-          return lineJson;
-        }
-      }
-
-      if (tag == false) {
-        ApacheAccessLog accesslog = new ApacheAccessLog();
-        accesslog.LogType = "PO.DAAC";
-        accesslog.IP = matcher.group(1);
-        accesslog.Request = matcher.group(5);
-        accesslog.Response = matcher.group(6);
-        accesslog.Bytes = Double.parseDouble(bytes);
-        accesslog.Referer = matcher.group(8);
-        accesslog.Browser = matcher.group(9);
-        SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.sss'Z'");
-        accesslog.Time = df.format(date);
-
-        Gson gson = new Gson();
-        lineJson = gson.toJson(accesslog);
-
-        return lineJson;
-      }
-    }
-
-    lineJson = "{}";
-    return lineJson;
-  }
-
-  public static boolean checknull(WebLog s) {
-    if (s == null) {
-      return false;
-    }
-    return true;
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/ClickStream.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/ClickStream.java b/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/ClickStream.java
deleted file mode 100644
index 76e8d7a..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/ClickStream.java
+++ /dev/null
@@ -1,188 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you
- * may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package gov.nasa.jpl.mudrod.weblog.structure;
-
-import org.codehaus.jettison.json.JSONException;
-import org.codehaus.jettison.json.JSONObject;
-
-import java.io.Serializable;
-
-/**
- * ClassName: ClickStream Function: user click stream data related operations.
- */
-public class ClickStream implements Serializable {
-  /**
-   *
-   */
-  private static final long serialVersionUID = 1L;
-  // keywords: query words related to the click behaviour
-  private String keywords;
-  // viewDataset: the dataset name user viewed
-  private String viewDataset;
-  // downloadDataset: the dataset name user downloaded
-  private String downloadDataset;
-  // sessionID: session ID
-  private String sessionID;
-  // type: session type name
-  private String type;
-
-  /**
-   * Creates a new instance of ClickStream.
-   *
-   * @param keywords    the query user searched
-   * @param viewDataset the dataset name user viewed
-   * @param download:   if user download the data set after viewing it, this parameter is
-   *                    true, otherwise, it is false.
-   */
-  public ClickStream(String keywords, String viewDataset, boolean download) {
-    this.keywords = keywords;
-    this.viewDataset = viewDataset;
-    this.downloadDataset = "";
-    if (download) {
-      this.downloadDataset = viewDataset;
-    }
-  }
-
-  public ClickStream() {
-    //default constructor
-  }
-
-  public String getSessionID() {
-    return sessionID;
-  }
-
-  /**
-   * setKeyWords: Set the query user searched.
-   *
-   * @param query search words
-   */
-  public void setKeyWords(String query) {
-    this.keywords = query;
-  }
-
-  /**
-   * setViewDataset:Set the data set name user viewed
-   *
-   * @param dataset short name of data set
-   */
-  public void setViewDataset(String dataset) {
-    this.viewDataset = dataset;
-  }
-
-  /**
-   * setDownloadDataset: Set the data set name user downloaded
-   *
-   * @param dataset short name of data set
-   */
-  public void setDownloadDataset(String dataset) {
-    this.downloadDataset = dataset;
-  }
-
-  /**
-   * getKeyWords: Get the query user searched
-   *
-   * @return data set name
-   */
-  public String getKeyWords() {
-    return this.keywords;
-  }
-
-  /**
-   * getViewDataset: Get the data set user viewed
-   *
-   * @return data set name
-   */
-  public String getViewDataset() {
-    return this.viewDataset;
-  }
-
-  /**
-   * isDownload: Show whether the data is downloaded in the session.
-   *
-   * @return True or False
-   */
-  public Boolean isDownload() {
-    if ("".equals(this.downloadDataset)) {
-      return false;
-    }
-    return true;
-  }
-
-  /**
-   * setSessionId: Set ID of session
-   *
-   * @param sessionID session id
-   */
-  public void setSessionId(String sessionID) {
-    this.sessionID = sessionID;
-  }
-
-  /**
-   * setType: Set session type name
-   *
-   * @param type session type name in elasticsearch
-   */
-  public void setType(String type) {
-    this.type = type;
-  }
-
-  /**
-   * Output click stream info in string format
-   *
-   * @see java.lang.Object#toString()
-   */
-  @Override
-  public String toString() {
-    return "Query: " + keywords + " || View Dataset: " + viewDataset + " || Download Dataset: " + downloadDataset;
-  }
-
-  /**
-   * toJson: Output click stream info in Json format
-   *
-   * @return session in string format
-   */
-  public String toJson() {
-    String jsonQuery = "{";
-    jsonQuery += "\"query\":\"" + this.keywords + "\",";
-    jsonQuery += "\"viewdataset\":\"" + this.viewDataset + "\",";
-    jsonQuery += "\"downloaddataset\":\"" + this.downloadDataset + "\",";
-    jsonQuery += "\"sessionId\":\"" + this.sessionID + "\",";
-    jsonQuery += "\"type\":\"" + this.type + "\"";
-    jsonQuery += "},";
-    return jsonQuery;
-  }
-
-  /**
-   * parseFromTextLine: Convert string to click stream data
-   *
-   * @param logline http log line
-   * @return {@link ClickStream}
-   */
-  public static ClickStream parseFromTextLine(String logline) {
-    JSONObject jsonData = null;
-    ClickStream data = null;
-    try {
-      jsonData = new JSONObject(logline);
-      data = new ClickStream();
-      data.setKeyWords(jsonData.getString("query"));
-      data.setViewDataset(jsonData.getString("viewdataset"));
-      data.setDownloadDataset(jsonData.getString("downloaddataset"));
-
-    } catch (JSONException e) {
-      e.printStackTrace();
-    }
-
-    return data;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/Coordinates.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/Coordinates.java b/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/Coordinates.java
deleted file mode 100644
index f416eb4..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/Coordinates.java
+++ /dev/null
@@ -1,21 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you 
- * may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package gov.nasa.jpl.mudrod.weblog.structure;
-
-public class Coordinates {
-  /*
-   * public String lat; public String lon;
-   */
-  public String latlon;
-}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/FtpLog.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/FtpLog.java b/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/FtpLog.java
deleted file mode 100644
index 5ddc717..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/FtpLog.java
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you 
- * may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package gov.nasa.jpl.mudrod.weblog.structure;
-
-import com.google.gson.Gson;
-import gov.nasa.jpl.mudrod.weblog.pre.ImportLogFile;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.Serializable;
-import java.text.SimpleDateFormat;
-import java.util.Date;
-
-/**
- * This class represents an FTP access log line.
- */
-public class FtpLog extends WebLog implements Serializable {
-
-  private static final Logger LOG = LoggerFactory.getLogger(ImportLogFile.class);
-
-  public static String parseFromLogLine(String log) {
-
-    try {
-      String ip = log.split(" +")[6];
-
-      String time = log.split(" +")[1] + ":" + log.split(" +")[2] + ":" + log.split(" +")[3] + ":" + log.split(" +")[4];
-
-      time = SwithtoNum(time);
-      SimpleDateFormat formatter = new SimpleDateFormat("MM:dd:HH:mm:ss:yyyy");
-      Date date = formatter.parse(time);
-
-      String bytes = log.split(" +")[7];
-
-      String request = log.split(" +")[8].toLowerCase();
-
-      if (!request.contains("/misc/") && !request.contains("readme")) {
-        FtpLog ftplog = new FtpLog();
-        ftplog.LogType = "ftp";
-        ftplog.IP = ip;
-        ftplog.Request = request;
-        ftplog.Bytes = Double.parseDouble(bytes);
-
-        SimpleDateFormat df = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.sss'Z'");
-        ftplog.Time = df.format(date);
-
-        return new Gson().toJson(ftplog);
-      }
-    } catch (Exception e) {
-      LOG.warn("Error parsing ftp log line [{}]. Skipping this line.", log, e);
-    }
-    return "{}";
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/GeoIp.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/GeoIp.java b/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/GeoIp.java
deleted file mode 100644
index 778224b..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/GeoIp.java
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you
- * may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package gov.nasa.jpl.mudrod.weblog.structure;
-
-import com.google.gson.JsonElement;
-import com.google.gson.JsonObject;
-import com.google.gson.JsonParser;
-import gov.nasa.jpl.mudrod.utils.HttpRequest;
-
-/**
- * ClassName: GeoIp Function: convert IP to geo location
- */
-public class GeoIp {
-
-  /**
-   * toLocation: convert ip to location
-   *
-   * @param ip ip address
-   * @return coordinates
-   */
-  public Coordinates toLocation(String ip) {
-    String url = "http://getcitydetails.geobytes.com/GetCityDetails?fqcn=" + ip;
-    HttpRequest http = new HttpRequest();
-    String response = http.getRequest(url);
-    JsonParser parser = new JsonParser();
-    JsonElement jobSon = parser.parse(response);
-    JsonObject responseObject = jobSon.getAsJsonObject();
-
-    Coordinates co = new Coordinates();
-    String lon = responseObject.get("geobyteslongitude").toString().replace("\"", "");
-    String lat = responseObject.get("geobyteslatitude").toString().replace("\"", "");
-    co.latlon = lat + "," + lon;
-    return co;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/RankingTrainData.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/RankingTrainData.java b/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/RankingTrainData.java
deleted file mode 100644
index 7ea17c0..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/RankingTrainData.java
+++ /dev/null
@@ -1,147 +0,0 @@
-package gov.nasa.jpl.mudrod.weblog.structure;
-
-import java.io.Serializable;
-import java.util.Map;
-
-/**
- * ClassName: train data extracted from web logs for training ranking weightss.
- */
-public class RankingTrainData implements Serializable {
-  /**
-   *
-   */
-  private static final long serialVersionUID = 1L;
-  // sessionID: session ID
-  private String sessionID;
-  // type: session type name
-  private String index;
-  // query: query words related to the click
-  private String query;
-  // datasetA
-  private String highRankDataset;
-  // datasetB
-  private String lowRankDataset;
-
-  private Map<String, String> filter;
-
-  /**
-   * Creates a new instance of ClickStream.
-   *
-   * @param query           the user query string
-   * @param highRankDataset the dataset name for the highest ranked dataset
-   * @param lowRankDataset  the dataset name for the lowest ranked dataset
-   */
-  public RankingTrainData(String query, String highRankDataset, String lowRankDataset) {
-    this.query = query;
-    this.highRankDataset = highRankDataset;
-    this.lowRankDataset = lowRankDataset;
-  }
-
-  public RankingTrainData() {
-    //default constructor
-  }
-
-  public String getSessionID() {
-    return sessionID;
-  }
-
-  /**
-   * setKeyWords: Set the query user searched.
-   *
-   * @param query search words
-   */
-  public void setQuery(String query) {
-    this.query = query;
-  }
-
-  /**
-   * getKeyWords: Get the query user searched
-   *
-   * @return data set name
-   */
-  public String getQuery() {
-    return this.query;
-  }
-
-  /**
-   * setViewDataset:Set the data set name user viewed
-   *
-   * @param dataset short name of data set
-   */
-  public void setHighRankDataset(String dataset) {
-    this.highRankDataset = dataset;
-  }
-
-  /**
-   * setDownloadDataset: Set the data set name user downloaded
-   *
-   * @param dataset short name of data set
-   */
-  public void setLowRankDataset(String dataset) {
-    this.lowRankDataset = dataset;
-  }
-
-  /**
-   * getViewDataset: Get the data set user viewed
-   *
-   * @return data set name
-   */
-  public String getLowRankDataset() {
-    return this.lowRankDataset;
-  }
-
-  /**
-   * setSessionId: Set ID of session
-   *
-   * @param sessionID session id
-   */
-  public void setSessionId(String sessionID) {
-    this.sessionID = sessionID;
-  }
-
-  /**
-   * setType: Set session type name
-   *
-   * @param index session type name in elasticsearch
-   */
-  public void setIndex(String index) {
-    this.index = index;
-  }
-
-  public void setFilter(Map<String, String> filter) {
-    this.filter = filter;
-  }
-
-  /**
-   * Output click stream info in string format
-   *
-   * @see java.lang.Object#toString()
-   */
-  @Override
-  public String toString() {
-    return "query:" + query + "|| highRankDataset:" + highRankDataset + "|| lowRankDataset:" + lowRankDataset;
-  }
-
-  /**
-   * toJson: Output click stream info in Json format
-   *
-   * @return session in string format
-   */
-  public String toJson() {
-    String jsonQuery = "{";
-    jsonQuery += "\"query\":\"" + this.query + "\",";
-    jsonQuery += "\"highRankDataset\":\"" + this.highRankDataset + "\",";
-    jsonQuery += "\"lowRankDataset\":\"" + this.lowRankDataset + "\",";
-
-    if (this.filter != null) {
-      for (String key : filter.keySet()) {
-        jsonQuery += "\"" + key + "\":\"" + filter.get(key) + "\",";
-      }
-    }
-
-    jsonQuery += "\"sessionId\":\"" + this.sessionID + "\",";
-    jsonQuery += "\"index\":\"" + this.index + "\"";
-    jsonQuery += "},";
-    return jsonQuery;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/RequestUrl.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/RequestUrl.java b/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/RequestUrl.java
deleted file mode 100644
index bbfb79c..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/RequestUrl.java
+++ /dev/null
@@ -1,294 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you
- * may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package gov.nasa.jpl.mudrod.weblog.structure;
-
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.UnsupportedEncodingException;
-import java.net.URLDecoder;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.List;
-import java.util.Map;
-
-/**
- * ClassName: RequestUrl Function: request url relate operations
- */
-public class RequestUrl {
-
-  private static final Logger LOG = LoggerFactory.getLogger(RequestUrl.class);
-
-  /**
-   * Default Constructor
-   */
-  public RequestUrl() {
-    /* Default Constructor */
-  }
-
-  /**
-   * UrlPage: Get url page from url link
-   *
-   * @param strURL request url
-   * @return page name
-   */
-  public static String urlPage(String strURL) {
-    String strPage = null;
-    String[] arrSplit = null;
-
-    String newURL = strURL.trim().toLowerCase();
-
-    arrSplit = newURL.split("[?]");
-    if (newURL.length() > 0 && arrSplit.length > 1 && arrSplit[0] != null) {
-      strPage = arrSplit[0];
-    }
-
-    return strPage;
-  }
-
-  /**
-   * TruncateUrlPage: Get url params from url link
-   *
-   * @param strURL
-   * @return url params
-   */
-  private static String truncateUrlPage(String strURL) {
-    String strAllParam = null;
-    String[] arrSplit = null;
-
-    strURL = strURL.trim().toLowerCase(); // keep this in mind
-
-    arrSplit = strURL.split("[?]");
-    if (strURL.length() > 1) {
-      if (arrSplit.length > 1) {
-        if (arrSplit[1] != null) {
-          strAllParam = arrSplit[1];
-        }
-      }
-    }
-
-    return strAllParam;
-  }
-
-  /**
-   * URLRequest: Get url params from url link in a map format
-   *
-   * @param URL request url
-   * @return url params key value map
-   */
-  public static Map<String, String> uRLRequest(String URL) {
-    Map<String, String> mapRequest = new HashMap<String, String>();
-
-    String[] arrSplit = null;
-
-    String strUrlParam = truncateUrlPage(URL);
-    if (strUrlParam == null) {
-      return mapRequest;
-    }
-
-    arrSplit = strUrlParam.split("[&]");
-    for (String strSplit : arrSplit) {
-      String[] arrSplitEqual = null;
-      arrSplitEqual = strSplit.split("[=]");
-
-      if (arrSplitEqual.length > 1) {
-
-        mapRequest.put(arrSplitEqual[0], arrSplitEqual[1]);
-
-      } else {
-        if (arrSplitEqual[0] != "") {
-
-          mapRequest.put(arrSplitEqual[0], "");
-        }
-      }
-    }
-    return mapRequest;
-  }
-
-  /**
-   * GetSearchInfo: Get search information from url link
-   *
-   * @param URL request url
-   * @return search params
-   * @throws UnsupportedEncodingException UnsupportedEncodingException
-   */
-  public String getSearchInfo(String URL) throws UnsupportedEncodingException {
-    List<String> info = new ArrayList<String>();
-    String keyword = "";
-    Map<String, String> mapRequest = RequestUrl.uRLRequest(URL);
-    if (mapRequest.get("search") != null) {
-      try {
-        keyword = mapRequest.get("search");
-
-        keyword = URLDecoder.decode(keyword.replaceAll("%(?![0-9a-fA-F]{2})", "%25"), "UTF-8");
-        if (keyword.contains("%2b") || keyword.contains("%20") || keyword.contains("%25")) {
-          keyword = keyword.replace("%2b", " ");
-          keyword = keyword.replace("%20", " ");
-          keyword = keyword.replace("%25", " ");
-        }
-
-        keyword = keyword.replaceAll("[-+^:,*_\"]", " ").replace("\\", " ").replaceAll("\\s+", " ").trim();
-
-      } catch (UnsupportedEncodingException e) {
-        LOG.error(mapRequest.get("search"));
-        e.printStackTrace();
-      }
-      if (!"".equals(keyword)) {
-        info.add(keyword);
-      }
-
-    }
-
-    if (mapRequest.get("ids") != null && mapRequest.get("values") != null) {
-      String id_raw = URLDecoder.decode(mapRequest.get("ids"), "UTF-8");
-      String value_raw = URLDecoder.decode(mapRequest.get("values"), "UTF-8");
-      String[] ids = id_raw.split(":");
-      String[] values = value_raw.split(":");
-
-      int a = ids.length;
-      int b = values.length;
-      int l = a < b ? a : b;
-
-      for (int i = 0; i < l; i++) {
-        if (ids[i].equals("collections") || ids[i].equals("measurement") || ids[i].equals("sensor") || ids[i].equals("platform") || ids[i].equals("variable") || ids[i].equals("spatialcoverage")) {
-          try {
-            values[i] = values[i].replaceAll("%(?![0-9a-fA-F]{2})", "%25");
-            if (!URLDecoder.decode(values[i], "UTF-8").equals(keyword) && !URLDecoder.decode(values[i], "UTF-8").equals("")) {
-              String item = URLDecoder.decode(values[i], "UTF-8").trim();
-              if (item.contains("%2b") || item.contains("%20") || item.contains("%25")) {
-                item = item.replace("%2b", " ");
-                item = item.replace("%20", " ");
-                item = item.replace("%25", " ");
-              }
-              item = item.replaceAll("[-+^:,*_\"]", " ").replace("\\", " ").replaceAll("\\s+", " ").trim();
-              info.add(item);
-            }
-          } catch (Exception e) {
-            LOG.error(values[i]);
-            e.printStackTrace();
-          }
-        }
-
-      }
-    }
-
-    return String.join(",", info);
-  }
-
-  /**
-   * GetSearchWord: Get search words from url link
-   *
-   * @param url request url
-   * @return query
-   */
-  public static String getSearchWord(String url) {
-    String keyword = "";
-
-    Map<String, String> mapRequest = RequestUrl.uRLRequest(url);
-    if (mapRequest.get("search") != null) {
-      try {
-        keyword = mapRequest.get("search");
-
-        keyword = URLDecoder.decode(keyword.replaceAll("%(?![0-9a-fA-F]{2})", "%25"), "UTF-8");
-        if (keyword.contains("%2b") || keyword.contains("%20") || keyword.contains("%25")) {
-          keyword = keyword.replace("%2b", " ");
-          keyword = keyword.replace("%20", " ");
-          keyword = keyword.replace("%25", " ");
-        }
-        keyword = keyword.replaceAll("[-+^:,*_\"]", " ").replace("\\", " ").replaceAll("\\s+", " ").trim();
-      } catch (UnsupportedEncodingException e) {
-        LOG.error(mapRequest.get("search"));
-        e.printStackTrace();
-      }
-    }
-
-    return keyword;
-  }
-
-  /**
-   * GetFilterInfo: Get filter params from url link
-   *
-   * @param url request url
-   * @return filter facet key pair map
-   * @throws UnsupportedEncodingException UnsupportedEncodingException
-   */
-  public static Map<String, String> getFilterInfo(String url) throws UnsupportedEncodingException {
-    List<String> info = new ArrayList<>();
-    Map<String, String> filterValues = new HashMap<>();
-
-    String keyword = "";
-    Map<String, String> mapRequest = RequestUrl.uRLRequest(url);
-    if (mapRequest.get("search") != null) {
-      try {
-        keyword = mapRequest.get("search");
-
-        keyword = URLDecoder.decode(keyword.replaceAll("%(?![0-9a-fA-F]{2})", "%25"), "UTF-8");
-        if (keyword.contains("%2b") || keyword.contains("%20") || keyword.contains("%25")) {
-          keyword = keyword.replace("%2b", " ");
-          keyword = keyword.replace("%20", " ");
-          keyword = keyword.replace("%25", " ");
-        }
-        keyword = keyword.replaceAll("[-+^:,*_\"]", " ").replace("\\", " ").replaceAll("\\s+", " ").trim();
-
-      } catch (UnsupportedEncodingException e) {
-        LOG.error(mapRequest.get("search"));
-        e.printStackTrace();
-      }
-      if (!"".equals(keyword)) {
-        info.add(keyword);
-      }
-
-    }
-
-    if (mapRequest.get("ids") != null && mapRequest.get("values") != null) {
-      String idRaw = URLDecoder.decode(mapRequest.get("ids"), "UTF-8");
-      String valueRaw = URLDecoder.decode(mapRequest.get("values"), "UTF-8");
-      String[] ids = idRaw.split(":");
-      String[] values = valueRaw.split(":");
-
-      int a = ids.length;
-      int b = values.length;
-      int l = a < b ? a : b;
-
-      for (int i = 0; i < l; i++) {
-        try {
-          values[i] = values[i].replaceAll("%(?![0-9a-fA-F]{2})", "%25");
-          if (!URLDecoder.decode(values[i], "UTF-8").equals(keyword) && !URLDecoder.decode(values[i], "UTF-8").equals("")) {
-            String item = URLDecoder.decode(values[i], "UTF-8").trim();
-            if (item.contains("%2b") || item.contains("%20") || item.contains("%25")) {
-              item = item.replace("%2b", " ");
-              item = item.replace("%20", " ");
-              item = item.replace("%25", " ");
-            }
-            item = item.replaceAll("[-+^:,*_\"]", " ").replace("\\", " ").replaceAll("\\s+", " ").trim();
-            filterValues.put(ids[i], item);
-          }
-        } catch (Exception e) {
-          LOG.error(values[i]);
-          e.printStackTrace();
-        }
-      }
-    }
-
-    if (mapRequest.get("temporalsearch") != null) {
-      String temporalsearch = mapRequest.get("temporalsearch");
-      temporalsearch = URLDecoder.decode(temporalsearch.replaceAll("%(?![0-9a-fA-F]{2})", "%25"), "UTF-8");
-
-      filterValues.put("temporalsearch", temporalsearch);
-    }
-
-    return filterValues;
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/Session.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/Session.java b/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/Session.java
deleted file mode 100644
index 93f4288..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/Session.java
+++ /dev/null
@@ -1,287 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you
- * may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package gov.nasa.jpl.mudrod.weblog.structure;
-
-import com.google.gson.Gson;
-import com.google.gson.JsonElement;
-import com.google.gson.JsonObject;
-import gov.nasa.jpl.mudrod.driver.ESDriver;
-import org.elasticsearch.action.search.SearchResponse;
-import org.elasticsearch.index.query.QueryBuilders;
-import org.elasticsearch.search.SearchHit;
-import org.elasticsearch.search.sort.SortOrder;
-import org.joda.time.Seconds;
-import org.joda.time.format.DateTimeFormatter;
-import org.joda.time.format.ISODateTimeFormat;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.UnsupportedEncodingException;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-import java.util.Properties;
-
-/**
- * ClassName: Session Function: Session operations.
- */
-public class Session /*extends MudrodAbstract*/ implements Comparable<Session> {
-  private static final Logger LOG = LoggerFactory.getLogger(Session.class);
-  // start: start time of session
-  private String start;
-  // end: end time of session
-  private String end;
-  // id: original session ID
-  private String id;
-  // newid: new session ID
-  private String newid = null;
-  // fmt: time formatter
-  private DateTimeFormatter fmt = ISODateTimeFormat.dateTime();
-
-  private ESDriver es;
-  private Properties props;
-
-  /**
-   * Creates a new instance of Session.
-   *
-   * @param props the Mudrod configuration
-   * @param es    the Elasticsearch drive
-   * @param start start time of session
-   * @param end   end time of session
-   * @param id    session ID
-   */
-  public Session(Properties props, ESDriver es, String start, String end, String id) {
-    this.start = start;
-    this.end = end;
-    this.id = id;
-
-    this.props = props;
-    this.es = es;
-  }
-
-  /**
-   * Creates a new instance of Session.
-   *
-   * @param props the Mudrod configuration
-   * @param es    the Elasticsearch drive
-   */
-  public Session(Properties props, ESDriver es) {
-    this.props = props;
-    this.es = es;
-  }
-
-  /**
-   * getID: Get original session ID
-   *
-   * @return session id
-   */
-  public String getID() {
-    return id;
-  }
-
-  /**
-   * getNewID: Get new session ID
-   *
-   * @return new session id
-   */
-  public String getNewID() {
-    return newid;
-  }
-
-  /**
-   * setNewID: Set new session ID
-   *
-   * @param str: session ID
-   * @return new session id
-   */
-  public String setNewID(String str) {
-    return newid = str;
-  }
-
-  /**
-   * getStartTime:Get start time of current session
-   *
-   * @return start time of session
-   */
-  public String getStartTime() {
-    return start;
-  }
-
-  /**
-   * getEndTime:Get end time of current session
-   *
-   * @return end time of session
-   */
-  public String getEndTime() {
-    return end;
-  }
-
-  /**
-   * Compare current session with another session
-   *
-   * @see java.lang.Comparable#compareTo(java.lang.Object)
-   */
-  @Override
-  public int compareTo(Session o) {
-    fmt.parseDateTime(this.end);
-    fmt.parseDateTime(o.end);
-    // ascending order
-    return Seconds.secondsBetween(fmt.parseDateTime(o.end), fmt.parseDateTime(this.end)).getSeconds();
-
-  }
-
-  /**
-   * getSessionDetail:Get detail of current session, which is used for session
-   * tree reconstruct
-   *
-   * @param indexName    name of index from which you wish to obtain session detail.
-   * @param type: Session type name in Elasticsearch
-   * @param sessionID:   Session ID
-   * @return Session details in Json format
-   */
-  public JsonObject getSessionDetail(String indexName, String type, String sessionID) {
-    JsonObject sessionResults = new JsonObject();
-    // for session tree
-    SessionTree tree = null;
-    JsonElement jsonRequest = null;
-    try {
-      tree = this.getSessionTree(indexName, type, sessionID);
-      JsonObject jsonTree = tree.treeToJson(tree.root);
-      sessionResults.add("treeData", jsonTree);
-
-      jsonRequest = this.getRequests(type, sessionID);
-      sessionResults.add("RequestList", jsonRequest);
-    } catch (UnsupportedEncodingException e) {
-      LOG.error("Encoding error detected.", e);
-
-    }
-
-    return sessionResults;
-  }
-
-  /**
-   * getClickStreamList: Extracted click stream list from current session.
-   *
-   * @param indexName    an index from which to query for a session list
-   * @param type: Session type name in Elasticsearch
-   * @param sessionID:   Session ID
-   * @return Click stram data list
-   * {@link ClickStream}
-   */
-  public List<ClickStream> getClickStreamList(String indexName, String type, String sessionID) {
-    SessionTree tree = null;
-    try {
-      tree = this.getSessionTree(indexName, type, sessionID);
-    } catch (UnsupportedEncodingException e) {
-      LOG.error("Erro whilst obtaining the Session Tree: {}", e);
-    }
-
-    List<ClickStream> clickthroughs = tree.getClickStreamList();
-    return clickthroughs;
-  }
-
-  /**
-   * Method of converting a given session to a tree structure
-   *
-   * @param type session type name in Elasticsearch
-   * @param sessionID   ID of session
-   * @return an instance of session tree structure
-   * @throws UnsupportedEncodingException UnsupportedEncodingException
-   */
-  private SessionTree getSessionTree(String indexName, String type, String sessionID) throws UnsupportedEncodingException {
-
-    SearchResponse response = es.getClient().prepareSearch(indexName).setTypes(type).setQuery(QueryBuilders.termQuery("SessionID", sessionID)).setSize(100).addSort("Time", SortOrder.ASC)
-        .execute().actionGet();
-
-    SessionTree tree = new SessionTree(this.props, this.es, sessionID, type);
-    int seq = 1;
-    for (SearchHit hit : response.getHits().getHits()) {
-      Map<String, Object> result = hit.getSource();
-      String request = (String) result.get("Request");
-      String time = (String) result.get("Time");
-      String logType = (String) result.get("LogType");
-      String referer = (String) result.get("Referer");
-
-      SessionNode node = new SessionNode(request, logType, referer, time, seq);
-      tree.insert(node);
-      seq++;
-    }
-
-    return tree;
-  }
-
-  /**
-   * Method of getting all requests from a given current session
-   *
-   * @param cleanuptype Session type name in Elasticsearch
-   * @param sessionID   Session ID
-   * @return all of these requests in JSON
-   * @throws UnsupportedEncodingException UnsupportedEncodingException
-   */
-  private JsonElement getRequests(String cleanuptype, String sessionID) throws UnsupportedEncodingException {
-    SearchResponse response = es.getClient().prepareSearch(props.getProperty("indexName")).setTypes(cleanuptype).setQuery(QueryBuilders.termQuery("SessionID", sessionID)).setSize(100)
-        .addSort("Time", SortOrder.ASC).execute().actionGet();
-
-    Gson gson = new Gson();
-    List<JsonObject> requestList = new ArrayList<>();
-    int seq = 1;
-    for (SearchHit hit : response.getHits().getHits()) {
-      Map<String, Object> result = hit.getSource();
-      String request = (String) result.get("Request");
-      String requestUrl = (String) result.get("RequestUrl");
-      String time = (String) result.get("Time");
-      String logType = (String) result.get("LogType");
-      String referer = (String) result.get("Referer");
-
-      JsonObject req = new JsonObject();
-      req.addProperty("Time", time);
-      req.addProperty("Request", request);
-      req.addProperty("RequestURL", requestUrl);
-      req.addProperty("LogType", logType);
-      req.addProperty("Referer", referer);
-      req.addProperty("Seq", seq);
-      requestList.add(req);
-
-      seq++;
-    }
-    return gson.toJsonTree(requestList);
-  }
-
-  /**
-   * getClickStreamList: Extracted ranking training data from current session.
-   *
-   * @param indexName    an index from which to obtain ranked training data.
-   * @param cleanuptype: Session type name in Elasticsearch
-   * @param sessionID:   Session ID
-   * @return Click stram data list
-   * {@link ClickStream}
-   */
-  public List<RankingTrainData> getRankingTrainData(String indexName, String cleanuptype, String sessionID) {
-    SessionTree tree = null;
-    try {
-      tree = this.getSessionTree(indexName, cleanuptype, sessionID);
-    } catch (UnsupportedEncodingException e) {
-      LOG.error("Error whilst retreiving Session Tree: {}", e);
-    }
-
-    List<RankingTrainData> trainData = new ArrayList<>();
-    try {
-      trainData = tree.getRankingTrainData(indexName, sessionID);
-    } catch (UnsupportedEncodingException e) {
-      LOG.error("Error whilst retreiving ranking training data: {}", e);
-    }
-
-    return trainData;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/SessionExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/SessionExtractor.java b/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/SessionExtractor.java
deleted file mode 100644
index edba32e..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/SessionExtractor.java
+++ /dev/null
@@ -1,532 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you
- * may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package gov.nasa.jpl.mudrod.weblog.structure;
-
-import gov.nasa.jpl.mudrod.driver.ESDriver;
-import gov.nasa.jpl.mudrod.driver.SparkDriver;
-import gov.nasa.jpl.mudrod.main.MudrodConstants;
-
-import org.apache.spark.api.java.JavaPairRDD;
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.api.java.Optional;
-import org.apache.spark.api.java.function.FlatMapFunction;
-import org.apache.spark.api.java.function.Function;
-import org.apache.spark.api.java.function.Function2;
-import org.apache.spark.api.java.function.PairFunction;
-import org.elasticsearch.action.search.SearchResponse;
-import org.elasticsearch.common.unit.TimeValue;
-import org.elasticsearch.index.query.QueryBuilders;
-import org.elasticsearch.search.SearchHit;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import scala.Tuple2;
-
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-import java.util.Properties;
-
-/**
- * ClassName: SessionExtractor Function: Extract sessions details from
- * reconstructed sessions.
- */
-public class SessionExtractor implements Serializable {
-
-  private static final Logger LOG = LoggerFactory.getLogger(SessionExtractor.class);
-
-  /**
-   *
-   */
-  private static final long serialVersionUID = 1L;
-
-  public SessionExtractor() {
-    // default constructor
-  }
-
-  /**
-   * extractClickStreamFromES:Extract click streams from logs stored in
-   * Elasticsearch
-   *
-   * @param props
-   *          the Mudrod configuration
-   * @param es
-   *          the Elasticsearch drive
-   * @param spark
-   *          the spark driver
-   * @return clickstream list in JavaRDD format {@link ClickStream}
-   */
-  public JavaRDD<ClickStream> extractClickStreamFromES(Properties props, ESDriver es, SparkDriver spark) {
-    switch (props.getProperty(MudrodConstants.PROCESS_TYPE)) {
-      case "sequential":
-        List<ClickStream> queryList = this.getClickStreamList(props, es);
-        return spark.sc.parallelize(queryList);
-      case "parallel":
-        return getClickStreamListInParallel(props, spark, es);
-      default:
-      LOG.error("Error finding processing type for '{}'. Please check your config.xml.", props.getProperty(MudrodConstants.PROCESS_TYPE));
-    }
-    return null;
-  }
-
-  /**
-   * getClickStreamList:Extract click streams from logs stored in Elasticsearch.
-   *
-   * @param props
-   *          the Mudrod configuration
-   * @param es
-   *          the Elasticsearch driver
-   * @return clickstream list {@link ClickStream}
-   */
-  protected List<ClickStream> getClickStreamList(Properties props, ESDriver es) {
-    List<String> logIndexList = es.getIndexListWithPrefix(props.getProperty(MudrodConstants.LOG_INDEX));
-
-    List<ClickStream> result = new ArrayList<>();
-    for (int n = 0; n < logIndexList.size(); n++) {
-      String logIndex = logIndexList.get(n);
-      List<String> sessionIdList;
-      try {
-        sessionIdList = this.getSessions(props, es, logIndex);
-        Session session = new Session(props, es);
-        int sessionNum = sessionIdList.size();
-        for (int i = 0; i < sessionNum; i++) {
-          String[] sArr = sessionIdList.get(i).split(",");
-          List<ClickStream> datas = session.getClickStreamList(sArr[1], sArr[2], sArr[0]);
-          result.addAll(datas);
-        }
-      } catch (Exception e) {
-        LOG.error("Error during extraction of Clickstreams from log index. {}", e);
-      }
-    }
-
-    return result;
-  }
-
-  protected JavaRDD<ClickStream> getClickStreamListInParallel(Properties props, SparkDriver spark, ESDriver es) {
-
-    List<String> logIndexList = es.getIndexListWithPrefix(props.getProperty(MudrodConstants.LOG_INDEX));
-
-    LOG.info("Retrieved {}", logIndexList.toString());
-
-    List<String> sessionIdList = new ArrayList<>();
-    for (int n = 0; n < logIndexList.size(); n++) {
-      String logIndex = logIndexList.get(n);
-      List<String> tmpsessionList = this.getSessions(props, es, logIndex);
-      sessionIdList.addAll(tmpsessionList);
-    }
-
-    JavaRDD<String> sessionRDD = spark.sc.parallelize(sessionIdList, 16);
-
-    JavaRDD<ClickStream> clickStreamRDD = sessionRDD.mapPartitions(new FlatMapFunction<Iterator<String>, ClickStream>() {
-      /**
-       *
-       */
-      private static final long serialVersionUID = 1L;
-
-      @Override
-      public Iterator<ClickStream> call(Iterator<String> arg0) throws Exception {
-        ESDriver tmpES = new ESDriver(props);
-        tmpES.createBulkProcessor();
-
-        Session session = new Session(props, tmpES);
-        List<ClickStream> clickstreams = new ArrayList<>();
-        while (arg0.hasNext()) {
-          String s = arg0.next();
-          String[] sArr = s.split(",");
-          List<ClickStream> clicks = session.getClickStreamList(sArr[1], sArr[2], sArr[0]);
-          clickstreams.addAll(clicks);
-        }
-        tmpES.destroyBulkProcessor();
-        tmpES.close();
-        return clickstreams.iterator();
-      }
-    });
-
-    LOG.info("Clickstream number: {}", clickStreamRDD.count());
-
-    return clickStreamRDD;
-  }
-
-  // This function is reserved and not being used for now
-
-  /**
-   * loadClickStremFromTxt:Load click stream form txt file
-   *
-   * @param clickthroughFile
-   *          txt file
-   * @param sc
-   *          the spark context
-   * @return clickstream list in JavaRDD format {@link ClickStream}
-   */
-  public JavaRDD<ClickStream> loadClickStremFromTxt(String clickthroughFile, JavaSparkContext sc) {
-    return sc.textFile(clickthroughFile).flatMap(new FlatMapFunction<String, ClickStream>() {
-      /**
-       *
-       */
-      private static final long serialVersionUID = 1L;
-
-      @SuppressWarnings("unchecked")
-      @Override
-      public Iterator<ClickStream> call(String line) throws Exception {
-        List<ClickStream> clickthroughs = (List<ClickStream>) ClickStream.parseFromTextLine(line);
-        return (Iterator<ClickStream>) clickthroughs;
-      }
-    });
-  }
-
-  /**
-   * bulidDataQueryRDD: convert click stream list to data set queries pairs.
-   *
-   * @param clickstreamRDD:
-   *          click stream data
-   * @param downloadWeight:
-   *          weight of download behavior
-   * @return JavaPairRDD, key is short name of data set, and values are queries
-   */
-  public JavaPairRDD<String, List<String>> bulidDataQueryRDD(JavaRDD<ClickStream> clickstreamRDD, int downloadWeight) {
-    return clickstreamRDD.mapToPair(new PairFunction<ClickStream, String, List<String>>() {
-      /**
-       *
-       */
-      private static final long serialVersionUID = 1L;
-
-      @Override
-      public Tuple2<String, List<String>> call(ClickStream click) throws Exception {
-        List<String> query = new ArrayList<>();
-        // important! download behavior is given higher weights
-        // than viewing
-        // behavior
-        boolean download = click.isDownload();
-        int weight = 1;
-        if (download) {
-          weight = downloadWeight;
-        }
-        for (int i = 0; i < weight; i++) {
-          query.add(click.getKeyWords());
-        }
-
-        return new Tuple2<>(click.getViewDataset(), query);
-      }
-    }).reduceByKey(new Function2<List<String>, List<String>, List<String>>() {
-      /**
-       *
-       */
-      private static final long serialVersionUID = 1L;
-
-      @Override
-      public List<String> call(List<String> v1, List<String> v2) throws Exception {
-        List<String> list = new ArrayList<>();
-        list.addAll(v1);
-        list.addAll(v2);
-        return list;
-      }
-    });
-  }
-
-  /**
-   * getSessions: Get sessions from logs
-   *
-   * @param props
-   *          the Mudrod configuration
-   * @param es
-   *          the Elasticsearch driver
-   * @param logIndex
-   *          a log index name
-   * @return list of session names
-   */
-  protected List<String> getSessions(Properties props, ESDriver es, String logIndex) {
-
-    String cleanupPrefix = props.getProperty(MudrodConstants.CLEANUP_TYPE_PREFIX);
-    String sessionStatPrefix = props.getProperty(MudrodConstants.SESSION_STATS_PREFIX);
-
-    List<String> sessions = new ArrayList<>();
-    SearchResponse scrollResp = es.getClient().prepareSearch(logIndex).setTypes(sessionStatPrefix).setScroll(new TimeValue(60000)).setQuery(QueryBuilders.matchAllQuery()).setSize(100).execute()
-            .actionGet();
-    while (true) {
-      for (SearchHit hit : scrollResp.getHits().getHits()) {
-        Map<String, Object> session = hit.getSource();
-        String sessionID = (String) session.get("SessionID");
-        sessions.add(sessionID + "," + logIndex + "," + cleanupPrefix);
-      }
-
-      scrollResp = es.getClient().prepareSearchScroll(scrollResp.getScrollId()).setScroll(new TimeValue(600000)).execute().actionGet();
-      if (scrollResp.getHits().getHits().length == 0) {
-        break;
-      }
-    }
-
-    return sessions;
-  }
-
-  public JavaPairRDD<String, Double> bulidUserItermRDD(JavaRDD<ClickStream> clickstreamRDD) {
-    return clickstreamRDD.mapToPair(new PairFunction<ClickStream, String, Double>() {
-      /**
-       *
-       */
-      private static final long serialVersionUID = 1L;
-
-      @Override
-      public Tuple2<String, Double> call(ClickStream click) throws Exception {
-        double rate = 1;
-        boolean download = click.isDownload();
-        if (download) {
-          rate = 2;
-        }
-
-        String sessionID = click.getSessionID();
-        String user = sessionID.split("@")[0];
-
-        return new Tuple2<>(user + "," + click.getViewDataset(), rate);
-      }
-    }).reduceByKey(new Function2<Double, Double, Double>() {
-      /**
-       *
-       */
-      private static final long serialVersionUID = 1L;
-
-      @Override
-      public Double call(Double v1, Double v2) throws Exception {
-        return v1 >= v2 ? v1 : v2;
-
-      }
-    });
-  }
-
-  public JavaPairRDD<String, Double> bulidSessionItermRDD(JavaRDD<ClickStream> clickstreamRDD) {
-    JavaPairRDD<String, String> sessionItemRDD = clickstreamRDD.mapToPair(new PairFunction<ClickStream, String, String>() {
-      /**
-       *
-       */
-      private static final long serialVersionUID = 1L;
-
-      @Override
-      public Tuple2<String, String> call(ClickStream click) throws Exception {
-
-        String sessionID = click.getSessionID();
-        return new Tuple2<>(sessionID, click.getViewDataset());
-      }
-    }).distinct();
-
-    // remove some sessions
-    JavaPairRDD<String, Double> sessionItemNumRDD = sessionItemRDD.keys().mapToPair(new PairFunction<String, String, Double>() {
-      /**
-       *
-       */
-      private static final long serialVersionUID = 1L;
-
-      @Override
-      public Tuple2<String, Double> call(String item) throws Exception {
-        return new Tuple2<>(item, 1.0);
-      }
-    }).reduceByKey(new Function2<Double, Double, Double>() {
-      /**
-       *
-       */
-      private static final long serialVersionUID = 1L;
-
-      @Override
-      public Double call(Double v1, Double v2) throws Exception {
-        return v1 + v2;
-      }
-    }).filter(new Function<Tuple2<String, Double>, Boolean>() {
-      /**
-       *
-       */
-      private static final long serialVersionUID = 1L;
-
-      @Override
-      public Boolean call(Tuple2<String, Double> arg0) throws Exception {
-        Boolean b = true;
-        if (arg0._2 < 2) {
-          b = false;
-        }
-        return b;
-      }
-    });
-
-    return sessionItemNumRDD.leftOuterJoin(sessionItemRDD).mapToPair(new PairFunction<Tuple2<String, Tuple2<Double, Optional<String>>>, String, Double>() {
-      /**
-       *
-       */
-      private static final long serialVersionUID = 1L;
-
-      @Override
-      public Tuple2<String, Double> call(Tuple2<String, Tuple2<Double, Optional<String>>> arg0) throws Exception {
-
-        Tuple2<Double, Optional<String>> test = arg0._2;
-        Optional<String> optStr = test._2;
-        String item = "";
-        if (optStr.isPresent()) {
-          item = optStr.get();
-        }
-        return new Tuple2<>(arg0._1 + "," + item, 1.0);
-      }
-
-    });
-  }
-
-  public JavaPairRDD<String, List<String>> bulidSessionDatasetRDD(Properties props, ESDriver es, SparkDriver spark) {
-
-    List<String> result = new ArrayList<>();
-    List<String> logIndexList = es.getIndexListWithPrefix(props.getProperty(MudrodConstants.LOG_INDEX));
-    for (int n = 0; n < logIndexList.size(); n++) {
-      String logIndex = logIndexList.get(n);
-      SearchResponse scrollResp = es.getClient().prepareSearch(logIndex).setTypes(props.getProperty(MudrodConstants.SESSION_STATS_PREFIX)).setScroll(new TimeValue(60000)).setQuery(QueryBuilders.matchAllQuery())
-              .setSize(100).execute().actionGet();
-      while (true) {
-        for (SearchHit hit : scrollResp.getHits().getHits()) {
-          Map<String, Object> session = hit.getSource();
-          String sessionID = (String) session.get("SessionID");
-          String views = (String) session.get("views");
-          if (views != null && !"".equals(views)) {
-            String sessionItems = sessionID + ":" + views;
-            result.add(sessionItems);
-          }
-        }
-
-        scrollResp = es.getClient().prepareSearchScroll(scrollResp.getScrollId()).setScroll(new TimeValue(600000)).execute().actionGet();
-        if (scrollResp.getHits().getHits().length == 0) {
-          break;
-        }
-      }
-    }
-
-    JavaRDD<String> sessionRDD = spark.sc.parallelize(result);
-
-    return sessionRDD.mapToPair(new PairFunction<String, String, List<String>>() {
-      private static final long serialVersionUID = 1L;
-
-      @Override
-      public Tuple2<String, List<String>> call(String sessionitem) throws Exception {
-        String[] splits = sessionitem.split(":");
-        String sessionId = splits[0];
-        List<String> itemList = new ArrayList<>();
-
-        String items = splits[1];
-        String[] itemArr = items.split(",");
-        int size = itemArr.length;
-        for (int i = 0; i < size; i++) {
-          String item = itemArr[i];
-          if (!itemList.contains(item))
-            itemList.add(itemArr[i]);
-        }
-
-        return new Tuple2<>(sessionId, itemList);
-      }
-    });
-  }
-
-  /**
-   * extractClickStreamFromES:Extract click streams from logs stored in
-   * Elasticsearch
-   *
-   * @param props
-   *          the Mudrod configuration
-   * @param es
-   *          the Elasticsearch drive
-   * @param spark
-   *          the spark driver
-   * @return clickstream list in JavaRDD format {@link ClickStream}
-   */
-  public JavaRDD<RankingTrainData> extractRankingTrainData(Properties props, ESDriver es, SparkDriver spark) {
-
-    List<RankingTrainData> queryList = this.extractRankingTrainData(props, es);
-    return spark.sc.parallelize(queryList);
-
-  }
-
-  /**
-   * getClickStreamList:Extract click streams from logs stored in Elasticsearch.
-   *
-   * @param props
-   *          the Mudrod configuration
-   * @param es
-   *          the Elasticsearch driver
-   * @return clickstream list {@link ClickStream}
-   */
-  protected List<RankingTrainData> extractRankingTrainData(Properties props, ESDriver es) {
-    List<String> logIndexList = es.getIndexListWithPrefix(props.getProperty(MudrodConstants.LOG_INDEX));
-
-    LOG.info(logIndexList.toString());
-
-    List<RankingTrainData> result = new ArrayList<>();
-    for (int n = 0; n < logIndexList.size(); n++) {
-      String logIndex = logIndexList.get(n);
-      List<String> sessionIdList;
-      try {
-        sessionIdList = this.getSessions(props, es, logIndex);
-        Session session = new Session(props, es);
-        int sessionNum = sessionIdList.size();
-        for (int i = 0; i < sessionNum; i++) {
-          String[] sArr = sessionIdList.get(i).split(",");
-          List<RankingTrainData> datas = session.getRankingTrainData(sArr[1], sArr[2], sArr[0]);
-          result.addAll(datas);
-        }
-      } catch (Exception e) {
-        LOG.error("Error which extracting ranking train data: {}", e);
-      }
-    }
-
-    return result;
-  }
-
-  protected JavaRDD<RankingTrainData> extractRankingTrainDataInParallel(Properties props, SparkDriver spark, ESDriver es) {
-
-    List<String> logIndexList = es.getIndexListWithPrefix(props.getProperty(MudrodConstants.LOG_INDEX));
-
-    LOG.info(logIndexList.toString());
-
-    List<String> sessionIdList = new ArrayList<>();
-    for (int n = 0; n < logIndexList.size(); n++) {
-      String logIndex = logIndexList.get(n);
-      List<String> tmpsessionList = this.getSessions(props, es, logIndex);
-      sessionIdList.addAll(tmpsessionList);
-    }
-
-    JavaRDD<String> sessionRDD = spark.sc.parallelize(sessionIdList, 16);
-
-    JavaRDD<RankingTrainData> clickStreamRDD = sessionRDD.mapPartitions(new FlatMapFunction<Iterator<String>, RankingTrainData>() {
-      /**
-       *
-       */
-      private static final long serialVersionUID = 1L;
-
-      @Override
-      public Iterator<RankingTrainData> call(Iterator<String> arg0) throws Exception {
-        ESDriver tmpES = new ESDriver(props);
-        tmpES.createBulkProcessor();
-
-        Session session = new Session(props, tmpES);
-        List<RankingTrainData> clickstreams = new ArrayList<>();
-        while (arg0.hasNext()) {
-          String s = arg0.next();
-          String[] sArr = s.split(",");
-          List<RankingTrainData> clicks = session.getRankingTrainData(sArr[1], sArr[2], sArr[0]);
-          clickstreams.addAll(clicks);
-        }
-        tmpES.destroyBulkProcessor();
-        tmpES.close();
-        return clickstreams.iterator();
-      }
-    });
-
-    LOG.info("Clickstream number: {}", clickStreamRDD.count());
-
-    return clickStreamRDD;
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/SessionNode.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/SessionNode.java b/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/SessionNode.java
deleted file mode 100644
index 958e184..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/SessionNode.java
+++ /dev/null
@@ -1,344 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you
- * may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package gov.nasa.jpl.mudrod.weblog.structure;
-
-import java.io.UnsupportedEncodingException;
-import java.net.URLDecoder;
-import java.util.*;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-/**
- * A single request (node) in a session tree structure, together with the
- * helpers that derive its type, normalised URL and data set ID from the raw
- * log entry.
- */
-public class SessionNode {
-  // Character set used whenever request text is URL-decoded.
-  private static final String UTF_8 = "UTF-8";
-  // Extracts the path out of a lower-cased "get <path> http/..." request line.
-  // Compiled once instead of on every parseRequest call.
-  private static final Pattern REQUEST_PATH_PATTERN = Pattern.compile("get (.*?) http/*");
-
-  // id: Node ID
-  protected String id;
-  // value: Node value
-  protected String value;
-  // parent: Parent node of this node
-  protected SessionNode parent;
-  // children: Child nodes of this node
-  protected List<SessionNode> children = new ArrayList<>();
-  // time: request time of node
-  protected String time;
-  // request: request url of this node
-  protected String request;
-  // referer: previous request url of this node
-  protected String referer;
-  // seq: sequence of this node
-  protected int seq;
-  // key: type of this node extracted from url - "dataset", "datasetlist",
-  // "ftp", "root", or "" when the request matches none of them
-  protected String key;
-  // logType: log type of this node; the code below distinguishes "PO.DAAC",
-  // "ftp" and "root"
-  protected String logType;
-  // search: query extracted from this node
-  protected String search;
-  // filter: filter facets extracted from this node; stays null until
-  // parseFilterParams has been run
-  protected Map<String, String> filter;
-  // datasetId: viewed/downloaded data set ID
-  protected String datasetId;
-
-  /**
-   * Creates an empty node; every field keeps its default value.
-   */
-  public SessionNode() {
-
-  }
-
-  /**
-   * Creates a new instance of SessionNode.
-   *
-   * @param request: request url
-   * @param logType: log type, e.g. "PO.DAAC", "ftp" or "root"
-   * @param referer: previous request url
-   * @param time:    request time of node
-   * @param seq:     sequence of this node
-   */
-  public SessionNode(String request, String logType, String referer, String time, int seq) {
-    // logType has to be assigned first because setRequest consults it.
-    this.logType = logType;
-    this.time = time;
-    this.seq = seq;
-    this.setRequest(request);
-    this.setReferer(referer);
-    // The key is derived from the raw request argument, not the parsed one.
-    this.setKey(request, logType);
-  }
-
-  /**
-   * setReferer: Set previous request url of this node. The value is
-   * lower-cased and the "http://podaac.jpl.nasa.gov" prefix is removed; a null
-   * referer is stored as the empty string.
-   *
-   * @param referer previous request url
-   */
-  public void setReferer(String referer) {
-    if (referer == null) {
-      this.referer = "";
-      return;
-    }
-    // Locale.ROOT keeps the result independent of the JVM default locale.
-    this.referer = referer.toLowerCase(Locale.ROOT).replace("http://podaac.jpl.nasa.gov", "");
-  }
-
-  /**
-   * setRequest: Set request url of this node. Requests of "PO.DAAC" logs are
-   * additionally parsed, see {@link #parseRequest(String)}.
-   *
-   * @param req request url
-   */
-  public void setRequest(String req) {
-    this.request = req;
-    // Constant-first comparison: logType is null after the no-arg constructor.
-    if ("PO.DAAC".equals(this.logType)) {
-      this.parseRequest(req);
-    }
-  }
-
-  /**
-   * getChildren:Get child nodes of this node
-   *
-   * @return child nodes
-   */
-  public List<SessionNode> getChildren() {
-    return this.children;
-  }
-
-  /**
-   * setChildren: Set child nodes of this node
-   *
-   * @param children child nodes of this node
-   */
-  public void setChildren(List<SessionNode> children) {
-    this.children = children;
-  }
-
-  /**
-   * addChildren: Add a child node
-   *
-   * @param node session node
-   */
-  public void addChildren(SessionNode node) {
-    this.children.add(node);
-  }
-
-  /**
-   * getId:Get node ID
-   *
-   * @return node ID of this node
-   */
-  public String getId() {
-    return this.id;
-  }
-
-  /**
-   * bSame:Compare this node with another node by request url.
-   *
-   * @param node {@link SessionNode}
-   * @return true when both nodes carry the same non-null request url; false
-   * otherwise, including when {@code node} or this node's request is null
-   */
-  public Boolean bSame(SessionNode node) {
-    return node != null && this.request != null && this.request.equals(node.request);
-  }
-
-  /**
-   * setKey:Set request type which is one of "ftp", "root", "datasetlist",
-   * "dataset", or "" when nothing matches.
-   *
-   * @param request request url
-   * @param logType url type
-   */
-  public void setKey(String request, String logType) {
-    if ("ftp".equals(logType)) {
-      this.key = "ftp";
-    } else if ("root".equals(logType)) {
-      this.key = "root";
-    } else if (request == null) {
-      // Nothing to inspect: leave the node untyped instead of failing.
-      this.key = "";
-    } else if (request.contains("/datasetlist?")) {
-      this.key = "datasetlist";
-    } else if (request.contains("/dataset/")) {
-      this.key = "dataset";
-    } else {
-      this.key = "";
-    }
-  }
-
-  /**
-   * getKey:Get request type, see {@link #setKey(String, String)}.
-   *
-   * @return request url type of this node
-   */
-  public String getKey() {
-    return this.key;
-  }
-
-  /**
-   * getRequest:Get node request
-   *
-   * @return request url of this node
-   */
-  public String getRequest() {
-    return this.request;
-  }
-
-  /**
-   * getReferer:Get previous request url of this node
-   *
-   * @return previous request url of this node
-   */
-  public String getReferer() {
-    return this.referer;
-  }
-
-  /**
-   * getParent:Get parent node of this node
-   *
-   * @return parent node of this node
-   */
-  public SessionNode getParent() {
-    return this.parent;
-  }
-
-  /**
-   * setParent: Set parent node of this node
-   *
-   * @param parent the previous request node of this node
-   */
-  public void setParent(SessionNode parent) {
-    this.parent = parent;
-  }
-
-  /**
-   * getSearch:Get query of this node
-   *
-   * @return search query of this node
-   */
-  public String getSearch() {
-    return this.search;
-  }
-
-  /**
-   * getFilter:Get filter facets of this node
-   *
-   * @return filter values of this node; null when no filter has been parsed
-   */
-  public Map<String, String> getFilter() {
-    return this.filter;
-  }
-
-  /**
-   * getDatasetId:Get data set ID of this node
-   *
-   * @return viewing/downloading data set of this node
-   */
-  public String getDatasetId() {
-    return this.datasetId;
-  }
-
-  /**
-   * getSeq:Get sequence of this node
-   *
-   * @return request sequence of this node
-   */
-  public int getSeq() {
-    return this.seq;
-  }
-
-  /**
-   * getFilterStr:Get filter facets of this node as "key=value" pairs joined
-   * by commas.
-   *
-   * @return filter values of this node; the empty string when the node has no
-   * filter (the filter map is null or empty)
-   */
-  public String getFilterStr() {
-    // The filter map is only created by parseFilterParams, so it may be null.
-    if (this.filter == null || this.filter.isEmpty()) {
-      return "";
-    }
-
-    StringBuilder joined = new StringBuilder();
-    for (Map.Entry<String, String> facet : this.filter.entrySet()) {
-      if (joined.length() > 0) {
-        joined.append(',');
-      }
-      joined.append(facet.getKey()).append('=').append(facet.getValue());
-    }
-    return joined.toString();
-  }
-
-  /**
-   * parseRequest:Parse request to extract the requested path. When the
-   * request has the form "GET &lt;path&gt; HTTP/..." only the path is kept;
-   * the stored request is always lower-cased. A path containing "/dataset/"
-   * also populates the data set ID.
-   *
-   * @param request request url of this node; a null value leaves the stored
-   *                request null
-   */
-  public void parseRequest(String request) {
-    if (request == null) {
-      this.request = null;
-      return;
-    }
-
-    String path = request;
-    Matcher matcher = REQUEST_PATH_PATTERN.matcher(request.trim().toLowerCase(Locale.ROOT));
-    // Keep the last match, as the original loop did.
-    while (matcher.find()) {
-      path = matcher.group(1);
-    }
-    if (path.contains("/dataset/")) {
-      this.parseDatasetId(path);
-    }
-
-    this.request = path.toLowerCase(Locale.ROOT);
-  }
-
-  /**
-   * parseFilterParams:Parse filter facets information. The "ids" and "values"
-   * parameters are colon separated lists that are paired up position by
-   * position; surplus entries on either side are ignored.
-   *
-   * @param params filter key value pairs of this node
-   */
-  private void parseFilterParams(Map<String, String> params) {
-    this.filter = new HashMap<>();
-
-    String idsStr = params.get("ids");
-    String valueStr = params.get("values");
-    if (idsStr != null && !idsStr.isEmpty() && valueStr != null) {
-      String[] ids = decodeUtf8(idsStr).split(":");
-      String[] values = decodeUtf8(valueStr).split(":");
-      // The two lists come from the URL and are not guaranteed to line up.
-      int size = Math.min(ids.length, values.length);
-      for (int i = 0; i < size; i++) {
-        this.filter.put(ids[i], values[i]);
-      }
-    }
-
-    if (this.search != null && !this.search.isEmpty()) {
-      this.filter.put("search", this.search);
-    }
-  }
-
-  /**
-   * parseDatasetId:Parse Request to extract data set ID, i.e. the third
-   * "/"-separated segment of the path part (before any "?") of the decoded
-   * request. The data set ID is left untouched when the path is too short.
-   *
-   * @param request request url
-   */
-  public void parseDatasetId(String request) {
-    if (request == null) {
-      return;
-    }
-
-    String decoded = decodeUtf8(request);
-    // Cut the query string by index: split("[?]") yields an empty array for "?".
-    int queryStart = decoded.indexOf('?');
-    String path = queryStart >= 0 ? decoded.substring(0, queryStart) : decoded;
-    String[] parts = path.split("/");
-    if (parts.length <= 2) {
-      return;
-    }
-    this.datasetId = parts[2];
-  }
-
-  /**
-   * URL-decodes text as UTF-8. Text with a malformed percent escape is
-   * returned unchanged so that a single bad log line cannot abort processing.
-   *
-   * @param encoded URL-encoded text
-   * @return the decoded text, or {@code encoded} itself when it cannot be
-   * decoded
-   */
-  private static String decodeUtf8(String encoded) {
-    try {
-      return URLDecoder.decode(encoded, UTF_8);
-    } catch (UnsupportedEncodingException e) {
-      // Cannot happen in practice: every JVM is required to support UTF-8.
-      throw new IllegalStateException("UTF-8 encoding is not available", e);
-    } catch (IllegalArgumentException e) {
-      // Malformed escape sequence (e.g. "%zz"): fall back to the raw text.
-      return encoded;
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/SessionTree.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/SessionTree.java b/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/SessionTree.java
deleted file mode 100644
index 46c8d0c..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/weblog/structure/SessionTree.java
+++ /dev/null
@@ -1,521 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you
- * may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package gov.nasa.jpl.mudrod.weblog.structure;
-
-import com.google.gson.Gson;
-import com.google.gson.JsonElement;
-import com.google.gson.JsonObject;
-import gov.nasa.jpl.mudrod.discoveryengine.MudrodAbstract;
-import gov.nasa.jpl.mudrod.driver.ESDriver;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.UnsupportedEncodingException;
-import java.util.*;
-import java.util.concurrent.ExecutionException;
-
-/**
- * ClassName: SessionTree Function: Convert request list in a session to a tree
- * and derive click streams and ranking training data from it.
- */
-public class SessionTree extends MudrodAbstract {
-
-  private static final long serialVersionUID = 1L;
-  private static final Logger LOG = LoggerFactory.getLogger(SessionTree.class);
-  // Shared converter for treeToJson; avoids one instance per recursive call.
-  private static final Gson GSON = new Gson();
-
-  // Node keys as produced by SessionNode.
-  private static final String KEY_ROOT = "root";
-  private static final String KEY_DATASETLIST = "datasetlist";
-  private static final String KEY_DATASET = "dataset";
-  private static final String KEY_FTP = "ftp";
-
-  // size: node numbers in the session tree
-  public int size = 0;
-  // root: root node of session tree
-  protected SessionNode root = null;
-  // binsert: becomes true once the first "datasetlist" node has been seen;
-  // nodes offered before that are not inserted
-  public boolean binsert = false;
-  // tmpnode: the most recently inserted node (the root before any insert)
-  public SessionNode tmpnode;
-  // latestDatasetnode: the latest inserted node whose key is "dataset"
-  public SessionNode latestDatasetnode;
-  // sessionID: session ID
-  private String sessionID;
-  // cleanupType: session type in Elasticsearch
-  private String cleanupType;
-
-  /**
-   * Creates a new instance of SessionTree.
-   *
-   * @param props:       the Mudrod configuration
-   * @param es:          the Elasticsearch drive
-   * @param rootData:    root node of the tree; not used, a fresh root node is
-   *                     always created
-   * @param sessionID:   session ID
-   * @param cleanupType: session type
-   */
-  public SessionTree(Properties props, ESDriver es, SessionNode rootData, String sessionID, String cleanupType) {
-    // Delegate so that both constructors build an identical, self-parented root.
-    this(props, es, sessionID, cleanupType);
-  }
-
-  /**
-   * Creates a new instance of SessionTree.
-   *
-   * @param props:       the Mudrod configuration
-   * @param es:          the Elasticsearch drive
-   * @param sessionID:   session ID
-   * @param cleanupType: session type
-   */
-  public SessionTree(Properties props, ESDriver es, String sessionID, String cleanupType) {
-    super(props, es, null);
-    root = new SessionNode(KEY_ROOT, KEY_ROOT, "", "", 0);
-    root.setParent(root);
-    tmpnode = root;
-    this.sessionID = sessionID;
-    this.cleanupType = cleanupType;
-  }
-
-  /**
-   * insert: insert a node into the session tree.
-   *
-   * @param node {@link SessionNode}
-   * @return the inserted node, or null when the node was rejected (tree not
-   * yet started by a "datasetlist" node, unrelated node type, repetition of
-   * the previous request, or no parent found)
-   */
-  public SessionNode insert(SessionNode node) {
-    String nodeKey = node.getKey();
-
-    // begin with datasetlist
-    if (KEY_DATASETLIST.equals(nodeKey)) {
-      this.binsert = true;
-    }
-    if (!this.binsert) {
-      return null;
-    }
-    // remove unrelated node
-    if (!KEY_DATASETLIST.equals(nodeKey) && !KEY_DATASET.equals(nodeKey) && !KEY_FTP.equals(nodeKey)) {
-      return null;
-    }
-    // remove duplicated click
-    if (Objects.equals(node.getRequest(), tmpnode.getRequest())) {
-      return null;
-    }
-    // search insert node
-    SessionNode parentnode = this.searchParentNode(node);
-    if (parentnode == null) {
-      return null;
-    }
-    node.setParent(parentnode);
-    parentnode.addChildren(node);
-
-    // record insert node
-    tmpnode = node;
-    if (KEY_DATASET.equals(nodeKey)) {
-      latestDatasetnode = node;
-    }
-
-    size++;
-    return node;
-  }
-
-  /**
-   * printTree: Log the request of a node and, recursively, of all of its
-   * descendants.
-   *
-   * @param node root node of the (sub)tree to print
-   */
-  public void printTree(SessionNode node) {
-    LOG.info("node: {} \n", node.getRequest());
-    // The original guarded this loop with isEmpty(), so it never descended.
-    for (SessionNode child : node.children) {
-      printTree(child);
-    }
-  }
-
-  /**
-   * TreeToJson: Convert the session tree to Json object
-   *
-   * @param node node of the session tree
-   * @return tree content in Json format
-   */
-  public JsonObject treeToJson(SessionNode node) {
-    JsonObject json = new JsonObject();
-    String nodeKey = node.getKey();
-
-    json.addProperty("seq", node.getSeq());
-    if (KEY_DATASETLIST.equals(nodeKey)) {
-      json.addProperty("icon", "./resources/images/searching.png");
-      json.addProperty("name", node.getRequest());
-    } else if (KEY_DATASET.equals(nodeKey)) {
-      json.addProperty("icon", "./resources/images/viewing.png");
-      json.addProperty("name", node.getDatasetId());
-    } else if (KEY_FTP.equals(nodeKey)) {
-      json.addProperty("icon", "./resources/images/downloading.png");
-      json.addProperty("name", node.getRequest());
-    } else if (KEY_ROOT.equals(nodeKey)) {
-      json.addProperty("name", "");
-      json.addProperty("icon", "./resources/images/users.png");
-    }
-
-    // "children" is only emitted for nodes that actually have children.
-    if (!node.children.isEmpty()) {
-      List<JsonObject> jsonChildren = new ArrayList<>(node.children.size());
-      for (SessionNode child : node.children) {
-        jsonChildren.add(treeToJson(child));
-      }
-      JsonElement jsonElement = GSON.toJsonTree(jsonChildren);
-      json.add("children", jsonElement);
-    }
-
-    return json;
-  }
-
-  /**
-   * getClickStreamList: Get click stream list in the session. One entry is
-   * produced per analysed query term of every "dataset" node whose parent is
-   * a "datasetlist" node.
-   *
-   * @return {@link ClickStream}
-   */
-  public List<ClickStream> getClickStreamList() {
-
-    List<ClickStream> clickthroughs = new ArrayList<>();
-    for (SessionNode viewnode : this.getViewNodes(this.root)) {
-      if (!KEY_DATASETLIST.equals(viewnode.getParent().getKey())) {
-        continue;
-      }
-
-      RequestUrl requestURL = new RequestUrl();
-      String viewquery = "";
-      try {
-        String infoStr = requestURL.getSearchInfo(viewnode.getRequest());
-        viewquery = es.customAnalyzing(props.getProperty("indexName"), infoStr);
-      } catch (InterruptedException e) {
-        // Restore the flag so that callers can still observe the interruption.
-        Thread.currentThread().interrupt();
-        LOG.warn("Exception getting search info. Ignoring...", e);
-      } catch (UnsupportedEncodingException | ExecutionException e) {
-        LOG.warn("Exception getting search info. Ignoring...", e);
-      }
-
-      if (viewquery == null || "".equals(viewquery)) {
-        continue;
-      }
-
-      String dataset = viewnode.getDatasetId();
-      boolean download = hasFtpChild(viewnode);
-      for (String query : viewquery.trim().split(",")) {
-        ClickStream data = new ClickStream(query, dataset, download);
-        data.setSessionId(this.sessionID);
-        data.setType(this.cleanupType);
-        clickthroughs.add(data);
-      }
-    }
-
-    return clickthroughs;
-  }
-
-  /**
-   * Tells whether a node has at least one direct child whose key is "ftp".
-   *
-   * @param node {@link SessionNode}
-   * @return true when an "ftp" child exists
-   */
-  private boolean hasFtpChild(SessionNode node) {
-    for (SessionNode child : node.getChildren()) {
-      if (KEY_FTP.equals(child.getKey())) {
-        return true;
-      }
-    }
-    return false;
-  }
-
-  /**
-   * searchParentNode:Get parent node of a session node
-   *
-   * @param node {@link SessionNode}
-   * @return the node the given node should be attached to, or null when none
-   * can be determined
-   */
-  private SessionNode searchParentNode(SessionNode node) {
-
-    String nodeKey = node.getKey();
-    boolean noReferer = "-".equals(node.getReferer());
-
-    if (KEY_DATASETLIST.equals(nodeKey)) {
-      // A search without a known referer starts a new branch below the root.
-      if (noReferer) {
-        return root;
-      }
-      SessionNode referrer = this.findLatestRefer(tmpnode, node.getReferer());
-      return referrer == null ? root : referrer;
-    } else if (KEY_DATASET.equals(nodeKey)) {
-      if (noReferer) {
-        return null;
-      }
-      return this.findLatestRefer(tmpnode, node.getReferer());
-    } else if (KEY_FTP.equals(nodeKey)) {
-      return latestDatasetnode;
-    }
-
-    return tmpnode;
-  }
-
-  /**
-   * findLatestRefer: Find the node whose visiting url is equal to the refer
-   * url of a session node. The search starts at the given node and widens one
-   * ancestor at a time until the root has been reached.
-   *
-   * @param node:  {@link SessionNode} to start from
-   * @param refer: request url
-   * @return the matching node, or null when no node has that request url
-   */
-  private SessionNode findLatestRefer(SessionNode node, String refer) {
-    if (refer == null) {
-      return null;
-    }
-
-    SessionNode current = node;
-    while (!KEY_ROOT.equals(current.getKey())) {
-      SessionNode parentNode = current.getParent();
-      if (refer.equals(parentNode.getRequest())) {
-        return parentNode;
-      }
-
-      SessionNode match = this.iterChild(parentNode, refer);
-      if (match != null) {
-        return match;
-      }
-      current = parentNode;
-    }
-    return null;
-  }
-
-  /**
-   * iterChild: Search the descendants of a node for a request url, visiting
-   * children from the last added to the first added.
-   *
-   * @param start node whose descendants are searched
-   * @param refer request url to look for
-   * @return the matching descendant, or null when there is none
-   */
-  private SessionNode iterChild(SessionNode start, String refer) {
-    List<SessionNode> children = start.getChildren();
-    for (int i = children.size() - 1; i >= 0; i--) {
-      SessionNode child = children.get(i);
-      // Descendants were inserted after the child itself, so look there first.
-      // The original dropped this result and so never found nested matches.
-      SessionNode nested = iterChild(child, refer);
-      if (nested != null) {
-        return nested;
-      }
-      if (refer.equals(child.getRequest())) {
-        return child;
-      }
-    }
-
-    return null;
-  }
-
-  /**
-   * check: Tell whether any node in a list has the given key.
-   *
-   * @param children nodes to inspect
-   * @param str      key to look for
-   * @return true when at least one node has that key
-   */
-  private boolean check(List<SessionNode> children, String str) {
-    for (SessionNode child : children) {
-      if (str.equals(child.key)) {
-        return true;
-      }
-    }
-    return false;
-  }
-
-  /**
-   * insertHelperChildren: Try to insert an entry below each of the given
-   * nodes in turn, stopping at the first success.
-   *
-   * @param entry    node to insert
-   * @param children candidate (sub)trees
-   * @return true when the entry was inserted
-   */
-  private boolean insertHelperChildren(SessionNode entry, List<SessionNode> children) {
-    for (SessionNode child : children) {
-      if (insertHelper(entry, child)) {
-        return true;
-      }
-    }
-    return false;
-  }
-
-  /**
-   * insertHelper: Try to insert an entry into the (sub)tree rooted at a node.
-   * "datasetlist" and "dataset" entries are hosted by a "datasetlist" node,
-   * "ftp" entries by a "dataset" node; the entry is appended to the first
-   * host that has no child of the host's own type, otherwise the search
-   * continues in the node's children.
-   *
-   * @param entry node to insert
-   * @param node  root of the (sub)tree to insert into
-   * @return true when the entry was inserted
-   */
-  private boolean insertHelper(SessionNode entry, SessionNode node) {
-    String hostKey;
-    if (KEY_DATASETLIST.equals(entry.key) || KEY_DATASET.equals(entry.key)) {
-      hostKey = KEY_DATASETLIST;
-    } else if (KEY_FTP.equals(entry.key)) {
-      hostKey = KEY_DATASET;
-    } else {
-      return false;
-    }
-
-    // check() is false for an empty list, which covers the "no children" case.
-    if (hostKey.equals(node.key) && !check(node.children, hostKey)) {
-      node.children.add(entry);
-      return true;
-    }
-    // Propagate the outcome; the original always reported false here.
-    return insertHelperChildren(entry, node.children);
-  }
-
-  /**
-   * getViewNodes: Get the nodes of a (sub)tree, the given node included,
-   * whose key is "dataset".
-   *
-   * @param node root of the (sub)tree
-   * @return a list of session node
-   */
-  private List<SessionNode> getViewNodes(SessionNode node) {
-    return this.getNodes(node, KEY_DATASET);
-  }
-
-  /**
-   * getQueryNodes: Get the nodes of a (sub)tree, the given node included,
-   * whose key is "datasetlist".
-   *
-   * @param node root of the (sub)tree
-   * @return a list of session node
-   */
-  private List<SessionNode> getQueryNodes(SessionNode node) {
-    return this.getNodes(node, KEY_DATASETLIST);
-  }
-
-  /**
-   * getNodes: Collect, parent before children, the nodes of a (sub)tree that
-   * have the given key.
-   *
-   * @param node    root of the (sub)tree
-   * @param nodeKey key to select
-   * @return a list of session node
-   */
-  private List<SessionNode> getNodes(SessionNode node, String nodeKey) {
-
-    List<SessionNode> nodes = new ArrayList<>();
-    if (nodeKey.equals(node.getKey())) {
-      nodes.add(node);
-    }
-    for (SessionNode childNode : node.children) {
-      nodes.addAll(getNodes(childNode, nodeKey));
-    }
-
-    return nodes;
-  }
-
-  /**
-   * Obtain the ranking training data. For every query ("datasetlist") node
-   * with more than one viewed data set and at least one download, each
-   * downloaded data set is paired with each data set that was only viewed,
-   * once per analysed query term.
-   *
-   * @param indexName   the index name recorded on every training record
-   * @param sessionID   a valid session identifier; not read, the session ID
-   *                    given at construction time is recorded instead
-   * @return {@link RankingTrainData}
-   * @throws UnsupportedEncodingException if there is an error whilst
-   *                                      processing the ranking training data.
-   */
-  public List<RankingTrainData> getRankingTrainData(String indexName, String sessionID) throws UnsupportedEncodingException {
-
-    List<RankingTrainData> trainDatas = new ArrayList<>();
-
-    for (SessionNode querynode : this.getQueryNodes(this.root)) {
-      // data set ID -> whether it was downloaded, in click order
-      Map<String, Boolean> datasetOpt = new LinkedHashMap<>();
-      int ndownload = 0;
-      for (SessionNode node : querynode.getChildren()) {
-        if (!KEY_DATASET.equals(node.getKey())) {
-          continue;
-        }
-        boolean bDownload = hasFtpChild(node);
-        if (bDownload) {
-          ndownload += 1;
-        }
-        datasetOpt.put(node.datasetId, bDownload);
-      }
-
-      // method 1: The priority of download data are higher
-      if (datasetOpt.size() <= 1 || ndownload == 0) {
-        continue;
-      }
-
-      // query
-      RequestUrl requestURL = new RequestUrl();
-      String queryUrl = querynode.getRequest();
-      String infoStr = requestURL.getSearchInfo(queryUrl);
-      String query;
-      try {
-        query = es.customAnalyzing(props.getProperty("indexName"), infoStr);
-      } catch (InterruptedException e) {
-        // Restore the flag before converting to an unchecked exception.
-        Thread.currentThread().interrupt();
-        throw new RuntimeException("Error performing custom analyzing", e);
-      } catch (ExecutionException e) {
-        throw new RuntimeException("Error performing custom analyzing", e);
-      }
-      Map<String, String> filter = RequestUrl.getFilterInfo(queryUrl);
-      if (query == null) {
-        // No analysed query available: nothing to pair for this node.
-        continue;
-      }
-      // Split once; the result is identical for every data set pair.
-      String[] queries = query.split(",");
-
-      for (Map.Entry<String, Boolean> downloaded : datasetOpt.entrySet()) {
-        if (!downloaded.getValue()) {
-          continue;
-        }
-        for (Map.Entry<String, Boolean> viewedOnly : datasetOpt.entrySet()) {
-          if (viewedOnly.getValue()) {
-            continue;
-          }
-          for (String term : queries) {
-            RankingTrainData trainData = new RankingTrainData(term, downloaded.getKey(), viewedOnly.getKey());
-
-            trainData.setSessionId(this.sessionID);
-            trainData.setIndex(indexName);
-            trainData.setFilter(filter);
-            trainDatas.add(trainData);
-          }
-        }
-      }
-    }
-
-    return trainDatas;
-  }
-}