You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@sdap.apache.org by le...@apache.org on 2017/12/19 14:13:16 UTC

[15/17] incubator-sdap-mudrod git commit: SDAP-7 Change all package namespaces to org.apache.sdap

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/metadata/structure/MetadataExtractor.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/metadata/structure/MetadataExtractor.java b/core/src/main/java/gov/nasa/jpl/mudrod/metadata/structure/MetadataExtractor.java
deleted file mode 100644
index a79ca87..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/metadata/structure/MetadataExtractor.java
+++ /dev/null
@@ -1,145 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you
- * may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package gov.nasa.jpl.mudrod.metadata.structure;
-
-import gov.nasa.jpl.mudrod.driver.ESDriver;
-import org.apache.spark.api.java.JavaPairRDD;
-import org.apache.spark.api.java.JavaRDD;
-import org.apache.spark.api.java.JavaSparkContext;
-import org.apache.spark.api.java.function.Function2;
-import org.apache.spark.api.java.function.PairFunction;
-import org.elasticsearch.action.search.SearchResponse;
-import org.elasticsearch.common.unit.TimeValue;
-import org.elasticsearch.index.query.QueryBuilders;
-import org.elasticsearch.search.SearchHit;
-import scala.Tuple2;
-
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Map;
-import java.util.concurrent.ExecutionException;
-
-/**
- * Loads dataset metadata documents from Elasticsearch and converts them into a
- * Spark pair RDD keyed by dataset short name.
- */
-public class MetadataExtractor implements Serializable {
-
-  private static final long serialVersionUID = 1L;
-
-  /** Keep-alive, in milliseconds, requested when the scroll search is opened. */
-  private static final long INITIAL_SCROLL_KEEP_ALIVE_MS = 60000;
-
-  /** Keep-alive, in milliseconds, requested for every follow-up scroll page. */
-  private static final long NEXT_SCROLL_KEEP_ALIVE_MS = 600000;
-
-  /** Number of hits requested per scroll page. */
-  private static final int SCROLL_PAGE_SIZE = 100;
-
-  public MetadataExtractor() {
-    // Stateless: nothing to initialise.
-  }
-
-  /**
-   * loadMetadata: Load all metadata from Elasticsearch and convert them to a
-   * pair RDD. Please make sure metadata has already been harvested from the
-   * web service and stored in Elasticsearch.
-   *
-   * @param es    an Elasticsearch client node instance
-   * @param sc    spark context
-   * @param index index name of log processing application
-   * @param type  metadata type name
-   * @return PairRDD, in each pair key is metadata short name and value is term
-   * list extracted from metadata variables.
-   */
-  public JavaPairRDD<String, List<String>> loadMetadata(ESDriver es, JavaSparkContext sc, String index, String type) {
-    List<PODAACMetadata> metadatas = this.loadMetadataFromES(es, index, type);
-    return this.buildMetadataRDD(es, sc, index, metadatas);
-  }
-
-  /**
-   * loadMetadataFromES: Load all metadata from Elasticsearch by scrolling over
-   * a match-all query. A document whose fields cannot be analysed is skipped
-   * instead of being stored as a {@code null} entry, so the returned list
-   * never contains {@code null}.
-   *
-   * @param es    an Elasticsearch client node instance
-   * @param index index name of log processing application
-   * @param type  metadata type name
-   * @return metadata list
-   */
-  protected List<PODAACMetadata> loadMetadataFromES(ESDriver es, String index, String type) {
-    List<PODAACMetadata> metadatas = new ArrayList<>();
-    SearchResponse scrollResp = es.getClient().prepareSearch(index).setTypes(type).setQuery(QueryBuilders.matchAllQuery()).setScroll(new TimeValue(INITIAL_SCROLL_KEEP_ALIVE_MS))
-        .setSize(SCROLL_PAGE_SIZE).execute().actionGet();
-
-    while (true) {
-      for (SearchHit hit : scrollResp.getHits().getHits()) {
-        PODAACMetadata metadata = this.toMetadata(es, index, hit.getSource());
-        if (metadata != null) {
-          metadatas.add(metadata);
-        }
-      }
-      scrollResp = es.getClient().prepareSearchScroll(scrollResp.getScrollId()).setScroll(new TimeValue(NEXT_SCROLL_KEEP_ALIVE_MS)).execute().actionGet();
-      // An empty page marks the end of the scroll.
-      if (scrollResp.getHits().getHits().length == 0) {
-        break;
-      }
-    }
-
-    return metadatas;
-  }
-
-  /**
-   * Builds one {@link PODAACMetadata} from the source map of a search hit.
-   *
-   * @param es     driver whose {@code customAnalyzing} is applied to the list fields
-   * @param index  index name handed to {@code customAnalyzing}
-   * @param source source map of a single search hit
-   * @return the metadata object, or {@code null} when analysing the fields failed
-   */
-  // The list-valued fields are stored as untyped objects in the source map; the
-  // casts mirror the field layout this class has always assumed.
-  @SuppressWarnings("unchecked")
-  private PODAACMetadata toMetadata(ESDriver es, String index, Map<String, Object> source) {
-    String shortname = (String) source.get("Dataset-ShortName");
-    List<String> topic = (List<String>) source.get("DatasetParameter-Topic");
-    List<String> term = (List<String>) source.get("DatasetParameter-Term");
-    List<String> keyword = (List<String>) source.get("Dataset-Metadata");
-    List<String> variable = (List<String>) source.get("DatasetParameter-Variable");
-    List<String> longname = (List<String>) source.get("DatasetProject-Project-LongName");
-    List<String> region = (List<String>) source.get("DatasetRegion-Region");
-
-    try {
-      return new PODAACMetadata(shortname, longname, es.customAnalyzing(index, topic), es.customAnalyzing(index, term), es.customAnalyzing(index, variable), es.customAnalyzing(index, keyword),
-          es.customAnalyzing(index, region));
-    } catch (InterruptedException e) {
-      // Restore the interrupt flag so callers further up can still observe it.
-      Thread.currentThread().interrupt();
-      e.printStackTrace();
-    } catch (ExecutionException e) {
-      e.printStackTrace();
-    }
-    return null;
-  }
-
-  /**
-   * buildMetadataRDD: Convert metadata list to JavaPairRDD. Term lists of
-   * entries sharing the same short name are concatenated.
-   *
-   * @param es        an Elasticsearch client node instance
-   * @param sc        spark context
-   * @param index     index name of log processing application
-   * @param metadatas metadata list
-   * @return PairRDD, in each pair key is metadata short name and value is term
-   * list extracted from metadata variables.
-   */
-  protected JavaPairRDD<String, List<String>> buildMetadataRDD(ESDriver es, JavaSparkContext sc, String index, List<PODAACMetadata> metadatas) {
-    JavaRDD<PODAACMetadata> metadataRDD = sc.parallelize(metadatas);
-    JavaPairRDD<String, List<String>> termsByShortName = metadataRDD
-        .mapToPair(metadata -> new Tuple2<String, List<String>>(metadata.getShortName(), metadata.getAllTermList()));
-
-    return termsByShortName.reduceByKey((left, right) -> {
-      // Merge into a fresh list so neither input list is mutated.
-      List<String> merged = new ArrayList<>(left);
-      merged.addAll(right);
-      return merged;
-    });
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/metadata/structure/PODAACMetadata.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/metadata/structure/PODAACMetadata.java b/core/src/main/java/gov/nasa/jpl/mudrod/metadata/structure/PODAACMetadata.java
deleted file mode 100644
index 50b17c0..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/metadata/structure/PODAACMetadata.java
+++ /dev/null
@@ -1,337 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you
- * may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package gov.nasa.jpl.mudrod.metadata.structure;
-
-import java.io.Serializable;
-import java.util.ArrayList;
-import java.util.List;
-
-/**
- * ClassName: PODAACMetadata Function: PODAACMetadata setter and getter methods
- */
-public class PODAACMetadata implements Serializable {
-
-  private static final long serialVersionUID = 1L;
-
-  // shortname: data set short name
-  private String shortname;
-  // abstractStr: data set abstract
-  private String abstractStr;
-  // isoTopic: data set topic
-  private String isoTopic;
-  // sensor: sensor
-  private String sensor;
-  // source: data source
-  private String source;
-  // project: data project
-  private String project;
-  // hasAbstarct: whether data set has abstract. The misspelled name and the
-  // package-private visibility are kept unchanged because code outside this
-  // class may reference the field.
-  boolean hasAbstarct;
-
-  // longnameList: data set long name list
-  private List<String> longnameList;
-  // keywordList: data set key word list
-  private List<String> keywordList;
-  // termList: data set term list
-  private List<String> termList;
-  // topicList: data set topic list
-  private List<String> topicList;
-  // variableList: data set variable list
-  private List<String> variableList;
-  // abstractList: data set abstract term list
-  private List<String> abstractList;
-  // isotopicList: data set iso topic list
-  private List<String> isotopicList;
-  // sensorList: data set sensor list
-  private List<String> sensorList;
-  // sourceList: data set source list
-  private List<String> sourceList;
-  // projectList: data set project list
-  private List<String> projectList;
-  // regionList: data set region list
-  private List<String> regionList;
-
-  public PODAACMetadata() {
-    // Default constructor: every field keeps its default value; the list
-    // fields are created on demand by the string-based setters.
-  }
-
-  /**
-   * Creates a new instance of PODAACMetadata. The given lists are stored as
-   * passed in (no copy is made).
-   *
-   * @param shortname data set short name
-   * @param longname  data set long name
-   * @param topics    data set topics
-   * @param terms     data set terms
-   * @param variables data set variables
-   * @param keywords  data set keywords
-   * @param region    list of regions
-   */
-  public PODAACMetadata(String shortname, List<String> longname, List<String> topics, List<String> terms, List<String> variables, List<String> keywords, List<String> region) {
-    this.shortname = shortname;
-    this.longnameList = longname;
-    this.keywordList = keywords;
-    this.termList = terms;
-    this.topicList = topics;
-    this.variableList = variables;
-    this.regionList = region;
-  }
-
-  /**
-   * setTerms: append the comma separated terms to the term list
-   *
-   * @param termstr data set terms
-   */
-  public void setTerms(String termstr) {
-    this.termList = this.splitString(termstr, this.termList);
-  }
-
-  /**
-   * setKeywords: append the comma separated key words to the key word list
-   *
-   * @param keywords data set keywords
-   */
-  public void setKeywords(String keywords) {
-    this.keywordList = this.splitString(keywords, this.keywordList);
-  }
-
-  /**
-   * setTopicList: append the comma separated topics to the topic list
-   *
-   * @param topicStr data set topics
-   */
-  public void setTopicList(String topicStr) {
-    this.topicList = this.splitString(topicStr, this.topicList);
-  }
-
-  /**
-   * setVaraliableList: append the comma separated variables to the variable
-   * list
-   *
-   * @param varilableStr data set variables
-   */
-  public void setVaraliableList(String varilableStr) {
-    this.variableList = this.splitString(varilableStr, this.variableList);
-  }
-
-  /**
-   * setProjectList: append the comma separated projects to the project list
-   *
-   * @param project data set projects
-   */
-  public void setProjectList(String project) {
-    this.projectList = this.splitString(project, this.projectList);
-  }
-
-  /**
-   * setSourceList: append the comma separated sources to the source list
-   *
-   * @param source data set sources
-   */
-  public void setSourceList(String source) {
-    this.sourceList = this.splitString(source, this.sourceList);
-  }
-
-  /**
-   * setSensorList: append the comma separated sensors to the sensor list
-   *
-   * @param sensor data set sensors
-   */
-  public void setSensorList(String sensor) {
-    this.sensorList = this.splitString(sensor, this.sensorList);
-  }
-
-  /**
-   * setISOTopicList: append the comma separated iso topics to the iso topic
-   * list
-   *
-   * @param isoTopic data set iso topics
-   */
-  public void setISOTopicList(String isoTopic) {
-    this.isotopicList = this.splitString(isoTopic, this.isotopicList);
-  }
-
-  /**
-   * getKeywordList: get key word of data set
-   *
-   * @return data set keyword list, {@code null} if it was never set
-   */
-  public List<String> getKeywordList() {
-    return this.keywordList;
-  }
-
-  /**
-   * getTermList: get term list of data set
-   *
-   * @return data set term list, {@code null} if it was never set
-   */
-  public List<String> getTermList() {
-    return this.termList;
-  }
-
-  /**
-   * getShortName: get short name of data set
-   *
-   * @return data set short name
-   */
-  public String getShortName() {
-    return this.shortname;
-  }
-
-  /**
-   * getKeyword: get key word of data set
-   *
-   * @return key words joined with ",", empty string if there is no key word list
-   */
-  public String getKeyword() {
-    return join(this.keywordList);
-  }
-
-  /**
-   * getTerm: get term of data set
-   *
-   * @return terms joined with ",", empty string if there is no term list
-   */
-  public String getTerm() {
-    return join(this.termList);
-  }
-
-  /**
-   * getTopic: get topic of data set
-   *
-   * @return topics joined with ",", empty string if there is no topic list
-   */
-  public String getTopic() {
-    return join(this.topicList);
-  }
-
-  /**
-   * getVariable: get variable of data set
-   *
-   * @return variables joined with ",", empty string if there is no variable list
-   */
-  public String getVariable() {
-    return join(this.variableList);
-  }
-
-  /**
-   * getAbstract: get abstract of data set
-   *
-   * @return data set abstract
-   */
-  public String getAbstract() {
-    return this.abstractStr;
-  }
-
-  /**
-   * getProject: get project of data set
-   *
-   * @return data set project string
-   */
-  public String getProject() {
-    return this.project;
-  }
-
-  /**
-   * getSource: get source of data set
-   *
-   * @return data set source string
-   */
-  public String getSource() {
-    return this.source;
-  }
-
-  /**
-   * getSensor: get sensor of data set
-   *
-   * @return data set sensor string
-   */
-  public String getSensor() {
-    return this.sensor;
-  }
-
-  /**
-   * getISOTopic: get iso topic of data set
-   *
-   * @return data set ISO topic string
-   */
-  public String getISOTopic() {
-    return this.isoTopic;
-  }
-
-  /**
-   * getAllTermList: get all term list of data set, i.e. the concatenation of
-   * the term, key word, topic, variable and region lists (in that order).
-   *
-   * @return a new list holding all terms; never {@code null}
-   */
-  public List<String> getAllTermList() {
-    List<String> allterms = new ArrayList<>();
-    appendAll(allterms, this.termList);
-    appendAll(allterms, this.keywordList);
-    appendAll(allterms, this.topicList);
-    appendAll(allterms, this.variableList);
-    appendAll(allterms, this.regionList);
-    return allterms;
-  }
-
-  /**
-   * Adds every element of {@code values} to {@code target}; a {@code null}
-   * list contributes nothing.
-   */
-  private static void appendAll(List<String> target, List<String> values) {
-    if (values != null) {
-      target.addAll(values);
-    }
-  }
-
-  /**
-   * Joins the values with ","; a {@code null} list yields the empty string
-   * instead of a NullPointerException.
-   */
-  private static String join(List<String> values) {
-    return values == null ? "" : String.join(",", values);
-  }
-
-  /**
-   * splitString: split value of fields of data set. One pair of surrounding
-   * double quotes is removed from the whole string and from every comma
-   * separated token; empty tokens are dropped.
-   *
-   * @param oristr original string, may be {@code null}
-   * @param list   list the tokens are appended to, may be {@code null}
-   * @return {@code list} with the tokens appended; a new list when
-   * {@code list} was {@code null}; {@code list} unchanged when
-   * {@code oristr} is {@code null}
-   */
-  private List<String> splitString(String oristr, List<String> list) {
-    if (oristr == null) {
-      return list;
-    }
-    // The list fields are not pre-allocated, so create the target lazily
-    // instead of failing with a NullPointerException.
-    List<String> target = list == null ? new ArrayList<String>() : list;
-
-    String unquoted = oristr;
-    if (unquoted.startsWith("\"")) {
-      unquoted = unquoted.substring(1);
-    }
-    if (unquoted.endsWith("\"")) {
-      unquoted = unquoted.substring(0, unquoted.length() - 1);
-    }
-
-    for (String token : unquoted.trim().split(",")) {
-      String str = token.trim();
-      if (str.startsWith("\"")) {
-        str = str.substring(1);
-      }
-      if (str.endsWith("\"")) {
-        str = str.substring(0, str.length() - 1);
-      }
-      // Compare by content: "==" on strings only compares references.
-      if (str.isEmpty()) {
-        continue;
-      }
-      target.add(str);
-    }
-    return target;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/metadata/structure/package-info.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/metadata/structure/package-info.java b/core/src/main/java/gov/nasa/jpl/mudrod/metadata/structure/package-info.java
deleted file mode 100644
index d7de65d..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/metadata/structure/package-info.java
+++ /dev/null
@@ -1,17 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you 
- * may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/**
- * This package includes classes needed for metadata analysis
- */
-package gov.nasa.jpl.mudrod.metadata.structure;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/ontology/Ontology.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/ontology/Ontology.java b/core/src/main/java/gov/nasa/jpl/mudrod/ontology/Ontology.java
deleted file mode 100644
index 7bc76fb..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/ontology/Ontology.java
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you
- * may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package gov.nasa.jpl.mudrod.ontology;
-
-import java.util.Iterator;
-
-/**
- * Base class for working with ontologies. Methods indicate ability
- * to load, merge e.g. merge relevant ontology subgraphs into a new
- * subgraph which can be used within Mudrod, subclass retrieval,
- * synonym expansion, etc.
- *
- * @author lewismc
- */
-public interface Ontology {
-
-  /**
-   * Loads the ontology resources the given URIs resolve to.
-   *
-   * @param urls an array of {@link java.lang.String}s, each one an ontology URI.
-   */
-  void load(String[] urls);
-
-  /**
-   * Loads a collection of default ontology resources.
-   */
-  void load();
-
-  /**
-   * Merges relevant subgraphs of the given ontology into a new subgraph
-   * which can be used within Mudrod.
-   *
-   * @param o the ontology to merge with the ontology currently held
-   *          within Mudrod.
-   */
-  void merge(Ontology o);
-
-  /**
-   * Retrieves all subclasses of the entity named by the search term,
-   * e.g. for subclass-based query expansion.
-   *
-   * @param entitySearchTerm an input search term
-   * @return an {@link java.util.Iterator} over the subclass entries.
-   */
-  Iterator<String> subclasses(String entitySearchTerm);
-
-  /**
-   * Retrieves all synonyms of the entity named by the search phrase,
-   * e.g. for synonym-based query expansion.
-   *
-   * @param queryKeyPhrase the phrase to undertake synonym expansion on.
-   * @return an {@link java.util.Iterator} over the synonym entries.
-   */
-  Iterator<String> synonyms(String queryKeyPhrase);
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/ontology/OntologyFactory.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/ontology/OntologyFactory.java b/core/src/main/java/gov/nasa/jpl/mudrod/ontology/OntologyFactory.java
deleted file mode 100644
index f0ef6cd..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/ontology/OntologyFactory.java
+++ /dev/null
@@ -1,83 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you
- * may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package gov.nasa.jpl.mudrod.ontology;
-
-import gov.nasa.jpl.mudrod.main.MudrodConstants;
-import gov.nasa.jpl.mudrod.ontology.process.EsipCOROntology;
-import gov.nasa.jpl.mudrod.ontology.process.EsipPortalOntology;
-import gov.nasa.jpl.mudrod.ontology.process.LocalOntology;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.util.Properties;
-
-/**
- * The mechanism for creating an {@link Ontology}
- * implementation. The {@link Ontology} implementation
- * should be specified in
- * <a href="https://github.com/mudrod/mudrod/blob/master/core/src/main/resources/config.xml">
- * config.xml</a> with configuration key
- * <code>mudrod.ontology.implementation</code>.
- * This property can also be accessed via
- * {@link MudrodConstants#ONTOLOGY_IMPL}.
- *
- * @author lewismc
- */
-public class OntologyFactory {
-
-  public static final Logger LOG = LoggerFactory.getLogger(OntologyFactory.class);
-
-  private Properties props;
-
-  /**
-   * Creates a factory that reads the requested {@link Ontology}
-   * implementation from the given configuration.
-   *
-   * @param props a populated Mudrod {@link java.util.Properties} object.
-   */
-  public OntologyFactory(Properties props) {
-    this.props = props;
-  }
-
-  /**
-   * Instantiates the {@link Ontology} implementation named by the
-   * {@link MudrodConstants#ONTOLOGY_IMPL} property
-   * (<code>mudrod.ontology.implementation</code> in
-   * <a href="https://github.com/mudrod/mudrod/blob/master/core/src/main/resources/config.xml">
-   * config.xml</a>).
-   *
-   * @return an {@link EsipCOROntology} for the value <code>EsipCOR</code>,
-   * an {@link EsipPortalOntology} for the value <code>EsipPortal</code>,
-   * and a {@link LocalOntology} for any other value or when the property
-   * is not set.
-   */
-  public Ontology getOntology() {
-    final String implName = this.props.getProperty(MudrodConstants.ONTOLOGY_IMPL, "Local");
-    LOG.info("Using ontology extension: {}", implName);
-
-    if ("EsipCOR".equals(implName)) {
-      return new EsipCOROntology();
-    }
-    if ("EsipPortal".equals(implName)) {
-      return new EsipPortalOntology();
-    }
-    // Everything else, including the default "Local", maps to the local ontology.
-    return new LocalOntology();
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/ontology/package-info.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/ontology/package-info.java b/core/src/main/java/gov/nasa/jpl/mudrod/ontology/package-info.java
deleted file mode 100644
index 3763634..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/ontology/package-info.java
+++ /dev/null
@@ -1,17 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you 
- * may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/**
- * This package includes ontology pre-processing and processing classes.
- */
-package gov.nasa.jpl.mudrod.ontology;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/ontology/pre/AggregateTriples.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/ontology/pre/AggregateTriples.java b/core/src/main/java/gov/nasa/jpl/mudrod/ontology/pre/AggregateTriples.java
deleted file mode 100644
index 99de87d..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/ontology/pre/AggregateTriples.java
+++ /dev/null
@@ -1,225 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you 
- * may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package gov.nasa.jpl.mudrod.ontology.pre;
-
-import gov.nasa.jpl.mudrod.discoveryengine.DiscoveryStepAbstract;
-import gov.nasa.jpl.mudrod.driver.ESDriver;
-import gov.nasa.jpl.mudrod.driver.SparkDriver;
-import org.apache.commons.io.FilenameUtils;
-import org.jdom2.Document;
-import org.jdom2.Element;
-import org.jdom2.JDOMException;
-import org.jdom2.Namespace;
-import org.jdom2.filter.ElementFilter;
-import org.jdom2.input.SAXBuilder;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.BufferedWriter;
-import java.io.File;
-import java.io.FileWriter;
-import java.io.IOException;
-import java.util.Arrays;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Properties;
-
-/**
- * Supports ability to extract triples (subclassOf, equivalent class) from OWL file
- */
-public class AggregateTriples extends DiscoveryStepAbstract {
-  private static final long serialVersionUID = 1L;
-  private static final Logger LOG = LoggerFactory.getLogger(AggregateTriples.class);
-
-  final static String owl_namespace = "http://www.w3.org/2002/07/owl#";
-  final static String rdf_namespace = "http://www.w3.org/1999/02/22-rdf-syntax-ns#";
-  final static String rdfs_namespace = "http://www.w3.org/2000/01/rdf-schema#";
-
-  // Document and root element of the OWL file most recently read by loadxml.
-  public Document document;
-  public Element rootNode = null;
-
-  // Writer of the triples file; execute() sets it while the file is open and
-  // resets it to null afterwards.
-  BufferedWriter bw = null;
-
-  public AggregateTriples(Properties props, ESDriver es, SparkDriver spark) {
-    super(props, es, spark);
-  }
-
-  /**
-   * Method of executing triple aggregation: (re)creates the file named by the
-   * "oceanTriples" property and appends the triples of every ".owl" file
-   * found directly inside the directory named by the "ontologyInputDir"
-   * property. A file that cannot be parsed is logged and skipped.
-   *
-   * @return always {@code null}
-   */
-  @Override
-  public Object execute() {
-    File file = new File(this.props.getProperty("oceanTriples"));
-    if (file.exists() && !file.delete()) {
-      // Not fatal: opening the FileWriter below truncates the existing file.
-      LOG.warn("Unable to delete existing triples file {}", file.getAbsolutePath());
-    }
-
-    File inputDir = new File(this.props.getProperty("ontologyInputDir"));
-    File[] files = inputDir.listFiles();
-    if (files == null) {
-      // listFiles() returns null when the path is not a readable directory.
-      LOG.error("Ontology input directory {} cannot be listed", inputDir.getAbsolutePath());
-      files = new File[0];
-    }
-
-    // try-with-resources closes the writer on every path, including failures.
-    try (BufferedWriter writer = new BufferedWriter(new FileWriter(file.getAbsoluteFile()))) {
-      bw = writer;
-      for (File file_in : files) {
-        String ext = FilenameUtils.getExtension(file_in.getAbsolutePath());
-        if (!"owl".equals(ext)) {
-          continue;
-        }
-        try {
-          loadxml(file_in.getAbsolutePath());
-          getAllClass();
-        } catch (JDOMException | IOException e) {
-          LOG.error("Failed to extract triples from {}", file_in.getAbsolutePath(), e);
-        }
-      }
-    } catch (IOException e) {
-      LOG.error("Unable to write triples file {}", file.getAbsolutePath(), e);
-    } finally {
-      // Never leave a closed writer behind in the field.
-      bw = null;
-    }
-    return null;
-  }
-
-  /**
-   * Load OWL file into memory; the parsed document and its root element are
-   * stored in {@link #document} and {@link #rootNode}.
-   *
-   * @param filePathName local path of OWL file
-   * @throws JDOMException JDOMException
-   * @throws IOException   IOException
-   */
-  public void loadxml(String filePathName) throws JDOMException, IOException {
-    SAXBuilder saxBuilder = new SAXBuilder();
-    File file = new File(filePathName);
-
-    document = saxBuilder.build(file);
-    rootNode = document.getRootElement();
-  }
-
-  /**
-   * Method of going through OWL structure: logs the name of every descendant
-   * element of the root node, followed by its trimmed text when it has any.
-   */
-  public void loopxml() {
-    for (Element e : rootNode.getDescendants(new ElementFilter())) {
-      String currentName = e.getName();
-      String text = e.getTextTrim();
-      if ("".equals(text)) {
-        LOG.info(currentName);
-      } else {
-        LOG.info("{} : {}", currentName, text);
-      }
-    }
-  }
-
-  /**
-   * Method of identifying a specific child given a element name
-   *
-   * @param str element name
-   * @param ele parent element
-   * @return the first descendant element of {@code ele} with that name, or
-   * {@code null} if there is none
-   */
-  public Element findChild(String str, Element ele) {
-    for (Element e : ele.getDescendants(new ElementFilter())) {
-      if (e.getName().equals(str)) {
-        return e;
-      }
-    }
-    return null;
-  }
-
-  /**
-   * Method of extract triples (subclassOf, equivalent class) from OWL file.
-   * For every owl:Class child of the root node one line per rdfs:subClassOf
-   * and owl:equivalentClass relation is written to {@link #bw}. Classes
-   * without rdf:about / rdf:ID and relations without a resolvable
-   * rdf:resource are skipped.
-   *
-   * @throws IOException IOException
-   */
-  public void getAllClass() throws IOException {
-    Namespace owl = Namespace.getNamespace("owl", owl_namespace);
-    Namespace rdf = Namespace.getNamespace("rdf", rdf_namespace);
-    Namespace rdfs = Namespace.getNamespace("rdfs", rdfs_namespace);
-
-    for (Element classElement : rootNode.getChildren("Class", owl)) {
-      String className = classElement.getAttributeValue("about", rdf);
-      if (className == null) {
-        className = classElement.getAttributeValue("ID", rdf);
-      }
-      if (className == null) {
-        // No name to build a triple from.
-        continue;
-      }
-
-      for (Element subclassElement : classElement.getChildren("subClassOf", rdfs)) {
-        String subclassName = subclassElement.getAttributeValue("resource", rdf);
-        if (subclassName == null) {
-          // Fall back to the resource of a nested allValuesFrom element.
-          Element allValuesFromEle = findChild("allValuesFrom", subclassElement);
-          if (allValuesFromEle != null) {
-            subclassName = allValuesFromEle.getAttributeValue("resource", rdf);
-          }
-        }
-        if (subclassName != null) {
-          writeTriple(className, "SubClassOf", subclassName);
-        }
-      }
-
-      for (Element equalClassElement : classElement.getChildren("equivalentClass", owl)) {
-        String equalClassElementName = equalClassElement.getAttributeValue("resource", rdf);
-        if (equalClassElementName != null) {
-          writeTriple(className, "equivalentClass", equalClassElementName);
-        }
-      }
-    }
-  }
-
-  /**
-   * Writes one "subject,predicate,object" line; subject and object are passed
-   * through {@link #cutString(String)} first.
-   */
-  private void writeTriple(String subject, String predicate, String object) throws IOException {
-    bw.write(cutString(subject) + "," + predicate + "," + cutString(object) + "\n");
-  }
-
-  /**
-   * Method of cleaning up a string: keeps the part after the first "#",
-   * inserts a blank in front of every upper case letter that is not the
-   * first character and removes all commas.
-   *
-   * @param str String needed to be processed, must not be {@code null}
-   * @return the processed string
-   */
-  public String cutString(String str) {
-    String localName = str.substring(str.indexOf("#") + 1);
-    String[] words = localName.split("(?=[A-Z])");
-    return String.join(" ", words).replace(",", "");
-  }
-
-  @Override
-  public Object execute(Object o) {
-    return null;
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/ontology/pre/package-info.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/ontology/pre/package-info.java b/core/src/main/java/gov/nasa/jpl/mudrod/ontology/pre/package-info.java
deleted file mode 100644
index 0570bc7..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/ontology/pre/package-info.java
+++ /dev/null
@@ -1,17 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you 
- * may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/**
- * This package includes ontology pre-processing classes.
- */
-package gov.nasa.jpl.mudrod.ontology.pre;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/ontology/process/EsipCOROntology.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/ontology/process/EsipCOROntology.java b/core/src/main/java/gov/nasa/jpl/mudrod/ontology/process/EsipCOROntology.java
deleted file mode 100644
index 6194197..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/ontology/process/EsipCOROntology.java
+++ /dev/null
@@ -1,69 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you
- * may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package gov.nasa.jpl.mudrod.ontology.process;
-
-import gov.nasa.jpl.mudrod.ontology.Ontology;
-
-import java.util.Iterator;
-
-/**
- * Placeholder {@link Ontology} implementation: loading and merging are not
- * implemented yet and both lookups report no results.
- *
- * @author lewismc
- */
-public class EsipCOROntology implements Ontology {
-
-  /**
-   * Creates the placeholder; there is no state to initialise.
-   */
-  public EsipCOROntology() {
-    //default constructor
-  }
-
-  /**
-   * Not yet implemented; does nothing.
-   */
-  @Override
-  public void load() {
-    // to be completed
-  }
-
-  /**
-   * Not yet implemented; does nothing.
-   *
-   * @param urls ignored
-   */
-  @Override
-  public void load(String[] urls) {
-    // to be completed
-  }
-
-  /**
-   * Not yet implemented; does nothing.
-   *
-   * @param o ignored
-   */
-  @Override
-  public void merge(Ontology o) {
-    // to be completed
-  }
-
-  /**
-   * Not yet implemented.
-   *
-   * @param entitySearchTerm ignored
-   * @return an empty iterator, never <code>null</code>
-   */
-  @Override
-  public Iterator<String> subclasses(String entitySearchTerm) {
-    // an empty iterator (rather than null) lets callers loop without a null check
-    return java.util.Collections.emptyIterator();
-  }
-
-  /**
-   * Not yet implemented.
-   *
-   * @param queryKeyPhrase ignored
-   * @return an empty iterator, never <code>null</code>
-   */
-  @Override
-  public Iterator<String> synonyms(String queryKeyPhrase) {
-    // an empty iterator (rather than null) lets callers loop without a null check
-    return java.util.Collections.emptyIterator();
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/ontology/process/EsipPortalOntology.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/ontology/process/EsipPortalOntology.java b/core/src/main/java/gov/nasa/jpl/mudrod/ontology/process/EsipPortalOntology.java
deleted file mode 100644
index 9c4888b..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/ontology/process/EsipPortalOntology.java
+++ /dev/null
@@ -1,72 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you
- * may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package gov.nasa.jpl.mudrod.ontology.process;
-
-import gov.nasa.jpl.mudrod.ontology.Ontology;
-
-import java.util.Iterator;
-
-/**
- * Placeholder {@link Ontology} implementation: loading and merging are not
- * implemented yet and both lookups report no results.
- *
- * @author lewismc
- */
-public class EsipPortalOntology implements Ontology {
-
-  /**
-   * Creates the placeholder; there is no state to initialise.
-   */
-  public EsipPortalOntology() {
-    //default constructor
-  }
-
-  /**
-   * Not yet implemented; does nothing.
-   *
-   * @param urls ignored
-   */
-  @Override
-  public void load(String[] urls) {
-    // to be completed
-  }
-
-  /**
-   * Not yet implemented; does nothing.
-   */
-  @Override
-  public void load() {
-    // to be completed
-  }
-
-  /**
-   * Not yet implemented; does nothing.
-   *
-   * @param o ignored
-   */
-  @Override
-  public void merge(Ontology o) {
-    // to be completed
-  }
-
-  /**
-   * Not yet implemented.
-   *
-   * @param entitySearchTerm ignored
-   * @return an empty iterator, never <code>null</code>
-   */
-  @Override
-  public Iterator<String> subclasses(String entitySearchTerm) {
-    // an empty iterator (rather than null) lets callers loop without a null check
-    return java.util.Collections.emptyIterator();
-  }
-
-  /**
-   * Not yet implemented.
-   *
-   * @param queryKeyPhrase ignored
-   * @return an empty iterator, never <code>null</code>
-   */
-  @Override
-  public Iterator<String> synonyms(String queryKeyPhrase) {
-    // an empty iterator (rather than null) lets callers loop without a null check
-    return java.util.Collections.emptyIterator();
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/ontology/process/LocalOntology.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/ontology/process/LocalOntology.java b/core/src/main/java/gov/nasa/jpl/mudrod/ontology/process/LocalOntology.java
deleted file mode 100644
index 55ca51d..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/ontology/process/LocalOntology.java
+++ /dev/null
@@ -1,392 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you 
- * may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package gov.nasa.jpl.mudrod.ontology.process;
-
-import gov.nasa.jpl.mudrod.ontology.Ontology;
-
-import org.apache.jena.ontology.Individual;
-import org.apache.jena.ontology.OntClass;
-import org.apache.jena.ontology.OntModel;
-import org.apache.jena.ontology.OntModelSpec;
-import org.apache.jena.ontology.OntResource;
-import org.apache.jena.ontology.Restriction;
-import org.apache.jena.rdf.model.AnonId;
-import org.apache.jena.rdf.model.Literal;
-import org.apache.jena.rdf.model.ModelFactory;
-import org.apache.jena.rdf.model.Resource;
-import org.apache.jena.shared.PrefixMapping;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.PrintStream;
-import java.net.MalformedURLException;
-import java.net.URISyntaxException;
-import java.net.URL;
-import java.util.ArrayList;
-import java.util.HashMap;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Map;
-
-/**
- * The LocalOntology implementation enables us to work with Ontology files
- * which are cached locally and available on the runtime classpath e.g.
- * in <code>src/main/resource/ontology/...</code>.
- * From here we can test and iterate on how use of ontology can enhance search.
- *
- * <p>The parser, the Jena model and the anonymous-ID bookkeeping live in static
- * fields that are shared by every instance and updated without synchronization.
- */
-public class LocalOntology implements Ontology {
-
-  public static final Logger LOG = LoggerFactory.getLogger(LocalOntology.class);
-
-  public static final String DELIMITER_SEARCHTERM = " ";
-
-  /** Location of the ontology read by {@link #load()}. */
-  private static final String DEFAULT_ONTOLOGY_URL = "https://raw.githubusercontent.com/ESIPFed/sweet/master/2.4/sweetAll.owl";
-
-  /** Lower-cased label mapped to the resources carrying it (inner map is used as a set). */
-  private Map<String, Map<OntResource, String>> searchTerms = new HashMap<>();
-  private static OntologyParser parser;
-  private static OntModel ontologyModel;
-  private Ontology ontology;
-  private static Map<AnonId, String> mAnonIDs = new HashMap<>();
-  private static int mAnonCount = 0;
-  private List<String> ontArrayList;
-
-  /**
-   * Creates the ontology, installs a fresh {@link OwlParser}, lazily creates the
-   * shared in-memory OWL model and immediately loads the default ontology.
-   */
-  public LocalOntology() {
-    // 'ontology' is an instance field and is always null at this point, so the
-    // parser is replaced on every construction; only the model is reused.
-    LOG.info("Creating new ontology");
-    parser = new OwlParser();
-    ontology = this;
-    if (ontologyModel == null) {
-      ontologyModel = ModelFactory.createOntologyModel(OntModelSpec.OWL_MEM, null);
-    }
-    load();
-  }
-
-  /**
-   * Accessor for the {@link Ontology} held by this object. The constructor sets
-   * it to <code>this</code>; a new {@link LocalOntology} is only created if the
-   * reference is unexpectedly <code>null</code>.
-   *
-   * @return a {@link LocalOntology}
-   */
-  public Ontology getInstance() {
-    if (ontology == null) {
-      ontology = new LocalOntology();
-    }
-    return ontology;
-  }
-
-  /**
-   * Load the default <i>sweetAll.owl</i> ontology
-   * from <a href="https://raw.githubusercontent.com/ESIPFed/sweet/master/2.4/sweetAll.owl">
-   * https://raw.githubusercontent.com/ESIPFed/sweet/master/2.4/sweetAll.owl</a>.
-   * If the location cannot be turned into a URI the problem is logged and
-   * nothing is loaded.
-   */
-  @Override
-  public void load() {
-    ontArrayList = new ArrayList<>();
-    try {
-      // building the URL inside the try means a failure can no longer be
-      // followed by a dereference of a null URL
-      ontArrayList.add(new URL(DEFAULT_ONTOLOGY_URL).toURI().toString());
-    } catch (MalformedURLException e) {
-      LOG.error("Error when attempting to create URL resource: ", e);
-    } catch (URISyntaxException e) {
-      LOG.error("Error in URL syntax, please check your Ontology resource: ", e);
-    }
-    if (!ontArrayList.isEmpty()) {
-      load(ontArrayList.toArray(new String[0]));
-    }
-  }
-
-  /**
-   * Load a string array of URIs which reference .owl files and then run the
-   * parser over the shared model. Entries that are empty after trimming are
-   * skipped.
-   *
-   * @param urls the ontology locations to read
-   */
-  @Override
-  public void load(String[] urls) {
-    for (String rawUrl : urls) {
-      String url = rawUrl.trim();
-      if (url.isEmpty()) {
-        // nothing to read; previously a blank entry was still handed to the model reader
-        continue;
-      }
-      LOG.info("Reading and processing {}", url);
-      load(ontologyModel, url);
-    }
-    parser.parse(ontology, ontologyModel);
-  }
-
-  /**
-   * Reads one ontology document into the given model; failures are logged and
-   * do not propagate.
-   */
-  private void load(OntModel model, String url) {
-    try {
-      model.read(url, null, null);
-      LOG.info("Successfully processed {}", url);
-    } catch (Exception e) {
-      LOG.error("Failed whilst attempting to read ontology {}: Error: ", url, e);
-    }
-  }
-
-  /**
-   * Get the {@link gov.nasa.jpl.mudrod.ontology.process.OntologyParser}
-   * implementation being used to process the input ontology resources.
-   * @return an {@link gov.nasa.jpl.mudrod.ontology.process.OntologyParser} implementation
-   */
-  public OntologyParser getParser() {
-    if (parser == null) {
-      parser = new OwlParser();
-    }
-    return parser;
-  }
-
-  /**
-   * Return the {@link org.apache.jena.ontology.OntModel} instance
-   * which is created from input ontology resources.
-   * @return the shared {@link org.apache.jena.ontology.OntModel}, or
-   * <code>null</code> if no {@link LocalOntology} has been constructed yet
-   */
-  public static OntModel getModel() {
-    return ontologyModel;
-  }
-
-  /**
-   * Return the loaded Ontology resources.
-   * @return a {@link java.util.List} of resources, empty if none were recorded.
-   */
-  public List<String> getLoadedOntologyResources() {
-    return ontArrayList != null ? ontArrayList : new ArrayList<>();
-  }
-
-  /**
-   * Not yet implemented.
-   */
-  @Override
-  public void merge(Ontology o) {
-    // not yet implemented
-  }
-
-  /**
-   * Retrieve the labels of all subclasses and instances of the class(es), and
-   * of all same-as resources of the individual(s), hashed to the search term.
-   * @param entitySearchTerm a query (keywords) for which to obtain
-   * subclasses.
-   * @return an {@link java.util.Iterator} containing the labels as Strings.
-   */
-  @Override
-  public Iterator<String> subclasses(String entitySearchTerm) {
-    Map<String, String> subclasses = new HashMap<>();
-
-    for (OntResource resource : retrieve(entitySearchTerm).keySet()) {
-      if (resource instanceof OntClass) {
-        OntClass ontClass = (OntClass) resource;
-        // listSubClasses() is Jena's no-argument variant, i.e. not restricted
-        // to direct children. NOTE(review): with the non-inferencing OWL_MEM
-        // model used here this presumably yields asserted subclasses only - confirm.
-        for (Iterator<?> i = ontClass.listSubClasses(); i.hasNext();) {
-          addLabels((OntResource) i.next(), subclasses);
-        }
-        //get individuals
-        for (Iterator<?> i = ontClass.listInstances(); i.hasNext();) {
-          addLabels((OntResource) i.next(), subclasses);
-        }
-      } else if (resource instanceof Individual) {
-        for (Iterator<?> i = resource.listSameAs(); i.hasNext();) {
-          addLabels((OntResource) i.next(), subclasses);
-        }
-      }
-    }
-    return subclasses.keySet().iterator();
-  }
-
-  /**
-   * Retrieves synonyms for a given phrase if the phrase
-   * is present in the ontology
-   * @param queryKeyPhrase an input string representing a phrase
-   * for which we wish to obtain synonyms.
-   * @return an {@link java.util.Iterator} containing synonyms string tokens
-   * or an empty iterator if no synonyms exist for the given queryKeyPhrase.
-   */
-  @Override
-  public Iterator<String> synonyms(String queryKeyPhrase) {
-    Map<String, String> synonyms = new HashMap<>();
-
-    for (OntResource resource : retrieve(queryKeyPhrase).keySet()) {
-      //the resource's own labels
-      addLabels(resource, synonyms);
-
-      if (resource instanceof Individual) {
-        //labels of everything declared same-as this individual; the elements
-        //are handled as OntResource, exactly as subclasses() does
-        for (Iterator<?> i = resource.listSameAs(); i.hasNext();) {
-          addLabels((OntResource) i.next(), synonyms);
-        }
-      } else if (resource instanceof OntClass) {
-        //labels of equivalent classes
-        for (Iterator<?> i = ((OntClass) resource).listEquivalentClasses(); i.hasNext();) {
-          addLabels((OntResource) i.next(), synonyms);
-        }
-      }
-    }
-
-    return synonyms.keySet().iterator();
-  }
-
-  /**
-   * Adds the string form of every label of the resource to the given map,
-   * which is used as a set.
-   */
-  private static void addLabels(OntResource resource, Map<String, String> labels) {
-    for (Iterator<?> i = resource.listLabels(null); i.hasNext();) {
-      labels.put(((Literal) i.next()).toString(), "1");
-    }
-  }
-
-  /**
-   * Registers a resource under the given label; lookups are case-insensitive.
-   * @param label the label (phrase or token) to register
-   * @param resource the resource carrying that label
-   */
-  public void addSearchTerm(String label, OntResource resource) {
-    searchTerms.computeIfAbsent(toKey(label), k -> new HashMap<>()).put(resource, "1");
-  }
-
-  /**
-   * A basic lookup function for retrieving keys (phrases or tokens)
-   * from the ontology search terms map. Right now only exact lookups
-   * will retrieve a result... this could be improved by using some
-   * advanced parsing logic... such as Lucene query parser.
-   * @param label the label (phrases or tokens) to retrieve from the
-   * ontology search terms map.
-   * @return an {@link java.util.Map} if there are match(es)
-   * or an empty {@link java.util.HashMap} if there are no
-   * matches.
-   */
-  public Map<OntResource, String> retrieve(String label) {
-    Map<OntResource, String> matches = searchTerms.get(toKey(label));
-    return matches != null ? matches : new HashMap<>();
-  }
-
-  /**
-   * Normalises a label into its map key; a fixed locale keeps keys identical
-   * regardless of the JVM's default locale.
-   */
-  private static String toKey(String label) {
-    return label.toLowerCase(java.util.Locale.ROOT);
-  }
-
-  /**
-   * Prints the description of a class followed, unless the class is already on
-   * the <code>occurs</code> list, by its direct subclasses (recursively, one
-   * level deeper) and its instances with their labels.
-   */
-  protected static void renderHierarchy(PrintStream out, OntClass cls, List<Object> occurs, int depth) {
-    renderClassDescription(out, cls, depth);
-    out.println();
-
-    // recurse to the next level down
-    if (cls.canAs(OntClass.class) && !occurs.contains(cls)) {
-      for (Iterator<?> i = cls.listSubClasses(true); i.hasNext(); ) {
-        OntClass sub = (OntClass) i.next();
-
-        // we push this expression on the occurs list before we recurse
-        occurs.add(cls);
-        renderHierarchy(out, sub, occurs, depth + 1);
-        occurs.remove(cls);
-      }
-      for (Iterator<?> i = cls.listInstances(); i.hasNext(); ) {
-        Individual individual = (Individual) i.next();
-        renderURI(out, individual.getModel(), individual.getURI());
-        out.print(" [");
-        for (Iterator<?> j = individual.listLabels(null); j.hasNext(); ) {
-          out.print(((Literal) j.next()).getString() + ", ");
-        }
-        out.print("] ");
-        out.println();
-      }
-    }
-  }
-
-  /**
-   * Prints an indented one-line description of a class: a restriction, a named
-   * class with its labels, or an anonymous class.
-   */
-  public static void renderClassDescription(PrintStream out, OntClass c, int depth) {
-    indent(out, depth);
-
-    if (c.isRestriction()) {
-      renderRestriction(out, (Restriction) c.as(Restriction.class));
-    } else if (c.isAnon()) {
-      renderAnonymous(out, c, "class");
-    } else {
-      out.print("Class ");
-      renderURI(out, c.getModel(), c.getURI());
-
-      out.print(c.getLocalName());
-
-      out.print(" [");
-      for (Iterator<?> i = c.listLabels(null); i.hasNext(); ) {
-        out.print(((Literal) i.next()).getString() + ", ");
-      }
-      out.print("] ");
-    }
-  }
-
-  /**
-   * Prints a restriction (named or anonymous) and the property it applies to.
-   */
-  protected static void renderRestriction(PrintStream out, Restriction r) {
-    if (r.isAnon()) {
-      renderAnonymous(out, r, "restriction");
-    } else {
-      out.print("Restriction ");
-      renderURI(out, r.getModel(), r.getURI());
-    }
-
-    out.print(" on property ");
-    renderURI(out, r.getModel(), r.getOnProperty().getURI());
-  }
-
-  /**
-   * Prints the given URI after passing it through the prefix mapping's
-   * <code>expandPrefix</code>.
-   */
-  protected static void renderURI(PrintStream out, PrefixMapping prefixes, String uri) {
-    out.print(prefixes.expandPrefix(uri));
-  }
-
-  /**
-   * Prints an anonymous resource using a generated ID of the form
-   * <code>a-N</code>; the same ID is reused for the same anonymous node.
-   */
-  protected static void renderAnonymous(PrintStream out, Resource anon, String name) {
-    String anonID = mAnonIDs.get(anon.getId());
-    if (anonID == null) {
-      anonID = "a-" + mAnonCount++;
-      mAnonIDs.put(anon.getId(), anonID);
-    }
-
-    out.print("Anonymous ");
-    out.print(name);
-    out.print(" with ID ");
-    out.print(anonID);
-  }
-
-  /**
-   * Prints one space per level of depth.
-   */
-  protected static void indent(PrintStream out, int depth) {
-    for (int i = 0; i < depth; i++) {
-      out.print(" ");
-    }
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/ontology/process/OntologyLinkCal.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/ontology/process/OntologyLinkCal.java b/core/src/main/java/gov/nasa/jpl/mudrod/ontology/process/OntologyLinkCal.java
deleted file mode 100644
index a68a0cb..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/ontology/process/OntologyLinkCal.java
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you 
- * may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package gov.nasa.jpl.mudrod.ontology.process;
-
-import gov.nasa.jpl.mudrod.discoveryengine.DiscoveryStepAbstract;
-import gov.nasa.jpl.mudrod.driver.ESDriver;
-import gov.nasa.jpl.mudrod.driver.SparkDriver;
-import org.elasticsearch.action.index.IndexRequest;
-import org.elasticsearch.common.xcontent.XContentBuilder;
-import org.elasticsearch.index.query.QueryBuilders;
-
-import java.io.BufferedReader;
-import java.io.FileReader;
-import java.io.IOException;
-import java.util.Properties;
-import java.util.concurrent.ExecutionException;
-
-import static org.elasticsearch.common.xcontent.XContentFactory.jsonBuilder;
-
-/**
- * Discovery step that reads comma-separated ontology triples from the file
- * named by the <code>oceanTriples</code> property and indexes them into
- * Elasticsearch as weighted concept links.
- */
-public class OntologyLinkCal extends DiscoveryStepAbstract {
-
-  /**
-   * Removes all existing documents of the ontology linkage type and registers
-   * the mapping for that type.
-   *
-   * @param props the configuration; <code>indexName</code> and
-   * <code>ontologyLinkageType</code> are read here
-   * @param es the Elasticsearch driver
-   * @param spark the Spark driver
-   */
-  public OntologyLinkCal(Properties props, ESDriver es, SparkDriver spark) {
-    super(props, es, spark);
-    es.deleteAllByQuery(props.getProperty("indexName"), props.getProperty("ontologyLinkageType"), QueryBuilders.matchAllQuery());
-    addSWEETMapping();
-  }
-
-  /**
-   * Method of adding mapping for triples extracted from SWEET: both
-   * <code>concept_A</code> and <code>concept_B</code> are declared as
-   * not-analyzed strings.
-   */
-  public void addSWEETMapping() {
-    try {
-      XContentBuilder mapping = jsonBuilder()
-          .startObject()
-          .startObject(props.getProperty("ontologyLinkageType"))
-          .startObject("properties")
-          .startObject("concept_A").field("type", "string").field("index", "not_analyzed").endObject()
-          .startObject("concept_B").field("type", "string").field("index", "not_analyzed").endObject()
-          .endObject()
-          .endObject()
-          .endObject();
-
-      es.getClient().admin().indices().preparePutMapping(props.getProperty("indexName")).setType(props.getProperty("ontologyLinkageType")).setSource(mapping).execute().actionGet();
-    } catch (IOException e) {
-      e.printStackTrace();
-    }
-  }
-
-  /**
-   * Method of calculating and importing SWEET triples into Elasticsearch.
-   * Each line is lower-cased and split on commas into subject, relation and
-   * object; <code>subclassof</code> relations get weight 0.75, all others 0.9.
-   * Lines with fewer than three fields are skipped.
-   *
-   * @return always <code>null</code>
-   */
-  @Override
-  public Object execute() {
-    String indexName = props.getProperty("indexName");
-    String linkageType = props.getProperty("ontologyLinkageType");
-
-    es.deleteType(indexName, linkageType);
-    es.createBulkProcessor();
-
-    BufferedReader br = null;
-    try {
-      br = new BufferedReader(new FileReader(props.getProperty("oceanTriples")));
-      String line;
-      while ((line = br.readLine()) != null) {
-        String[] strList = line.toLowerCase().split(",");
-        if (strList.length < 3) {
-          // blank or malformed line: skip it instead of aborting the whole import
-          continue;
-        }
-        double weight = "subclassof".equals(strList[1]) ? 0.75 : 0.9;
-
-        IndexRequest ir = new IndexRequest(indexName, linkageType).source(
-            jsonBuilder().startObject().field("concept_A", es.customAnalyzing(indexName, strList[2]))
-                .field("concept_B", es.customAnalyzing(indexName, strList[0])).field("weight", weight).endObject());
-        es.getBulkProcessor().add(ir);
-      }
-    } catch (IOException e) {
-      e.printStackTrace();
-    } catch (InterruptedException e) {
-      // keep the interrupt visible to whoever is running this step
-      Thread.currentThread().interrupt();
-      e.printStackTrace();
-    } catch (ExecutionException e) {
-      e.printStackTrace();
-    } finally {
-      try {
-        try {
-          if (br != null) {
-            br.close();
-          }
-        } finally {
-          // the bulk processor was created above, so it is released even when
-          // the triples file could not be opened or closing the reader failed
-          es.destroyBulkProcessor();
-          es.refreshIndex();
-        }
-      } catch (IOException e) {
-        e.printStackTrace();
-      }
-    }
-    return null;
-  }
-
-  /**
-   * Placeholder implementation: the argument is ignored.
-   *
-   * @param o unused
-   * @return always <code>null</code>
-   */
-  @Override
-  public Object execute(Object o) {
-    return null;
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/ontology/process/OntologyParser.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/ontology/process/OntologyParser.java b/core/src/main/java/gov/nasa/jpl/mudrod/ontology/process/OntologyParser.java
deleted file mode 100644
index eca6252..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/ontology/process/OntologyParser.java
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you 
- * may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package gov.nasa.jpl.mudrod.ontology.process;
-
-import org.apache.jena.ontology.OntClass;
-import org.apache.jena.ontology.OntModel;
-
-import gov.nasa.jpl.mudrod.ontology.Ontology;
-
-import java.util.Iterator;
-
-/**
- * Contract implemented by format-specific ontology parsers
- * (e.g. .ttl, RDFXML, etc.).
- */
-public interface OntologyParser {
-
-  /**
-   * Parses an ontology model (RDF graph) for literals.
-   *
-   * @param ont      the {@link gov.nasa.jpl.mudrod.ontology.Ontology}
-   *                 implementation on whose behalf the model is processed.
-   * @param ontModel the {@link org.apache.jena.ontology.OntModel} to parse
-   */
-  void parse(Ontology ont, OntModel ontModel);
-
-  /**
-   * Obtains all root classes of an ontology model (RDF graph).
-   *
-   * @param ontModel the {@link org.apache.jena.ontology.OntModel} to inspect
-   * @return an {@link java.util.Iterator} over all root classes.
-   */
-  Iterator<OntClass> rootClasses(OntModel ontModel);
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/ontology/process/OwlParser.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/ontology/process/OwlParser.java b/core/src/main/java/gov/nasa/jpl/mudrod/ontology/process/OwlParser.java
deleted file mode 100644
index e43f04d..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/ontology/process/OwlParser.java
+++ /dev/null
@@ -1,171 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you 
- * may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package gov.nasa.jpl.mudrod.ontology.process;
-
-import org.apache.jena.ontology.Individual;
-import org.apache.jena.ontology.OntClass;
-import org.apache.jena.ontology.OntModel;
-import org.apache.jena.rdf.model.Literal;
-
-import com.esotericsoftware.minlog.Log;
-
-import gov.nasa.jpl.mudrod.ontology.Ontology;
-
-import java.util.ArrayList;
-import java.util.Iterator;
-import java.util.List;
-import java.util.Set;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
-
-/**
- * {@link gov.nasa.jpl.mudrod.ontology.process.OntologyParser}
- * implementation for <a href="http://www.w3.org/TR/owl-features/">W3C OWL</a>
- * files.
- */
-public class OwlParser implements OntologyParser {
-
-  /** A lower-case letter or digit immediately followed by an upper-case letter. */
-  private static final Pattern CAMEL_CASE_BOUNDARY = Pattern.compile("([a-z0-9])([A-Z])");
-
-  private Ontology ont;
-  private List<OntClass> roots = new ArrayList<>();
-
-  public OwlParser() {
-    //default constructor
-  }
-
-  /**
-   * Parse OWL ontology files using Apache Jena: every named root class of the
-   * model is walked and its labels are registered as search terms.
-   *
-   * @param ont the ontology receiving the search terms; it is cast to
-   * {@link LocalOntology} when terms are added
-   * @param m the model to walk
-   */
-  @Override
-  public void parse(Ontology ont, OntModel m) {
-    this.ont = ont;
-    for (Iterator<OntClass> i = rootClasses(m); i.hasNext(); ) {
-      OntClass c = i.next();
-
-      //dont deal with anonymous classes
-      if (c.isAnon()) {
-        continue;
-      }
-
-      parseClass(c, new ArrayList<>(), 0);
-    }
-  }
-
-  /**
-   * Registers the labels of a class (adding one derived from its local name if
-   * it has none), then recurses into its direct subclasses and registers the
-   * labels of its instances.
-   *
-   * @param cls the class to process; anonymous classes are ignored
-   * @param occurs the classes currently being expanded, used to stop cycles
-   * @param depth the current recursion depth
-   */
-  protected void parseClass(OntClass cls, List<Object> occurs, int depth) {
-    //dont deal with anonymous classes
-    if (cls.isAnon()) {
-      return;
-    }
-
-    //a class without labels gets one derived from its rdf:ID (this adds a
-    //label statement to the model)
-    if (!cls.listLabels(null).hasNext()) {
-      cls.addLabel(rdfidToLabel(cls.getLocalName()), null);
-    }
-
-    //add cls to Ontology searchterms, one entry per label
-    for (Iterator<?> labelIter = cls.listLabels(null); labelIter.hasNext(); ) {
-      Literal l = (Literal) labelIter.next();
-      ((LocalOntology) ont).addSearchTerm(l.toString(), cls);
-    }
-
-    // recurse to the next level down
-    if (cls.canAs(OntClass.class) && !occurs.contains(cls)) {
-      //list subclasses
-      for (Iterator<?> i = cls.listSubClasses(true); i.hasNext(); ) {
-        OntClass sub = (OntClass) i.next();
-
-        // we push this expression on the occurs list before we recurse
-        occurs.add(cls);
-        parseClass(sub, occurs, depth + 1);
-        occurs.remove(cls);
-      }
-
-      //list instances and add search terms for each of their labels
-      for (Iterator<?> i = cls.listInstances(); i.hasNext(); ) {
-        Individual individual = (Individual) i.next();
-        for (Iterator<?> j = individual.listLabels(null); j.hasNext(); ) {
-          Literal l = (Literal) j.next();
-          ((LocalOntology) ont).addSearchTerm(l.toString(), individual);
-        }
-      }
-    }
-  }
-
-  /**
-   * Parses out all root classes of the given
-   * {@link org.apache.jena.ontology.OntModel}. Roots found by earlier calls
-   * are discarded first, so each call reflects only the model passed in.
-   * @param m the {@link org.apache.jena.ontology.OntModel} we wish to obtain
-   * all root classes for.
-   * @return an {@link java.util.Iterator} of {@link org.apache.jena.ontology.OntClass}
-   * elements representing all root classes.
-   */
-  @Override
-  public Iterator<OntClass> rootClasses(OntModel m) {
-    // start clean: without this, repeated calls kept appending the same roots
-    roots.clear();
-
-    Iterator<?> i = m.listClasses();
-    if (i.hasNext() && i.next() instanceof OntClass) {
-      //assume ontology has root classes
-      processSingle(m);
-    } else {
-      //check for presence of aggregate/collection ontologies such as sweetAll.owl
-      processCollection(m);
-    }
-
-    // iterate over a snapshot so a later call cannot invalidate this iterator
-    return new ArrayList<>(roots).iterator();
-  }
-
-  /**
-   * Adds to the root list every named class of the model that either has
-   * Thing as a direct superclass or declares no superclass at all.
-   */
-  private void processSingle(OntModel m) {
-    for (Iterator<?> i = m.listClasses(); i.hasNext(); ) {
-      OntClass c = (OntClass) i.next();
-      try {
-        // too confusing to list all the restrictions as root classes
-        if (c.isAnon()) {
-          continue;
-        }
-
-        if (c.hasSuperClass(m.getProfile().THING(), true) || c.getCardinality(m.getProfile().SUB_CLASS_OF()) == 0) {
-          // this class is directly descended from Thing
-          roots.add(c);
-        }
-      } catch (Exception e) {
-        Log.error("Error during extraction or root Classes from Ontology Model: ", e);
-      }
-    }
-  }
-
-  /**
-   * Collects the root classes of every sub-model of the given model.
-   */
-  private void processCollection(OntModel m) {
-    for (Iterator<?> i = m.listSubModels(true); i.hasNext(); ) {
-      OntModel ontModel = (OntModel) i.next();
-      processSingle(ontModel);
-    }
-  }
-
-  /**
-   * Converts a camel-case identifier into a label by inserting a space
-   * wherever a lower-case letter or digit is directly followed by an
-   * upper-case letter, e.g. <code>seaSurface</code> becomes
-   * <code>sea Surface</code>.
-   *
-   * @param idString the identifier to convert
-   * @return the identifier with spaces inserted at the case boundaries
-   */
-  public String rdfidToLabel(String idString) {
-    return CAMEL_CASE_BOUNDARY.matcher(idString).replaceAll("$1 $2");
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/ontology/process/package-info.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/ontology/process/package-info.java b/core/src/main/java/gov/nasa/jpl/mudrod/ontology/process/package-info.java
deleted file mode 100644
index 3447426..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/ontology/process/package-info.java
+++ /dev/null
@@ -1,17 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you 
- * may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/**
- * This package includes ontology processing classes.
- */
-package gov.nasa.jpl.mudrod.ontology.process;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/package-info.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/package-info.java b/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/package-info.java
deleted file mode 100644
index 1e5d8bf..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/package-info.java
+++ /dev/null
@@ -1,18 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you 
- * may not use this file except in compliance with the License. 
- * You may obtain a copy of the License at
- * 
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-/**
- * This package includes the preprocessing, processing, and data structures used
- * by the recommendation module.
- */
-package gov.nasa.jpl.mudrod.recommendation;
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/pre/ImportMetadata.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/pre/ImportMetadata.java b/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/pre/ImportMetadata.java
deleted file mode 100644
index c174f31..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/pre/ImportMetadata.java
+++ /dev/null
@@ -1,111 +0,0 @@
-/*
- * Licensed under the Apache License, Version 2.0 (the "License"); you
- * may not use this file except in compliance with the License.
- * You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package gov.nasa.jpl.mudrod.recommendation.pre;
-
-import com.google.gson.JsonElement;
-import com.google.gson.JsonParser;
-import gov.nasa.jpl.mudrod.discoveryengine.DiscoveryStepAbstract;
-import gov.nasa.jpl.mudrod.driver.ESDriver;
-import gov.nasa.jpl.mudrod.driver.SparkDriver;
-import gov.nasa.jpl.mudrod.main.MudrodConstants;
-import gov.nasa.jpl.mudrod.metadata.pre.ApiHarvester;
-import org.apache.commons.io.IOUtils;
-import org.elasticsearch.action.index.IndexRequest;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.*;
-import java.util.Properties;
-
-/**
- * Discovery step that reads the metadata files found in the local raw
- * metadata directory and indexes them into Elasticsearch.
- */
-
-public class ImportMetadata extends DiscoveryStepAbstract {
-
-  private static final long serialVersionUID = 1L;
-  // Log under this class's own name so messages are attributed correctly.
-  private static final Logger LOG = LoggerFactory.getLogger(ImportMetadata.class);
-
-  /**
-   * Creates a new instance of ImportMetadata.
-   *
-   * @param props the Mudrod configuration
-   * @param es    an instantiated {@link ESDriver}
-   * @param spark an instantiated {@link SparkDriver}
-   */
-  public ImportMetadata(Properties props, ESDriver es, SparkDriver spark) {
-    super(props, es, spark);
-  }
-
-  /**
-   * Adds the metadata mapping, imports the metadata files and refreshes the
-   * index, logging the elapsed time.
-   *
-   * @return always {@code null}
-   */
-  @Override
-  public Object execute() {
-    LOG.info("Starting Metadata Harvesting");
-    startTime = System.currentTimeMillis();
-    addMetadataMapping();
-    importToES();
-    endTime = System.currentTimeMillis();
-    es.refreshIndex();
-    LOG.info("Finished Metadata Harvesting time elapsed: {}s", (endTime - startTime) / 1000);
-    return null;
-  }
-
-  /**
-   * addMetadataMapping: Add mapping to index metadata in Elasticsearch. Please
-   * invoke this method before import metadata to Elasticsearch. The mapping is
-   * a dynamic template that maps every string field to type "string" with the
-   * "csv" analyzer.
-   */
-  public void addMetadataMapping() {
-    String mappingJson = "{\r\n   \"dynamic_templates\": " + "[\r\n      " + "{\r\n         \"strings\": " + "{\r\n            \"match_mapping_type\": \"string\","
-        + "\r\n            \"mapping\": {\r\n               \"type\": \"string\"," + "\r\n               \"analyzer\": \"csv\"\r\n            }" + "\r\n         }\r\n      }\r\n   ]\r\n}";
-
-    es.getClient().admin().indices()
-        .preparePutMapping(props.getProperty(MudrodConstants.ES_INDEX_NAME))
-        .setType(props.getProperty("recom_metadataType"))
-        .setSource(mappingJson)
-        .execute().actionGet();
-  }
-
-  /**
-   * importToES: Index metadata into elasticsearch from local file directory.
-   * Please make sure metadata have been harvested from the web service before
-   * invoking this method. Existing documents of the metadata type are deleted
-   * first. A file that cannot be read is logged and skipped.
-   */
-  private void importToES() {
-    // NOTE(review): the delete reads the literal "indexName" key while indexing
-    // uses MudrodConstants.ES_INDEX_NAME; confirm both name the same property.
-    es.deleteType(props.getProperty("indexName"), props.getProperty("recom_metadataType"));
-
-    File directory = new File(props.getProperty(MudrodConstants.RAW_METADATA_PATH));
-    File[] fList = directory.listFiles();
-    if (fList == null) {
-      // listFiles() returns null when the path is not a directory or cannot be read
-      LOG.error("Raw metadata path {} is not a readable directory, nothing imported", directory);
-      return;
-    }
-
-    String index = props.getProperty(MudrodConstants.ES_INDEX_NAME);
-    String type = props.getProperty("recom_metadataType");
-    JsonParser parser = new JsonParser();
-
-    es.createBulkProcessor();
-    try {
-      for (File file : fList) {
-        // try-with-resources closes the stream even when reading or parsing fails
-        try (InputStream is = new FileInputStream(file)) {
-          String jsonTxt = IOUtils.toString(is);
-          JsonElement item = parser.parse(jsonTxt);
-          IndexRequest ir = new IndexRequest(index, type).source(item.toString());
-          es.getBulkProcessor().add(ir);
-        } catch (IOException e) {
-          // also covers FileNotFoundException, e.g. when the entry is a directory
-          LOG.error("Failed to read metadata file {}", file, e);
-        }
-      }
-    } finally {
-      es.destroyBulkProcessor();
-    }
-  }
-
-  /**
-   * Not used by this step.
-   *
-   * @param o ignored
-   * @return always {@code null}
-   */
-  @Override
-  public Object execute(Object o) {
-    return null;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/pre/MetadataTFIDFGenerator.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/pre/MetadataTFIDFGenerator.java b/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/pre/MetadataTFIDFGenerator.java
deleted file mode 100644
index 02c74f0..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/pre/MetadataTFIDFGenerator.java
+++ /dev/null
@@ -1,100 +0,0 @@
-/**
- * Project Name:mudrod-core
- * File Name:MetadataTFIDFGenerator.java
- * Package Name:gov.nasa.jpl.mudrod.recommendation.pre
- * Date:Aug 22, 2016 12:39:52 PM
- * Copyright (c) 2016, chenzhou1025@126.com All Rights Reserved.
- */
-
-package gov.nasa.jpl.mudrod.recommendation.pre;
-
-import gov.nasa.jpl.mudrod.discoveryengine.DiscoveryStepAbstract;
-import gov.nasa.jpl.mudrod.driver.ESDriver;
-import gov.nasa.jpl.mudrod.driver.SparkDriver;
-import gov.nasa.jpl.mudrod.recommendation.structure.MetadataOpt;
-import gov.nasa.jpl.mudrod.utils.LabeledRowMatrix;
-import gov.nasa.jpl.mudrod.utils.MatrixUtil;
-import org.apache.spark.api.java.JavaPairRDD;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.util.ArrayList;
-import java.util.List;
-import java.util.Properties;
-
-/**
- * Discovery step that generates TF-IDF matrices for all metadata and exports
- * them to CSV.
- */
-public class MetadataTFIDFGenerator extends DiscoveryStepAbstract {
-
-  private static final long serialVersionUID = 1L;
-  private static final Logger LOG = LoggerFactory.getLogger(MetadataTFIDFGenerator.class);
-
-  /**
-   * Creates a new instance of MetadataTFIDFGenerator.
-   *
-   * @param props the Mudrod configuration
-   * @param es    the Elasticsearch driver
-   * @param spark the Spark driver
-   */
-  public MetadataTFIDFGenerator(Properties props, ESDriver es, SparkDriver spark) {
-    super(props, es, spark);
-  }
-
-  /**
-   * Generates and exports the word-based TF-IDF matrix. Any failure is logged
-   * and does not propagate to the caller.
-   *
-   * @return always {@code null}
-   */
-  @Override
-  public Object execute() {
-
-    LOG.info("Starting Dataset TF_IDF Matrix Generator");
-    startTime = System.currentTimeMillis();
-    try {
-      generateWordBasedTFIDF();
-    } catch (Exception e) {
-      // No "{}" placeholder here: SLF4J treats a trailing Throwable as the
-      // exception to log and would leave the placeholder unfilled.
-      LOG.error("Error during Dataset TF_IDF Matrix Generation", e);
-    }
-    endTime = System.currentTimeMillis();
-
-    LOG.info("Dataset TF_IDF Matrix Generation complete, time elapsed: {}s", (endTime - startTime) / 1000);
-
-    return null;
-  }
-
-  /**
-   * Not used by this step.
-   *
-   * @param o ignored
-   * @return always {@code null}
-   */
-  @Override
-  public Object execute(Object o) {
-    return null;
-  }
-
-  /**
-   * Builds the TF-IDF matrix over all metadata content split on spaces and
-   * exports it to the path configured as "metadata_word_tfidf_matrix".
-   *
-   * @return the word-based TF-IDF matrix
-   * @throws Exception if loading, tokenizing, weighting or exporting fails
-   */
-  public LabeledRowMatrix generateWordBasedTFIDF() throws Exception {
-    MetadataOpt opt = new MetadataOpt(props);
-    JavaPairRDD<String, String> metadataContents = opt.loadAll(es, spark);
-    return buildAndExport(opt, metadataContents, " ", "metadata_word_tfidf_matrix");
-  }
-
-  /**
-   * Builds the TF-IDF matrix over the term, variable and extracted-term fields
-   * split on commas and exports it to the path configured as
-   * "metadata_term_tfidf_matrix".
-   *
-   * @return the term-based TF-IDF matrix
-   * @throws Exception if loading, tokenizing, weighting or exporting fails
-   */
-  public LabeledRowMatrix generateTermBasedTFIDF() throws Exception {
-    MetadataOpt opt = new MetadataOpt(props);
-
-    List<String> variables = new ArrayList<>();
-    variables.add("DatasetParameter-Term");
-    variables.add("DatasetParameter-Variable");
-    variables.add("Dataset-ExtractTerm");
-
-    JavaPairRDD<String, String> metadataContents = opt.loadAll(es, spark, variables);
-    return buildAndExport(opt, metadataContents, ",", "metadata_term_tfidf_matrix");
-  }
-
-  /**
-   * Tokenizes the loaded contents, computes the TF-IDF matrix and exports it.
-   *
-   * @param opt            the metadata helper used for tokenizing and weighting
-   * @param contents       the loaded metadata contents
-   * @param separator      the token separator passed to the tokenizer
-   * @param outputProperty the configuration key holding the CSV output path
-   * @return the generated TF-IDF matrix
-   * @throws Exception if tokenizing, weighting or exporting fails
-   */
-  private LabeledRowMatrix buildAndExport(MetadataOpt opt, JavaPairRDD<String, String> contents, String separator, String outputProperty) throws Exception {
-    JavaPairRDD<String, List<String>> tokens = opt.tokenizeData(contents, separator);
-    LabeledRowMatrix matrix = opt.tFIDFTokens(tokens, spark);
-    MatrixUtil.exportToCSV(matrix.rowMatrix, matrix.rowkeys, matrix.colkeys, props.getProperty(outputProperty));
-    return matrix;
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/pre/NormalizeVariables.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/pre/NormalizeVariables.java b/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/pre/NormalizeVariables.java
deleted file mode 100644
index f5eaa9c..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/pre/NormalizeVariables.java
+++ /dev/null
@@ -1,223 +0,0 @@
-package gov.nasa.jpl.mudrod.recommendation.pre;
-
-import gov.nasa.jpl.mudrod.discoveryengine.DiscoveryStepAbstract;
-import gov.nasa.jpl.mudrod.driver.ESDriver;
-import gov.nasa.jpl.mudrod.driver.SparkDriver;
-import org.elasticsearch.action.search.SearchResponse;
-import org.elasticsearch.action.update.UpdateRequest;
-import org.elasticsearch.common.unit.TimeValue;
-import org.elasticsearch.index.query.QueryBuilders;
-import org.elasticsearch.search.SearchHit;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.util.HashMap;
-import java.util.Map;
-import java.util.Properties;
-import java.util.regex.Pattern;
-
-/**
- * Discovery step that derives normalized numeric variables (version, spatial
- * resolution and coverage, temporal resolution, processing level) from every
- * metadata document and writes them back to Elasticsearch as "Derivative"
- * fields.
- */
-public class NormalizeVariables extends DiscoveryStepAbstract {
-
-  private static final long serialVersionUID = 1L;
-  private static final Logger LOG = LoggerFactory.getLogger(NormalizeVariables.class);
-  // Compiled once instead of on every call: value contains an ASCII letter.
-  private static final Pattern CONTAINS_LETTER = Pattern.compile(".*[a-zA-Z].*");
-  // Exactly one digit followed by exactly one letter.
-  private static final Pattern DIGIT_THEN_LETTER = Pattern.compile("[0-9]{1}[a-zA-Z]{1}");
-  // index name
-  private String indexName;
-  // type name of metadata in ES
-  private String metadataType;
-
-  /**
-   * Creates a new instance of NormalizeVariables.
-   *
-   * @param props the Mudrod configuration
-   * @param es    an instantiated {@link ESDriver}
-   * @param spark an instantiated {@link SparkDriver}
-   */
-  public NormalizeVariables(Properties props, ESDriver es, SparkDriver spark) {
-    super(props, es, spark);
-    indexName = props.getProperty("indexName");
-    metadataType = props.getProperty("recom_metadataType");
-  }
-
-  /**
-   * Normalizes the variables of all metadata documents and logs the elapsed
-   * time.
-   *
-   * @return always {@code null}
-   */
-  @Override
-  public Object execute() {
-    LOG.info("*****************processing metadata variables starts******************");
-    startTime = System.currentTimeMillis();
-
-    normalizeMetadataVariables(es);
-
-    endTime = System.currentTimeMillis();
-    LOG.info("*****************processing metadata variables ends******************Took {}s", (endTime - startTime) / 1000);
-
-    return null;
-  }
-
-  /**
-   * Not used by this step.
-   *
-   * @param o ignored
-   * @return always {@code null}
-   */
-  @Override
-  public Object execute(Object o) {
-    return null;
-  }
-
-  /**
-   * Scrolls over every metadata document and queues one update request per
-   * document carrying its derived values.
-   *
-   * @param es the Elasticsearch driver used for searching and bulk updating
-   */
-  public void normalizeMetadataVariables(ESDriver es) {
-
-    es.createBulkProcessor();
-    try {
-      SearchResponse scrollResp = es.getClient().prepareSearch(indexName).setTypes(metadataType)
-          .setScroll(new TimeValue(60000)).setQuery(QueryBuilders.matchAllQuery()).setSize(100)
-          .execute().actionGet();
-      do {
-        for (SearchHit hit : scrollResp.getHits().getHits()) {
-          Map<String, Object> metadata = hit.getSource();
-          Map<String, Object> updatedValues = new HashMap<>();
-
-          this.normalizeSpatialVariables(metadata, updatedValues);
-          this.normalizeTemporalVariables(metadata, updatedValues);
-          this.normalizeOtherVariables(metadata, updatedValues);
-
-          UpdateRequest ur = es.generateUpdateRequest(indexName, metadataType, hit.getId(), updatedValues);
-          es.getBulkProcessor().add(ur);
-        }
-
-        scrollResp = es.getClient().prepareSearchScroll(scrollResp.getScrollId()).setScroll(new TimeValue(600000)).execute().actionGet();
-      } while (scrollResp.getHits().getHits().length != 0);
-    } finally {
-      // release the bulk processor even if a document fails to normalize
-      es.destroyBulkProcessor();
-    }
-  }
-
-  /**
-   * Derives the numeric version value.
-   *
-   * @param metadata      the source document
-   * @param updatedValues receives "Dataset-Derivative-VersionNum"
-   */
-  private void normalizeOtherVariables(Map<String, Object> metadata, Map<String, Object> updatedValues) {
-    // NOTE(review): the version number is derived from "Dataset-ShortName";
-    // confirm that this is the intended source field.
-    String shortname = (String) metadata.get("Dataset-ShortName");
-    double versionNUm = getVersionNum(shortname);
-    updatedValues.put("Dataset-Derivative-VersionNum", versionNUm);
-
-  }
-
-  /**
-   * Maps a version string to a number: 2.0 for "Operational/Near-Real-Time",
-   * the leading digit for a digit-letter pair, 0.0 for any other value that
-   * contains a letter, otherwise the parsed number (values of 5 or more become
-   * 20.0).
-   *
-   * @param version the raw version string, may be {@code null}
-   * @return the numeric version; 0.0 when missing or unparsable
-   */
-  private Double getVersionNum(String version) {
-    if (version == null) {
-      return 0.0;
-    }
-    if ("Operational/Near-Real-Time".equals(version)) {
-      return 2.0;
-    }
-    if (DIGIT_THEN_LETTER.matcher(version).matches()) {
-      return Double.parseDouble(version.substring(0, 1));
-    }
-    if (CONTAINS_LETTER.matcher(version).find()) {
-      return 0.0;
-    }
-    try {
-      double parsed = Double.parseDouble(version);
-      return parsed >= 5 ? 20.0 : parsed;
-    } catch (NumberFormatException e) {
-      // e.g. an empty string or "4.1.2": use the same default as other unknown values
-      return 0.0;
-    }
-  }
-
-  /**
-   * Derives the spatial resolution, the longitude-normalized bounding box, its
-   * area and the numeric processing level.
-   *
-   * @param metadata      the source document
-   * @param updatedValues receives the derived spatial values
-   */
-  private void normalizeSpatialVariables(Map<String, Object> metadata, Map<String, Object> updatedValues) {
-
-    // get spatial resolution; values are read as Number because a whole-number
-    // JSON value may be deserialized as Integer or Long rather than Double
-    Object satelliteR = metadata.get("Dataset-SatelliteSpatialResolution");
-    Object gridR = metadata.get("Dataset-GridSpatialResolution");
-    Double spatialR;
-    if (satelliteR != null) {
-      spatialR = ((Number) satelliteR).doubleValue();
-    } else if (gridR != null) {
-      // presumably converts degrees to kilometres (about 111 km per degree) - confirm
-      spatialR = 111 * ((Number) gridR).doubleValue();
-    } else {
-      spatialR = 25.0;
-    }
-    updatedValues.put("Dataset-Derivative-SpatialResolution", spatialR);
-
-    // Transform Longitude and calculate coverage area
-    double top = parseDouble((String) metadata.get("DatasetCoverage-NorthLat"));
-    double bottom = parseDouble((String) metadata.get("DatasetCoverage-SouthLat"));
-    double left = parseDouble((String) metadata.get("DatasetCoverage-WestLon"));
-    double right = parseDouble((String) metadata.get("DatasetCoverage-EastLon"));
-
-    if (left > 180) {
-      left = left - 360;
-    }
-
-    if (right > 180) {
-      right = right - 360;
-    }
-
-    // identical west and east bounds are treated as full longitude coverage
-    if (left == right) {
-      left = -180;
-      right = 180;
-    }
-
-    double area = (top - bottom) * (right - left);
-
-    updatedValues.put("DatasetCoverage-Derivative-EastLon", right);
-    updatedValues.put("DatasetCoverage-Derivative-WestLon", left);
-    updatedValues.put("DatasetCoverage-Derivative-NorthLat", top);
-    updatedValues.put("DatasetCoverage-Derivative-SouthLat", bottom);
-    updatedValues.put("DatasetCoverage-Derivative-Area", area);
-
-    // get processing level
-    String processingLevel = (String) metadata.get("Dataset-ProcessingLevel");
-    double dProLevel = this.getProLevelNum(processingLevel);
-    updatedValues.put("Dataset-Derivative-ProcessingLevel", dProLevel);
-  }
-
-  /**
-   * Derives the temporal resolution in hours, falling back to the temporal
-   * repeat when the resolution is missing or empty.
-   *
-   * @param metadata      the source document
-   * @param updatedValues receives "Dataset-Derivative-TemporalResolution"
-   */
-  private void normalizeTemporalVariables(Map<String, Object> metadata, Map<String, Object> updatedValues) {
-
-    String trStr = (String) metadata.get("Dataset-TemporalResolution");
-    if (trStr == null || trStr.isEmpty()) {
-      trStr = (String) metadata.get("Dataset-TemporalRepeat");
-    }
-
-    updatedValues.put("Dataset-Derivative-TemporalResolution", covertTimeUnit(trStr));
-  }
-
-  /**
-   * Converts a string such as "3 Day" to hours. The number is the text before
-   * the first space; the unit is detected by substring (Hour, Day, Week,
-   * Month, Year). A month counts as 30 days and a year as 365 days.
-   *
-   * @param str the raw duration string, may be {@code null}
-   * @return the duration in hours; 0.0 when missing, unrecognized or unparsable
-   */
-  private Double covertTimeUnit(String str) {
-    if (str == null) {
-      return 0.0;
-    }
-
-    double hoursPerUnit;
-    if (str.contains("Hour")) {
-      hoursPerUnit = 1;
-    } else if (str.contains("Day")) {
-      hoursPerUnit = 24;
-    } else if (str.contains("Week")) {
-      hoursPerUnit = 24 * 7;
-    } else if (str.contains("Month")) {
-      hoursPerUnit = 24 * 30;
-    } else if (str.contains("Year")) {
-      hoursPerUnit = 24 * 365;
-    } else {
-      return 0.0;
-    }
-
-    try {
-      return Double.parseDouble(str.split(" ")[0]) * hoursPerUnit;
-    } catch (NumberFormatException e) {
-      // a unit without a leading number must not abort the whole run
-      LOG.warn("Unable to parse temporal value '{}', using 0", str);
-      return 0.0;
-    }
-  }
-
-  /**
-   * Maps a processing level string to a number: the leading digit for a
-   * digit-letter pair, 1.0 for any other value that contains a letter,
-   * otherwise the parsed number.
-   *
-   * @param pro the raw processing level, may be {@code null}
-   * @return the numeric processing level; 1.0 when missing or unparsable
-   */
-  public Double getProLevelNum(String pro) {
-    if (pro == null) {
-      return 1.0;
-    }
-    if (DIGIT_THEN_LETTER.matcher(pro).matches()) {
-      return Double.parseDouble(pro.substring(0, 1));
-    }
-    if (CONTAINS_LETTER.matcher(pro).find()) {
-      return 1.0;
-    }
-    try {
-      return Double.parseDouble(pro);
-    } catch (NumberFormatException e) {
-      // same default as for a missing or non-numeric level
-      return 1.0;
-    }
-  }
-
-  /**
-   * Parses a coordinate string.
-   *
-   * @param strNumber the raw value, may be {@code null}
-   * @return the parsed value; 0 when missing or empty, -1 when unparsable
-   */
-  private double parseDouble(String strNumber) {
-    if (strNumber == null || strNumber.isEmpty()) {
-      return 0;
-    }
-    try {
-      return Double.parseDouble(strNumber);
-    } catch (NumberFormatException e) {
-      return -1;
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/incubator-sdap-mudrod/blob/39379fa9/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/pre/SessionCooccurence.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/pre/SessionCooccurence.java b/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/pre/SessionCooccurence.java
deleted file mode 100644
index 2aecce3..0000000
--- a/core/src/main/java/gov/nasa/jpl/mudrod/recommendation/pre/SessionCooccurence.java
+++ /dev/null
@@ -1,152 +0,0 @@
-/**
- * Project Name:mudrod-core
- * File Name:SessionCooccurence.java
- * Package Name:gov.nasa.jpl.mudrod.recommendation.pre
- * Date:Aug 19, 2016 3:06:33 PM
- * Copyright (c) 2016, chenzhou1025@126.com All Rights Reserved.
- */
-
-package gov.nasa.jpl.mudrod.recommendation.pre;
-
-import gov.nasa.jpl.mudrod.discoveryengine.DiscoveryStepAbstract;
-import gov.nasa.jpl.mudrod.driver.ESDriver;
-import gov.nasa.jpl.mudrod.driver.SparkDriver;
-import gov.nasa.jpl.mudrod.main.MudrodConstants;
-import gov.nasa.jpl.mudrod.utils.LabeledRowMatrix;
-import gov.nasa.jpl.mudrod.utils.MatrixUtil;
-import gov.nasa.jpl.mudrod.weblog.structure.SessionExtractor;
-import org.apache.spark.api.java.JavaPairRDD;
-import org.apache.spark.api.java.function.PairFunction;
-import org.elasticsearch.action.search.SearchResponse;
-import org.elasticsearch.common.unit.TimeValue;
-import org.elasticsearch.index.query.QueryBuilders;
-import org.elasticsearch.search.SearchHit;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-import scala.Tuple2;
-
-import java.util.*;
-
-/**
- * ClassName: SessionCooccurence Function: Generate the metadata session
- * co-occurrence matrix from web logs and export it to CSV. Datasets that are
- * no longer present in the metadata index are dropped from each session
- * first.
- */
-public class SessionCooccurence extends DiscoveryStepAbstract {
-
-  private static final long serialVersionUID = 1L;
-  private static final Logger LOG = LoggerFactory.getLogger(SessionCooccurence.class);
-
-  /**
-   * Creates a new instance of SessionCooccurence.
-   *
-   * @param props
-   *          the Mudrod configuration
-   * @param es
-   *          the Elasticsearch driver
-   * @param spark
-   *          the Spark driver
-   */
-  public SessionCooccurence(Properties props, ESDriver es, SparkDriver spark) {
-    super(props, es, spark);
-  }
-
-  /**
-   * Builds the session/dataset data, removes retired datasets, creates the
-   * matrix and exports it to the path configured as "session_metadata_Matrix".
-   *
-   * @return always {@code null}
-   */
-  @Override
-  public Object execute() {
-
-    LOG.info("Starting dataset session-based similarity generation...");
-
-    startTime = System.currentTimeMillis();
-
-    // get all metadata session co-occurrence data
-    SessionExtractor extractor = new SessionExtractor();
-    JavaPairRDD<String, List<String>> sessionDatasetRDD = extractor.bulidSessionDatasetRDD(props, es, spark);
-
-    // remove retired datasets
-    JavaPairRDD<String, List<String>> sessionFiltedDatasetsRDD = removeRetiredDataset(es, sessionDatasetRDD);
-    LabeledRowMatrix datasetSessionMatrix = MatrixUtil.createWordDocMatrix(sessionFiltedDatasetsRDD);
-
-    // export
-    MatrixUtil.exportToCSV(datasetSessionMatrix.rowMatrix, datasetSessionMatrix.rowkeys, datasetSessionMatrix.colkeys, props.getProperty("session_metadata_Matrix"));
-
-    endTime = System.currentTimeMillis();
-
-    LOG.info("Completed dataset session-based  similarity generation. Time elapsed: {}s", (endTime - startTime) / 1000);
-
-    return null;
-  }
-
-  /**
-   * Not used by this step.
-   *
-   * @param o ignored
-   * @return always {@code null}
-   */
-  @Override
-  public Object execute(Object o) {
-    return null;
-  }
-
-  /**
-   * Filters out-of-date metadata: keeps only the datasets whose name is a key
-   * of the in-service name map and replaces each kept name with the original
-   * short name stored in that map.
-   *
-   * @param es
-   *          the Elasticsearch driver
-   * @param userDatasetsRDD
-   *          datasets extracted from sessions
-   * @return filtered session datasets
-   */
-  public JavaPairRDD<String, List<String>> removeRetiredDataset(ESDriver es, JavaPairRDD<String, List<String>> userDatasetsRDD) {
-
-    Map<String, String> nameMap = this.getOnServiceMetadata(es);
-
-    return userDatasetsRDD.mapToPair(new PairFunction<Tuple2<String, List<String>>, String, List<String>>() {
-      private static final long serialVersionUID = 1L;
-
-      @Override
-      public Tuple2<String, List<String>> call(Tuple2<String, List<String>> arg0) throws Exception {
-        List<String> newDatasets = new ArrayList<>();
-        for (String name : arg0._2) {
-          // the map never stores null values, so one lookup is enough
-          String originalName = nameMap.get(name);
-          if (originalName != null) {
-            newDatasets.add(originalName);
-          }
-        }
-        return new Tuple2<>(arg0._1, newDatasets);
-      }
-    });
-
-  }
-
-  /**
-   * getOnServiceMetadata: Get the names of the metadata currently in the
-   * index; the key is the lower-cased short name and the value is the original
-   * short name. Documents without a "Dataset-ShortName" are skipped.
-   *
-   * @param es
-   *          the Elasticsearch driver
-   * @return a map from lower case metadata name to original metadata name
-   */
-  private Map<String, String> getOnServiceMetadata(ESDriver es) {
-
-    String indexName = props.getProperty(MudrodConstants.ES_INDEX_NAME);
-    String metadataType = props.getProperty("recom_metadataType");
-
-    Map<String, String> shortnameMap = new HashMap<>();
-    SearchResponse scrollResp = es.getClient().prepareSearch(indexName).setTypes(metadataType)
-        .setScroll(new TimeValue(60000)).setQuery(QueryBuilders.matchAllQuery()).setSize(100)
-        .execute().actionGet();
-    do {
-      for (SearchHit hit : scrollResp.getHits().getHits()) {
-        String shortName = (String) hit.getSource().get("Dataset-ShortName");
-        if (shortName == null) {
-          // a document without a short name cannot be matched against sessions
-          continue;
-        }
-        shortnameMap.put(shortName.toLowerCase(), shortName);
-      }
-
-      scrollResp = es.getClient().prepareSearchScroll(scrollResp.getScrollId()).setScroll(new TimeValue(600000)).execute().actionGet();
-    } while (scrollResp.getHits().getHits().length != 0);
-
-    return shortnameMap;
-  }
-
-}