Posted to commits@lucene.apache.org by sa...@apache.org on 2017/03/24 16:37:39 UTC

[58/62] lucene-solr:branch_6x: SOLR-9221: Remove Solr contribs: map-reduce, morphlines-core and morphlines-cell

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ac221b96/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/dedup/SortingUpdateConflictResolver.java
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/dedup/SortingUpdateConflictResolver.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/dedup/SortingUpdateConflictResolver.java
deleted file mode 100644
index 24ea936..0000000
--- a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/dedup/SortingUpdateConflictResolver.java
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.hadoop.dedup;
-
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.Iterator;
-import java.util.List;
-
-import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Reducer.Context;
-import org.apache.solr.common.SolrInputDocument;
-import org.apache.solr.hadoop.HdfsFileFieldNames;
-
-/**
- * UpdateConflictResolver implementation that orders colliding updates ascending
- * from least recent to most recent (partial) update, based on a configurable
- * numeric Solr field, which defaults to the file_last_modified timestamp.
- */
-public class SortingUpdateConflictResolver implements UpdateConflictResolver, Configurable {
-
-  private Configuration conf;
-  private String orderByFieldName = ORDER_BY_FIELD_NAME_DEFAULT;
-  
-  public static final String ORDER_BY_FIELD_NAME_KEY = 
-      SortingUpdateConflictResolver.class.getName() + ".orderByFieldName";
-  
-  public static final String ORDER_BY_FIELD_NAME_DEFAULT = HdfsFileFieldNames.FILE_LAST_MODIFIED;
-
-  @Override
-  public void setConf(Configuration conf) {
-    this.conf = conf;
-    this.orderByFieldName = conf.get(ORDER_BY_FIELD_NAME_KEY, orderByFieldName);
-  }
-
-  @Override
-  public Configuration getConf() {
-    return conf;
-  }
-  
-  protected String getOrderByFieldName() {
-    return orderByFieldName;
-  }
-  
-  @Override
-  public Iterator<SolrInputDocument> orderUpdates(Text key, Iterator<SolrInputDocument> updates, Context ctx) {    
-    return sort(updates, getOrderByFieldName(), new SolrInputDocumentComparator.TimeStampComparator());
-  }
-
-  protected Iterator<SolrInputDocument> sort(Iterator<SolrInputDocument> updates, String fieldName, Comparator child) {
-    // TODO: use an external merge sort in the pathological case where there are a huge number of collisions
-    List<SolrInputDocument> sortedUpdates = new ArrayList<>(1);
-    while (updates.hasNext()) {
-      sortedUpdates.add(updates.next());
-    }
-    if (sortedUpdates.size() > 1) { // conflicts are rare
-      Collections.sort(sortedUpdates, new SolrInputDocumentComparator(fieldName, child));
-    }
-    return sortedUpdates.iterator();
-  }
-    
-}
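
For reference, the ordering field was read from the Hadoop Configuration rather than hard-coded. A minimal sketch of that wiring, using the key and default defined above (the field name "last_modified_l" and the driver class are hypothetical, not part of this commit):

    import org.apache.hadoop.conf.Configuration;
    import org.apache.solr.hadoop.dedup.SortingUpdateConflictResolver;

    public class SortingResolverConfigSketch {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        // Overrides the default HdfsFileFieldNames.FILE_LAST_MODIFIED ordering;
        // "last_modified_l" is a hypothetical Solr field name.
        conf.set(SortingUpdateConflictResolver.ORDER_BY_FIELD_NAME_KEY, "last_modified_l");

        SortingUpdateConflictResolver resolver = new SortingUpdateConflictResolver();
        resolver.setConf(conf); // reads the order-by field from the configuration
      }
    }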

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ac221b96/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/dedup/UpdateConflictResolver.java
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/dedup/UpdateConflictResolver.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/dedup/UpdateConflictResolver.java
deleted file mode 100644
index 94e23e1..0000000
--- a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/dedup/UpdateConflictResolver.java
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.hadoop.dedup;
-
-import java.util.Iterator;
-
-import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.hadoop.mapreduce.Reducer.Context;
-import org.apache.solr.common.SolrInputDocument;
-
-/**
- * Interface that enables deduplication and ordering of a series of document
- * updates for the same unique document key.
- * 
- * For example, a MapReduce batch job might index multiple files in the same job
- * where some of the files contain old and new versions of the very same
- * document, using the same unique document key.
- * 
- * Typically, implementations of this interface forbid collisions by throwing an
- * exception, or ignore all but the most recent document version, or, in the
- * general case, order colliding updates ascending from least recent to most
- * recent (partial) update.
- * 
- * The caller of this interface (i.e. the Hadoop Reducer) will then apply the
- * updates to Solr in the order returned by the orderUpdates() method.
- * 
- * Configuration: If an UpdateConflictResolver implementation also implements
- * {@link Configurable} then the Hadoop Reducer will call
- * {@link Configurable#setConf(org.apache.hadoop.conf.Configuration)} on
- * instance construction and pass the standard Hadoop configuration information.
- */
-public interface UpdateConflictResolver {
-  
-  /**
-   * Given a list of all colliding document updates for the same unique document
-   * key, this method returns zero or more documents in an application-specific
-   * order.
-   * 
-   * The caller will then apply the updates for this key to Solr in the order
-   * returned by the orderUpdates() method.
-   * 
-   * @param uniqueKey
-   *          the document key common to all collidingUpdates mentioned below
-   * @param collidingUpdates
-   *          all updates in the MapReduce job that have a key equal to
-   *          {@code uniqueKey} mentioned above. The input order is unspecified.
-   * @param context
-   *          The <code>Context</code> passed from the {@link Reducer}
-   *          implementations.
-   * @return the order in which the updates shall be applied to Solr
-   */
-  Iterator<SolrInputDocument> orderUpdates(
-      Text uniqueKey, Iterator<SolrInputDocument> collidingUpdates, Context context);
-  
-}
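
As a concrete illustration of the first strategy named in the javadoc above (forbidding collisions outright), a hypothetical implementation might look like the following sketch; the class name is invented for illustration and is not part of this commit:

    import java.util.Collections;
    import java.util.Iterator;

    import org.apache.hadoop.io.Text;
    import org.apache.hadoop.mapreduce.Reducer.Context;
    import org.apache.solr.common.SolrInputDocument;
    import org.apache.solr.hadoop.dedup.UpdateConflictResolver;

    public class FailFastUpdateConflictResolver implements UpdateConflictResolver {

      @Override
      public Iterator<SolrInputDocument> orderUpdates(
          Text uniqueKey, Iterator<SolrInputDocument> collidingUpdates, Context context) {
        SolrInputDocument first = collidingUpdates.next(); // the reducer passes at least one update
        if (collidingUpdates.hasNext()) {
          throw new IllegalArgumentException("Update conflict for unique key: " + uniqueKey);
        }
        return Collections.singletonList(first).iterator();
      }
    }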

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ac221b96/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/dedup/package-info.java
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/dedup/package-info.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/dedup/package-info.java
deleted file mode 100644
index a021282..0000000
--- a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/dedup/package-info.java
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- 
-/** 
- * Dedupe related code.
- */
-package org.apache.solr.hadoop.dedup;
-
-
-
-

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ac221b96/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/morphline/MorphlineCounters.java
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/morphline/MorphlineCounters.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/morphline/MorphlineCounters.java
deleted file mode 100644
index 5ba98ff..0000000
--- a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/morphline/MorphlineCounters.java
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.hadoop.morphline;
-
-import org.apache.solr.hadoop.Utils;
-
-public enum MorphlineCounters {
-
-  FILES_READ (getClassName(MorphlineMapper.class) + ": Number of files read"),
-
-  FILE_BYTES_READ (getClassName(MorphlineMapper.class) + ": Number of file bytes read"),
-
-  DOCS_READ (getClassName(MorphlineMapper.class) + ": Number of documents read"),
-
-  PARSER_OUTPUT_BYTES (getClassName(MorphlineMapper.class) + ": Number of document bytes generated by Tika parser"),
-
-  ERRORS (getClassName(MorphlineMapper.class) + ": Number of errors");
-
-  private final String label;
-  
-  private MorphlineCounters(String label) {
-    this.label = label;
-  }
-  
-  public String toString() {
-    return label;
-  }
-  
-  private static String getClassName(Class<?> clazz) {
-    return Utils.getShortClassName(clazz);
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ac221b96/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/morphline/MorphlineMapRunner.java
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/morphline/MorphlineMapRunner.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/morphline/MorphlineMapRunner.java
deleted file mode 100644
index 372c48b..0000000
--- a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/morphline/MorphlineMapRunner.java
+++ /dev/null
@@ -1,268 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.hadoop.morphline;
-
-import java.io.BufferedInputStream;
-import java.io.ByteArrayInputStream;
-import java.io.File;
-import java.io.IOException;
-import java.io.InputStream;
-import java.lang.invoke.MethodHandles;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.TreeMap;
-import java.util.stream.Collectors;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.mapreduce.Mapper.Context;
-import org.apache.solr.client.solrj.SolrServerException;
-import org.apache.solr.hadoop.HdfsFileFieldNames;
-import org.apache.solr.hadoop.PathParts;
-import org.apache.solr.hadoop.Utils;
-import org.apache.solr.morphlines.solr.DocumentLoader;
-import org.apache.solr.morphlines.solr.SolrLocator;
-import org.apache.solr.morphlines.solr.SolrMorphlineContext;
-import org.apache.solr.schema.IndexSchema;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import org.kitesdk.morphline.api.Command;
-import org.kitesdk.morphline.api.MorphlineCompilationException;
-import org.kitesdk.morphline.api.MorphlineContext;
-import org.kitesdk.morphline.api.Record;
-import org.kitesdk.morphline.base.Compiler;
-import org.kitesdk.morphline.base.FaultTolerance;
-import org.kitesdk.morphline.base.Fields;
-import org.kitesdk.morphline.base.Metrics;
-import org.kitesdk.morphline.base.Notifications;
-import com.codahale.metrics.MetricRegistry;
-import com.codahale.metrics.Timer;
-import com.google.common.annotations.Beta;
-import com.typesafe.config.Config;
-import com.typesafe.config.ConfigFactory;
-
-/**
- * Internal helper for {@link MorphlineMapper} and dryRun mode. This API is for *INTERNAL* use only
- * and should not be considered public.
- */
-@Beta
-public final class MorphlineMapRunner {
-
-  private MorphlineContext morphlineContext;
-  private Command morphline;
-  private IndexSchema schema;
-  private Map<String, String> commandLineMorphlineHeaders;
-  private boolean disableFileOpen;
-  private String morphlineFileAndId;
-  private final Timer elapsedTime;   
-  
-  public static final String MORPHLINE_FILE_PARAM = "morphlineFile";
-  public static final String MORPHLINE_ID_PARAM = "morphlineId";
-  
-  /**
-   * Morphline variables can be passed from the CLI to the Morphline, e.g.:
-   * hadoop ... -D morphlineVariable.zkHost=127.0.0.1:2181/solr
-   */
-  public static final String MORPHLINE_VARIABLE_PARAM = "morphlineVariable";
-  
-  /**
-   * Headers, including MIME types, can also be passed explicitly from the CLI to the Morphline, e.g.:
-   * hadoop ... -D morphlineField._attachment_mimetype=text/csv
-   */
-  public static final String MORPHLINE_FIELD_PREFIX = "morphlineField.";
-  
-  /**
-   * Flag to disable reading of file contents if indexing just file metadata is sufficient. 
-   * This improves performance and confidentiality.
-   */
-  public static final String DISABLE_FILE_OPEN = "morphlineDisableFileOpen";
-  
-  private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
-  
-  MorphlineContext getMorphlineContext() {
-    return morphlineContext;
-  }
-  
-  IndexSchema getSchema() {
-    return schema;
-  }
-
-  public MorphlineMapRunner(Configuration configuration, DocumentLoader loader, String solrHomeDir) throws IOException {
-    if (LOG.isTraceEnabled()) {
-      LOG.trace("CWD is {}", new File(".").getCanonicalPath());
-      TreeMap<String, String> map = new TreeMap<>();
-      for (Map.Entry<String,String> entry : configuration) {
-        map.put(entry.getKey(), entry.getValue());
-      }
-      LOG.trace("Configuration:\n" +
-          map.entrySet().stream().map(Object::toString).collect(Collectors.joining("\n")));
-    }
-    
-    FaultTolerance faultTolerance = new FaultTolerance(
-        configuration.getBoolean(FaultTolerance.IS_PRODUCTION_MODE, false), 
-        configuration.getBoolean(FaultTolerance.IS_IGNORING_RECOVERABLE_EXCEPTIONS, false),
-        configuration.get(FaultTolerance.RECOVERABLE_EXCEPTION_CLASSES, SolrServerException.class.getName())        
-        );
-    
-    morphlineContext = new SolrMorphlineContext.Builder()
-      .setDocumentLoader(loader)
-      .setExceptionHandler(faultTolerance)
-      .setMetricRegistry(new MetricRegistry())
-      .build();
-
-    class MySolrLocator extends SolrLocator { // trick to access protected ctor
-      public MySolrLocator(MorphlineContext ctx) {
-        super(ctx);
-      }
-    }
-
-    SolrLocator locator = new MySolrLocator(morphlineContext);
-    locator.setSolrHomeDir(solrHomeDir);
-    schema = locator.getIndexSchema();
-
-    // rebuild context, now with schema
-    morphlineContext = new SolrMorphlineContext.Builder()
-      .setIndexSchema(schema)
-      .setDocumentLoader(loader)
-      .setExceptionHandler(faultTolerance)
-      .setMetricRegistry(morphlineContext.getMetricRegistry())
-      .build();
-
-    String morphlineFile = configuration.get(MORPHLINE_FILE_PARAM);
-    String morphlineId = configuration.get(MORPHLINE_ID_PARAM);
-    if (morphlineFile == null || morphlineFile.trim().length() == 0) {
-      throw new MorphlineCompilationException("Missing parameter: " + MORPHLINE_FILE_PARAM, null);
-    }
-    Map<String, String> morphlineVariables = new HashMap<>();
-    for (Map.Entry<String, String> entry : configuration) {
-      String variablePrefix = MORPHLINE_VARIABLE_PARAM + ".";
-      if (entry.getKey().startsWith(variablePrefix)) {
-        morphlineVariables.put(entry.getKey().substring(variablePrefix.length()), entry.getValue());
-      }
-    }
-    Config override = ConfigFactory.parseMap(morphlineVariables);
-    morphline = new Compiler().compile(new File(morphlineFile), morphlineId, morphlineContext, null, override);
-    morphlineFileAndId = morphlineFile + "@" + morphlineId;
-    
-    disableFileOpen = configuration.getBoolean(DISABLE_FILE_OPEN, false);
-    LOG.debug("disableFileOpen: {}", disableFileOpen);
-        
-    commandLineMorphlineHeaders = new HashMap<>();
-    for (Map.Entry<String,String> entry : configuration) {     
-      if (entry.getKey().startsWith(MORPHLINE_FIELD_PREFIX)) {
-        commandLineMorphlineHeaders.put(entry.getKey().substring(MORPHLINE_FIELD_PREFIX.length()), entry.getValue());
-      }
-    }
-    LOG.debug("Headers, including MIME types, passed by force from the CLI to morphline: {}", commandLineMorphlineHeaders);
-
-    String metricName = MetricRegistry.name(Utils.getShortClassName(getClass()), Metrics.ELAPSED_TIME);
-    this.elapsedTime = morphlineContext.getMetricRegistry().timer(metricName);
-    Notifications.notifyBeginTransaction(morphline);
-  }
-
-  /**
-   * Extract content from the path specified in the value. The key is ignored.
-   */
-  public void map(String value, Configuration configuration, Context context) throws IOException {
-    LOG.info("Processing file {}", value);
-    InputStream in = null;
-    Record record = null;
-    Timer.Context timerContext = elapsedTime.time();
-    try {
-      PathParts parts = new PathParts(value.toString(), configuration);
-      record = getRecord(parts);
-      if (record == null) {
-        return; // ignore
-      }
-      for (Map.Entry<String, String> entry : commandLineMorphlineHeaders.entrySet()) {
-        record.replaceValues(entry.getKey(), entry.getValue());
-      }
-      long fileLength = parts.getFileStatus().getLen();
-      if (disableFileOpen) {
-        in = new ByteArrayInputStream(new byte[0]);
-      } else {
-        in = new BufferedInputStream(parts.getFileSystem().open(parts.getUploadPath()));
-      }
-      record.put(Fields.ATTACHMENT_BODY, in);
-      Notifications.notifyStartSession(morphline);
-      if (!morphline.process(record)) {
-        LOG.warn("Morphline {} failed to process record: {}", morphlineFileAndId, record);
-      }
-      if (context != null) {
-        context.getCounter(MorphlineCounters.class.getName(), MorphlineCounters.FILES_READ.toString()).increment(1);
-        context.getCounter(MorphlineCounters.class.getName(), MorphlineCounters.FILE_BYTES_READ.toString()).increment(fileLength);
-      }
-    } catch (Exception e) {
-      LOG.error("Unable to process file " + value, e);
-      if (context != null) {
-        context.getCounter(getClass().getName() + ".errors", e.getClass().getName()).increment(1);
-      }
-      morphlineContext.getExceptionHandler().handleException(e, record);
-    } finally {
-      timerContext.stop();
-      if (in != null) {
-        in.close();
-      }
-    }
-  }
-  
-  protected Record getRecord(PathParts parts) {
-    FileStatus stats;
-    try {
-      stats = parts.getFileStatus();
-    } catch (IOException e) {
-      stats = null;
-    }
-    if (stats == null) {
-      LOG.warn("Ignoring file that somehow has become unavailable since the job was submitted: {}",
-          parts.getUploadURL());
-      return null;
-    }
-    
-    Record headers = new Record();
-    //headers.put(getSchema().getUniqueKeyField().getName(), parts.getId()); // use HDFS file path as docId if no docId is specified
-    headers.put(Fields.BASE_ID, parts.getId()); // with sanitizeUniqueKey command, use HDFS file path as docId if no docId is specified
-    headers.put(Fields.ATTACHMENT_NAME, parts.getName()); // Tika can use the file name in guessing the right MIME type
-    
-    // enable indexing and storing of file meta data in Solr
-    headers.put(HdfsFileFieldNames.FILE_UPLOAD_URL, parts.getUploadURL());
-    headers.put(HdfsFileFieldNames.FILE_DOWNLOAD_URL, parts.getDownloadURL());
-    headers.put(HdfsFileFieldNames.FILE_SCHEME, parts.getScheme()); 
-    headers.put(HdfsFileFieldNames.FILE_HOST, parts.getHost()); 
-    headers.put(HdfsFileFieldNames.FILE_PORT, String.valueOf(parts.getPort())); 
-    headers.put(HdfsFileFieldNames.FILE_PATH, parts.getURIPath()); 
-    headers.put(HdfsFileFieldNames.FILE_NAME, parts.getName());     
-    headers.put(HdfsFileFieldNames.FILE_LAST_MODIFIED, String.valueOf(stats.getModificationTime())); // FIXME also add in SpoolDirectorySource
-    headers.put(HdfsFileFieldNames.FILE_LENGTH, String.valueOf(stats.getLen())); // FIXME also add in SpoolDirectorySource
-    headers.put(HdfsFileFieldNames.FILE_OWNER, stats.getOwner());
-    headers.put(HdfsFileFieldNames.FILE_GROUP, stats.getGroup());
-    headers.put(HdfsFileFieldNames.FILE_PERMISSIONS_USER, stats.getPermission().getUserAction().SYMBOL);
-    headers.put(HdfsFileFieldNames.FILE_PERMISSIONS_GROUP, stats.getPermission().getGroupAction().SYMBOL);
-    headers.put(HdfsFileFieldNames.FILE_PERMISSIONS_OTHER, stats.getPermission().getOtherAction().SYMBOL);
-    headers.put(HdfsFileFieldNames.FILE_PERMISSIONS_STICKYBIT, String.valueOf(stats.getPermission().getStickyBit()));
-    // TODO: consider to add stats.getAccessTime(), stats.getReplication(), stats.isSymlink(), stats.getBlockSize()
-    
-    return headers;
-  }
-
-  public void cleanup() {
-    Notifications.notifyCommitTransaction(morphline);
-    Notifications.notifyShutdown(morphline);
-  }
-
-}
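
Tying the parameter constants above together, here is a hedged sketch of how a driver could populate the Hadoop Configuration that this runner consumed; all paths and values below are hypothetical placeholders:

    import org.apache.hadoop.conf.Configuration;
    import org.apache.solr.hadoop.morphline.MorphlineMapRunner;

    public class MorphlineRunnerConfigSketch {
      public static void main(String[] args) {
        Configuration conf = new Configuration();
        conf.set(MorphlineMapRunner.MORPHLINE_FILE_PARAM, "/tmp/morphline.conf"); // required
        conf.set(MorphlineMapRunner.MORPHLINE_ID_PARAM, "morphline1");
        // Morphline variables arrive as "morphlineVariable.<name>" entries:
        conf.set(MorphlineMapRunner.MORPHLINE_VARIABLE_PARAM + ".zkHost", "127.0.0.1:2181/solr");
        // Record headers forced from the CLI use the "morphlineField." prefix:
        conf.set(MorphlineMapRunner.MORPHLINE_FIELD_PREFIX + "_attachment_mimetype", "text/csv");
        // Index just file metadata, without opening file contents:
        conf.setBoolean(MorphlineMapRunner.DISABLE_FILE_OPEN, true);
        // The runner is then constructed with this conf, a DocumentLoader, and a
        // Solr home dir (see the constructor above); that wiring is omitted here.
      }
    }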

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ac221b96/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/morphline/MorphlineMapper.java
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/morphline/MorphlineMapper.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/morphline/MorphlineMapper.java
deleted file mode 100644
index 8ba2ea6..0000000
--- a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/morphline/MorphlineMapper.java
+++ /dev/null
@@ -1,193 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.hadoop.morphline;
-
-import java.io.IOException;
-import java.lang.invoke.MethodHandles;
-import java.util.Collection;
-import java.util.Map;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.solr.client.solrj.SolrServerException;
-import org.apache.solr.client.solrj.response.SolrPingResponse;
-import org.apache.solr.client.solrj.response.UpdateResponse;
-import org.apache.solr.common.SolrInputDocument;
-import org.apache.solr.common.SolrInputField;
-import org.apache.solr.hadoop.HeartBeater;
-import org.apache.solr.hadoop.SolrInputDocumentWritable;
-import org.apache.solr.hadoop.SolrMapper;
-import org.apache.solr.morphlines.solr.DocumentLoader;
-import org.apache.solr.schema.IndexSchema;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.codahale.metrics.Counter;
-import com.codahale.metrics.Counting;
-import com.codahale.metrics.Histogram;
-import com.codahale.metrics.Meter;
-import com.codahale.metrics.MetricRegistry;
-import com.codahale.metrics.Timer;
-
-/**
- * This class takes the input files, extracts the relevant content, transforms
- * it and hands SolrInputDocuments to a set of reducers.
- * 
- * More specifically, it consumes a list of &lt;offset, hdfsFilePath&gt; input pairs.
- * For each such pair it extracts a set of zero or more SolrInputDocuments and
- * sends them to a downstream Reducer. The key for the reducer is the unique id
- * of the SolrInputDocument specified in Solr schema.xml.
- */
-public class MorphlineMapper extends SolrMapper<LongWritable, Text> {
-
-  private Context context;
-  private MorphlineMapRunner runner;
-  private HeartBeater heartBeater;
-  
-  private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
-  
-  protected IndexSchema getSchema() {
-    return runner.getSchema();
-  }
-
-  protected Context getContext() {
-    return context;
-  }
-
-  @Override
-  protected void setup(Context context) throws IOException, InterruptedException {
-    super.setup(context);
-    this.context = context;
-    heartBeater = new HeartBeater(context);
-    this.runner = new MorphlineMapRunner(
-        context.getConfiguration(), new MyDocumentLoader(), getSolrHomeDir().toString());
-  }
-
-  /**
-   * Extract content from the path specified in the value. The key is ignored.
-   */
-  @Override
-  public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
-    heartBeater.needHeartBeat();
-    try {
-      runner.map(value.toString(), context.getConfiguration(), context);
-    } finally {
-      heartBeater.cancelHeartBeat();
-    }
-  }
-  
-  @Override
-  protected void cleanup(Context context) throws IOException, InterruptedException {
-    heartBeater.close();
-    runner.cleanup();
-    addMetricsToMRCounters(runner.getMorphlineContext().getMetricRegistry(), context);
-    super.cleanup(context);
-  }
-
-  private void addMetricsToMRCounters(MetricRegistry metricRegistry, Context context) {
-    for (Map.Entry<String, Counter> entry : metricRegistry.getCounters().entrySet()) {
-      addCounting(entry.getKey(),  entry.getValue(), 1);
-    }
-    for (Map.Entry<String, Histogram> entry : metricRegistry.getHistograms().entrySet()) {
-      addCounting(entry.getKey(),  entry.getValue(), 1);
-    }
-    for (Map.Entry<String, Meter> entry : metricRegistry.getMeters().entrySet()) {
-      addCounting(entry.getKey(), entry.getValue(), 1);
-    }
-    for (Map.Entry<String, Timer> entry : metricRegistry.getTimers().entrySet()) {
-      long nanosPerMilliSec = 1000 * 1000;
-      addCounting(entry.getKey(), entry.getValue(), nanosPerMilliSec);
-    }
-  }
-  
-  private void addCounting(String metricName, Counting value, long scale) {
-    context.getCounter("morphline", metricName).increment(value.getCount() / scale);
-  }
-  
-  ///////////////////////////////////////////////////////////////////////////////
-  // Nested classes:
-  ///////////////////////////////////////////////////////////////////////////////
-  private final class MyDocumentLoader implements DocumentLoader {
-
-    @Override
-    public void beginTransaction() {
-    }
-
-    @Override
-    public void load(SolrInputDocument doc) throws IOException, SolrServerException {
-      String uniqueKeyFieldName = getSchema().getUniqueKeyField().getName();
-      Object id = doc.getFieldValue(uniqueKeyFieldName);
-      if (id == null) {
-        throw new IllegalArgumentException("Missing value for (required) unique document key: " + uniqueKeyFieldName
-            + " (see Solr schema.xml)");
-      }
-      try {
-        context.write(new Text(id.toString()), new SolrInputDocumentWritable(doc));
-      } catch (InterruptedException e) {
-        throw new IOException("Interrupted while writing " + doc, e);
-      }
-
-      if (LOG.isDebugEnabled()) {
-        long numParserOutputBytes = 0;
-        for (SolrInputField field : doc.values()) {
-          numParserOutputBytes += sizeOf(field.getValue());
-        }
-        context.getCounter(MorphlineCounters.class.getName(), MorphlineCounters.PARSER_OUTPUT_BYTES.toString()).increment(numParserOutputBytes);
-      }
-      context.getCounter(MorphlineCounters.class.getName(), MorphlineCounters.DOCS_READ.toString()).increment(1);
-    }
-
-    // just an approximation
-    private long sizeOf(Object value) {
-      if (value instanceof CharSequence) {
-        return ((CharSequence) value).length();
-      } else if (value instanceof Integer) {
-        return 4;
-      } else if (value instanceof Long) {
-        return 8;
-      } else if (value instanceof Collection) {
-        long size = 0;
-        for (Object val : (Collection) value) {
-          size += sizeOf(val);
-        }
-        return size;      
-      } else {
-        return String.valueOf(value).length();
-      }
-    }
-
-    @Override
-    public void commitTransaction() {
-    }
-
-    @Override
-    public UpdateResponse rollbackTransaction() throws SolrServerException, IOException {
-      return new UpdateResponse();
-    }
-
-    @Override
-    public void shutdown() {
-    }
-
-    @Override
-    public SolrPingResponse ping() throws SolrServerException, IOException {
-      return new SolrPingResponse();
-    }
-
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ac221b96/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/morphline/package-info.java
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/morphline/package-info.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/morphline/package-info.java
deleted file mode 100644
index c754f79..0000000
--- a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/morphline/package-info.java
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- 
-/** 
- * Morphlines related code.
- */
-package org.apache.solr.hadoop.morphline;
-
-
-
-

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ac221b96/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/package-info.java
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/package-info.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/package-info.java
deleted file mode 100644
index 043a150..0000000
--- a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/package-info.java
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
- 
-/** 
- * {@link org.apache.solr.hadoop.MapReduceIndexerTool} and related code.
- */
-package org.apache.solr.hadoop;
-
-
-
-

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ac221b96/solr/contrib/map-reduce/src/java/overview.html
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/java/overview.html b/solr/contrib/map-reduce/src/java/overview.html
deleted file mode 100644
index ad7c1c0..0000000
--- a/solr/contrib/map-reduce/src/java/overview.html
+++ /dev/null
@@ -1,21 +0,0 @@
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements.  See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License.  You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<html>
-<body>
-Apache Solr Search Server: Solr MapReduce contrib
-</body>
-</html>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ac221b96/solr/contrib/map-reduce/src/test-files/README.txt
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/test-files/README.txt b/solr/contrib/map-reduce/src/test-files/README.txt
deleted file mode 100644
index 8905df2..0000000
--- a/solr/contrib/map-reduce/src/test-files/README.txt
+++ /dev/null
@@ -1 +0,0 @@
-The test files used by this module are located in the morphlines-core module.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ac221b96/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/AlphaNumericComparatorTest.java
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/AlphaNumericComparatorTest.java b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/AlphaNumericComparatorTest.java
deleted file mode 100644
index cab29e7..0000000
--- a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/AlphaNumericComparatorTest.java
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.hadoop;
-
-import java.util.Comparator;
-
-import org.junit.Assert;
-import org.junit.Test;
-
-public class AlphaNumericComparatorTest extends Assert {
-
-  @Test
-  public void testBasic() {
-    Comparator c = new AlphaNumericComparator();
-    assertTrue(c.compare("a", "b") < 0);
-    assertTrue(c.compare("shard1", "shard1") == 0);
-    //assertTrue(c.compare("shard01", "shard1") == 0);
-    assertTrue(c.compare("shard10", "shard10") == 0);
-    assertTrue(c.compare("shard1", "shard2") < 0);
-    assertTrue(c.compare("shard9", "shard10") < 0);
-    assertTrue(c.compare("shard09", "shard10") < 0);
-    assertTrue(c.compare("shard019", "shard10") > 0);
-    assertTrue(c.compare("shard10", "shard11") < 0);
-    assertTrue(c.compare("shard10z", "shard10z") == 0);
-    assertTrue(c.compare("shard10z", "shard11z") < 0);
-    assertTrue(c.compare("shard10a", "shard10z") < 0);
-    assertTrue(c.compare("shard10z", "shard10a") > 0);
-    assertTrue(c.compare("shard1z", "shard1z") == 0);
-    assertTrue(c.compare("shard2", "shard1") > 0);
-  }
-  
-}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ac221b96/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/IdentityMapper.java
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/IdentityMapper.java b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/IdentityMapper.java
deleted file mode 100644
index 9ba70d5..0000000
--- a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/IdentityMapper.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.hadoop;
-
-import java.io.IOException;
-import java.lang.invoke.MethodHandles;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public class IdentityMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
-
-  private static final Logger LOGGER = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
-
-  @Override
-  protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
-    LOGGER.info("map key: {}, value: {}", key, value);
-    context.write(value, NullWritable.get());
-  }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ac221b96/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/IdentityReducer.java
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/IdentityReducer.java b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/IdentityReducer.java
deleted file mode 100644
index 1c248c6..0000000
--- a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/IdentityReducer.java
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.hadoop;
-
-import java.io.IOException;
-import java.lang.invoke.MethodHandles;
-
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public class IdentityReducer extends Reducer<Text, NullWritable, Text, NullWritable> {
-
-  private static final Logger LOGGER = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
-
-  @Override
-  protected void reduce(Text key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
-    LOGGER.info("reduce key: {}, value: {}", key, values);
-    context.write(key, NullWritable.get());
-  }
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ac221b96/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/LineRandomizerMapperReducerTest.java
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/LineRandomizerMapperReducerTest.java b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/LineRandomizerMapperReducerTest.java
deleted file mode 100644
index 2354fdd..0000000
--- a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/LineRandomizerMapperReducerTest.java
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.hadoop;
-
-import java.io.IOException;
-import java.util.Arrays;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver;
-import org.apache.hadoop.mrunit.types.Pair;
-import org.junit.Assert;
-import org.junit.Before;
-import org.junit.Test;
-
-public class LineRandomizerMapperReducerTest extends Assert {
-
-  private MapReduceDriver<LongWritable, Text, LongWritable, Text, Text, NullWritable> mapReduceDriver;
-
-  @Before
-  public void setUp() {
-    LineRandomizerMapper mapper = new LineRandomizerMapper();
-    LineRandomizerReducer reducer = new LineRandomizerReducer();
-    mapReduceDriver = MapReduceDriver.newMapReduceDriver(mapper, reducer);
-  }
-
-  @Test
-  public void testMapReduce1Item() throws IOException {
-    mapReduceDriver.withInput(new LongWritable(0), new Text("hello"));
-    mapReduceDriver.withOutput(new Text("hello"), NullWritable.get());
-    mapReduceDriver.runTest();
-  }
-  
-  @Test
-  public void testMapReduce2Items() throws IOException {
-    mapReduceDriver.withAll(Arrays.asList(
-        new Pair<>(new LongWritable(0), new Text("hello")),
-        new Pair<>(new LongWritable(1), new Text("world"))
-        ));
-    mapReduceDriver.withAllOutput(Arrays.asList(
-        new Pair<>(new Text("world"), NullWritable.get()),
-        new Pair<>(new Text("hello"), NullWritable.get())
-        ));
-    mapReduceDriver.runTest();
-  }
-  
-  @Test
-  public void testMapReduce3Items() throws IOException {
-    mapReduceDriver.withAll(Arrays.asList(
-        new Pair<>(new LongWritable(0), new Text("hello")),
-        new Pair<>(new LongWritable(1), new Text("world")),
-        new Pair<>(new LongWritable(2), new Text("nadja"))
-        ));
-    mapReduceDriver.withAllOutput(Arrays.asList(
-        new Pair<>(new Text("nadja"), NullWritable.get()),
-        new Pair<>(new Text("world"), NullWritable.get()),
-        new Pair<>(new Text("hello"), NullWritable.get())
-        ));
-    mapReduceDriver.runTest();
-  }
-  
-  @Test
-  public void testMapReduce4Items() throws IOException {
-    mapReduceDriver.withAll(Arrays.asList(
-        new Pair<>(new LongWritable(0), new Text("hello")),
-        new Pair<>(new LongWritable(1), new Text("world")),
-        new Pair<>(new LongWritable(2), new Text("nadja")),
-        new Pair<>(new LongWritable(3), new Text("basti"))
-        ));
-    mapReduceDriver.withAllOutput(Arrays.asList(
-        new Pair<>(new Text("nadja"), NullWritable.get()),
-        new Pair<>(new Text("world"), NullWritable.get()),
-        new Pair<>(new Text("basti"), NullWritable.get()),
-        new Pair<>(new Text("hello"), NullWritable.get())
-        ));
-    mapReduceDriver.runTest();
-  }
-  
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ac221b96/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MRUnitBase.java
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MRUnitBase.java b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MRUnitBase.java
deleted file mode 100644
index 558d662..0000000
--- a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MRUnitBase.java
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.hadoop;
-
-import java.io.File;
-import java.io.IOException;
-import java.nio.file.Files;
-import java.util.Locale;
-
-import org.apache.commons.io.FileUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.solr.SolrTestCaseJ4;
-import org.apache.solr.hadoop.morphline.MorphlineMapRunner;
-import org.apache.solr.morphlines.solr.AbstractSolrMorphlineTestBase;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-
-public abstract class MRUnitBase extends SolrTestCaseJ4 {
-  
-  protected static final String RESOURCES_DIR = getFile("morphlines-core.marker").getParent();
-  protected static final String DOCUMENTS_DIR = RESOURCES_DIR + "/test-documents";
-  protected static File solrHomeZip;
-
-  @BeforeClass
-  public static void setupClass() throws Exception {
-    assumeFalse("This test fails on UNIX with Turkish default locale (https://issues.apache.org/jira/browse/SOLR-6387)",
-        new Locale("tr").getLanguage().equals(Locale.getDefault().getLanguage()));
-    solrHomeZip = SolrOutputFormat.createSolrHomeZip(new File(RESOURCES_DIR + "/solr/mrunit"));
-    assertNotNull(solrHomeZip);
-  }
-
-  @AfterClass
-  public static void teardownClass() throws Exception {
-    if (solrHomeZip != null) Files.delete(solrHomeZip.toPath());
-    solrHomeZip = null;
-  }
-  
-  protected void setupHadoopConfig(Configuration config) throws IOException {
-    
-    String tempDir = createTempDir().toFile().getAbsolutePath();
-
-    FileUtils.copyFile(new File(RESOURCES_DIR + "/custom-mimetypes.xml"), new File(tempDir + "/custom-mimetypes.xml"));
-
-    AbstractSolrMorphlineTestBase.setupMorphline(tempDir, "test-morphlines/solrCellDocumentTypes", true);
-    
-    config.set(MorphlineMapRunner.MORPHLINE_FILE_PARAM, tempDir + "/test-morphlines/solrCellDocumentTypes.conf");
-    config.set(SolrOutputFormat.ZIP_NAME, solrHomeZip.getName());
-  }
-  
-}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ac221b96/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MapReduceIndexerToolArgumentParserTest.java
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MapReduceIndexerToolArgumentParserTest.java b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MapReduceIndexerToolArgumentParserTest.java
deleted file mode 100644
index 1aebcf7..0000000
--- a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MapReduceIndexerToolArgumentParserTest.java
+++ /dev/null
@@ -1,468 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.hadoop;
-
-import java.io.ByteArrayOutputStream;
-import java.io.File;
-import java.io.PrintStream;
-import java.io.UnsupportedEncodingException;
-import java.nio.charset.StandardCharsets;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.Locale;
-
-import org.apache.commons.io.FileUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.lucene.util.Constants;
-import org.apache.solr.SolrTestCaseJ4;
-import org.apache.solr.cloud.AbstractZkTestCase;
-import org.apache.solr.hadoop.dedup.NoChangeUpdateConflictResolver;
-import org.apache.solr.hadoop.dedup.RetainMostRecentUpdateConflictResolver;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.BeforeClass;
-import org.junit.Test;
-
-public class MapReduceIndexerToolArgumentParserTest extends SolrTestCaseJ4 {
-  
-  private Configuration conf; 
-  private MapReduceIndexerTool.MyArgumentParser parser;
-  private MapReduceIndexerTool.Options opts;
-  private PrintStream oldSystemOut;
-  private PrintStream oldSystemErr;
-  private ByteArrayOutputStream bout;
-  private ByteArrayOutputStream berr;
-  
-  private static final String RESOURCES_DIR = getFile("morphlines-core.marker").getParent();  
-  private static final File MINIMR_INSTANCE_DIR = new File(RESOURCES_DIR + "/solr/minimr");
-
-  private static final String MORPHLINE_FILE = RESOURCES_DIR + "/test-morphlines/solrCellDocumentTypes.conf";
-    
-  private final File solrHomeDirectory = createTempDir().toFile();
-  
-  @BeforeClass
-  public static void beforeClass() {
-    assumeFalse("Does not work on Windows, because it uses UNIX shell commands or POSIX paths", Constants.WINDOWS);
-    assumeFalse("This test fails on UNIX with Turkish default locale (https://issues.apache.org/jira/browse/SOLR-6387)",
-                new Locale("tr").getLanguage().equals(Locale.getDefault().getLanguage()));
-  }
-  
-  @Before
-  public void setUp() throws Exception {
-    super.setUp();
-    AbstractZkTestCase.SOLRHOME = solrHomeDirectory;
-    FileUtils.copyDirectory(MINIMR_INSTANCE_DIR, solrHomeDirectory);
-    
-    conf = new Configuration();
-    parser = new MapReduceIndexerTool.MyArgumentParser();
-    opts = new MapReduceIndexerTool.Options();
-    oldSystemOut = System.out;
-    bout = new ByteArrayOutputStream();
-    System.setOut(new PrintStream(bout, true, "UTF-8"));
-    oldSystemErr = System.err;
-    berr = new ByteArrayOutputStream();
-    System.setErr(new PrintStream(berr, true, "UTF-8"));
-  }
-
-  @After
-  public void tearDown() throws Exception {
-    super.tearDown();
-    System.setOut(oldSystemOut);
-    System.setErr(oldSystemErr);
-  }
-
-  @Test
-  public void testArgsParserTypicalUse() {
-    String[] args = new String[] { 
-        "--input-list", "file:///tmp",
-        "--morphline-file", MORPHLINE_FILE,
-        "--morphline-id", "morphline_xyz",
-        "--output-dir", "file:/tmp/foo",
-        "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(),
-        "--mappers", "10", 
-        "--reducers", "9", 
-        "--fanout", "8", 
-        "--max-segments", "7", 
-        "--shards", "1",
-        "--verbose", 
-        "file:///home",
-        "file:///dev",
-        };
-    Integer res = parser.parseArgs(args, conf, opts);
-    assertNull(res != null ? res.toString() : "", res);
-    assertEquals(Collections.singletonList(new Path("file:///tmp")), opts.inputLists);
-    assertEquals(new Path("file:/tmp/foo"), opts.outputDir);
-    assertEquals(new File(MINIMR_INSTANCE_DIR.getPath()), opts.solrHomeDir);
-    assertEquals(10, opts.mappers);
-    assertEquals(9, opts.reducers);
-    assertEquals(8, opts.fanout);
-    assertEquals(7, opts.maxSegments);
-    assertEquals(new Integer(1), opts.shards);
-    assertEquals(null, opts.fairSchedulerPool);
-    assertTrue(opts.isVerbose);
-    assertEquals(Arrays.asList(new Path("file:///home"), new Path("file:///dev")), opts.inputFiles);
-    assertEquals(RetainMostRecentUpdateConflictResolver.class.getName(), opts.updateConflictResolver);
-    assertEquals(MORPHLINE_FILE, opts.morphlineFile.getPath());
-    assertEquals("morphline_xyz", opts.morphlineId);
-    assertEmptySystemErrAndEmptySystemOut();
-  }
-
-  @Test
-  public void testArgsParserMultipleSpecsOfSameKind() {
-    String[] args = new String[] { 
-        "--input-list", "file:///tmp",
-        "--input-list", "file:///",
-        "--morphline-file", MORPHLINE_FILE,
-        "--output-dir", "file:/tmp/foo",
-        "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(), 
-        "--shards", "1",
-        "file:///home",
-        "file:///dev",
-        };
-    assertNull(parser.parseArgs(args, conf, opts));
-    assertEquals(Arrays.asList(new Path("file:///tmp"), new Path("file:///")), opts.inputLists);
-    assertEquals(Arrays.asList(new Path("file:///home"), new Path("file:///dev")), opts.inputFiles);
-    assertEquals(new Path("file:/tmp/foo"), opts.outputDir);
-    assertEquals(new File(MINIMR_INSTANCE_DIR.getPath()), opts.solrHomeDir);
-    assertEmptySystemErrAndEmptySystemOut();
-  }
-
-  @Test
-  public void testArgsParserTypicalUseWithEqualsSign() {
-    String[] args = new String[] { 
-        "--input-list=file:///tmp",
-        "--morphline-file", MORPHLINE_FILE,
-        "--output-dir=file:/tmp/foo",
-        "--solr-home-dir=" + MINIMR_INSTANCE_DIR.getPath(), 
-        "--mappers=10", 
-        "--shards", "1",
-        "--verbose", 
-        "file:///home",
-        "file:///dev",
-        };
-    assertNull(parser.parseArgs(args, conf, opts));
-    assertEquals(Collections.singletonList(new Path("file:///tmp")), opts.inputLists);
-    assertEquals(new Path("file:/tmp/foo"), opts.outputDir);
-    assertEquals(new File(MINIMR_INSTANCE_DIR.getPath()), opts.solrHomeDir);
-    assertEquals(10, opts.mappers);
-    assertEquals(new Integer(1), opts.shards);
-    assertEquals(null, opts.fairSchedulerPool);
-    assertTrue(opts.isVerbose);
-    assertEquals(Arrays.asList(new Path("file:///home"), new Path("file:///dev")), opts.inputFiles);
-    assertEmptySystemErrAndEmptySystemOut();
-  }
-
-  @Test
-  public void testArgsParserMultipleSpecsOfSameKindWithEqualsSign() {
-    String[] args = new String[] { 
-        "--input-list=file:///tmp",
-        "--input-list=file:///",
-        "--morphline-file", MORPHLINE_FILE,
-        "--output-dir=file:/tmp/foo",
-        "--solr-home-dir=" + MINIMR_INSTANCE_DIR.getPath(), 
-        "--shards", "1",
-        "file:///home",
-        "file:///dev",
-        };
-    assertNull(parser.parseArgs(args, conf, opts));
-    assertEquals(Arrays.asList(new Path("file:///tmp"), new Path("file:///")), opts.inputLists);
-    assertEquals(Arrays.asList(new Path("file:///home"), new Path("file:///dev")), opts.inputFiles);
-    assertEquals(new Path("file:/tmp/foo"), opts.outputDir);
-    assertEquals(new File(MINIMR_INSTANCE_DIR.getPath()), opts.solrHomeDir);
-    assertEmptySystemErrAndEmptySystemOut();
-  }
-
-  @Test
-  public void testArgsParserHelp() throws UnsupportedEncodingException  {
-    String[] args = new String[] { "--help" };
-    assertEquals(new Integer(0), parser.parseArgs(args, conf, opts));
-    String helpText = new String(bout.toByteArray(), StandardCharsets.UTF_8);
-    assertTrue(helpText.contains("MapReduce batch job driver that "));
-    assertTrue(helpText.contains("bin/hadoop command"));
-    assertEquals(0, berr.toByteArray().length);
-  }
-  
-  @Test
-  public void testArgsParserOk() {
-    String[] args = new String[] { 
-        "--input-list", "file:///tmp",
-        "--morphline-file", MORPHLINE_FILE,
-        "--output-dir", "file:/tmp/foo",
-        "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(), 
-        "--shards", "1",
-        };
-    assertNull(parser.parseArgs(args, conf, opts));
-    assertEquals(new Integer(1), opts.shards);
-    assertEmptySystemErrAndEmptySystemOut();
-  }
-
-  @Test
-  public void testArgsParserUpdateConflictResolver() {
-    String[] args = new String[] { 
-        "--input-list", "file:///tmp",
-        "--morphline-file", MORPHLINE_FILE,
-        "--output-dir", "file:/tmp/foo",
-        "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(), 
-        "--shards", "1",
-        "--update-conflict-resolver", NoChangeUpdateConflictResolver.class.getName(),
-        };
-    assertNull(parser.parseArgs(args, conf, opts));
-    assertEquals(NoChangeUpdateConflictResolver.class.getName(), opts.updateConflictResolver);
-    assertEmptySystemErrAndEmptySystemOut();
-  }
-
-  @Test
-  public void testArgsParserUnknownArgName() throws Exception {
-    String[] args = new String[] { 
-        "--xxxxxxxxinputlist", "file:///tmp",
-        "--morphline-file", MORPHLINE_FILE,
-        "--output-dir", "file:/tmp/foo",
-        "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(), 
-        "--shards", "1",
-        };
-    assertArgumentParserException(args);
-  }
-
-  @Test
-  public void testArgsParserFileNotFound1() throws Exception {
-    String[] args = new String[] { 
-        "--input-list", "file:///tmp",
-        "--morphline-file", MORPHLINE_FILE,
-        "--output-dir", "file:/fileNotFound/foo",
-        "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(), 
-        "--shards", "1",
-        };
-    assertArgumentParserException(args);
-  }
-  
-  @Test
-  public void testArgsParserFileNotFound2() throws Exception {
-    String[] args = new String[] { 
-        "--input-list", "file:///tmp",
-        "--morphline-file", MORPHLINE_FILE,
-        "--output-dir", "file:/tmp/foo",
-        "--solr-home-dir", "/fileNotFound", 
-        "--shards", "1",
-        };
-    assertArgumentParserException(args);
-  }
-  
-  @Test
-  public void testArgsParserIntOutOfRange() throws Exception {
-    String[] args = new String[] { 
-        "--input-list", "file:///tmp",
-        "--morphline-file", MORPHLINE_FILE,
-        "--output-dir", "file:/tmp/foo",
-        "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(), 
-        "--shards", "1",
-        "--mappers", "-20"
-        };
-    assertArgumentParserException(args);
-  }
-  
-  @Test
-  public void testArgsParserIllegalFanout() throws Exception {
-    String[] args = new String[] { 
-        "--input-list", "file:///tmp",
-        "--morphline-file", MORPHLINE_FILE,
-        "--output-dir", "file:/tmp/foo",
-        "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(), 
-        "--shards", "1",
-        "--fanout", "1" // must be >= 2
-        };
-    assertArgumentParserException(args);
-  }
-  
-  @Test
-  public void testArgsParserSolrHomeMustContainSolrConfigFile() throws Exception {
-    String[] args = new String[] { 
-        "--input-list", "file:///tmp",
-        "--morphline-file", MORPHLINE_FILE,
-        "--output-dir", "file:/tmp/foo",
-        "--shards", "1",
-        "--solr-home-dir", "/",
-        };
-    assertArgumentParserException(args);
-  }
-
-  @Test
-  public void testArgsShardUrlOk() {
-    String[] args = new String[] { 
-        "--input-list", "file:///tmp",
-        "--morphline-file", MORPHLINE_FILE,
-        "--output-dir", "file:/tmp/foo",
-        "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(), 
-        "--shard-url", "http://localhost:8983/solr/collection1",
-        "--shard-url", "http://localhost:8983/solr/collection2",
-        };
-    assertNull(parser.parseArgs(args, conf, opts));
-    assertEquals(Arrays.asList(
-        Collections.singletonList("http://localhost:8983/solr/collection1"),
-        Collections.singletonList("http://localhost:8983/solr/collection2")),
-        opts.shardUrls);
-    assertEquals(new Integer(2), opts.shards);
-    assertEmptySystemErrAndEmptySystemOut();
-  }
-  
-  @Test
-  public void testArgsShardUrlMustHaveAParam() throws Exception {
-    String[] args = new String[] { 
-        "--input-list", "file:///tmp",
-        "--morphline-file", MORPHLINE_FILE,
-        "--output-dir", "file:/tmp/foo",
-        "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(), 
-        "--shard-url",
-        };
-    assertArgumentParserException(args);
-  }
-  
-  @Test
-  public void testArgsShardUrlAndShardsSucceeds() {
-    String[] args = new String[] { 
-        "--input-list", "file:///tmp",
-        "--morphline-file", MORPHLINE_FILE,
-        "--output-dir", "file:/tmp/foo",
-        "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(), 
-        "--shards", "1", 
-        "--shard-url", "http://localhost:8983/solr/collection1",
-        };
-    assertNull(parser.parseArgs(args, conf, opts));
-    assertEmptySystemErrAndEmptySystemOut();
-  }
-  
-  @Test
-  public void testArgsShardUrlNoGoLive() {
-    String[] args = new String[] { 
-        "--input-list", "file:///tmp",
-        "--morphline-file", MORPHLINE_FILE,
-        "--output-dir", "file:/tmp/foo",
-        "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(), 
-        "--shard-url", "http://localhost:8983/solr/collection1"
-        };
-    assertNull(parser.parseArgs(args, conf, opts));
-    assertEmptySystemErrAndEmptySystemOut();
-    assertEquals(new Integer(1), opts.shards);
-  }
-  
-  @Test
-  public void testArgsShardUrlsAndZkhostAreMutuallyExclusive() throws Exception {
-    String[] args = new String[] { 
-        "--input-list", "file:///tmp",
-        "--morphline-file", MORPHLINE_FILE,
-        "--output-dir", "file:/tmp/foo",
-        "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(), 
-        "--shard-url", "http://localhost:8983/solr/collection1",
-        "--shard-url", "http://localhost:8983/solr/collection1",
-        "--zk-host", "http://localhost:2185",
-        "--go-live"
-        };
-    assertArgumentParserException(args);
-  }
-  
-  @Test
-  public void testArgsGoLiveAndSolrUrl() {
-    String[] args = new String[] { 
-        "--input-list", "file:///tmp",
-        "--morphline-file", MORPHLINE_FILE,
-        "--output-dir", "file:/tmp/foo",
-        "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(), 
-        "--shard-url", "http://localhost:8983/solr/collection1",
-        "--shard-url", "http://localhost:8983/solr/collection1",
-        "--go-live"
-        };
-    Integer result = parser.parseArgs(args, conf, opts);
-    assertNull(result);
-    assertEmptySystemErrAndEmptySystemOut();
-  }
-  
-  @Test
-  public void testArgsZkHostNoGoLive() throws Exception {
-    String[] args = new String[] { 
-        "--input-list", "file:///tmp",
-        "--morphline-file", MORPHLINE_FILE,
-        "--output-dir", "file:/tmp/foo",
-        "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(), 
-        "--zk-host", "http://localhost:2185",
-        };
-    assertArgumentParserException(args);
-  }
-  
-  @Test
-  public void testArgsGoLiveZkHostNoCollection() throws Exception {
-    String[] args = new String[] { 
-        "--input-list", "file:///tmp",
-        "--morphline-file", MORPHLINE_FILE,
-        "--output-dir", "file:/tmp/foo",
-        "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(), 
-        "--zk-host", "http://localhost:2185",
-        "--go-live"
-        };
-    assertArgumentParserException(args);
-  }
-  
-  @Test
-  public void testArgsGoLiveNoZkHostOrSolrUrl() throws Exception {
-    String[] args = new String[] { 
-        "--input-list", "file:///tmp",
-        "--morphline-file", MORPHLINE_FILE,
-        "--output-dir", "file:/tmp/foo",
-        "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(), 
-        "--go-live"
-        };
-    assertArgumentParserException(args);
-  }  
-
-  @Test
-  public void testNoSolrHomeDirOrZKHost() throws Exception {
-    String[] args = new String[] {
-        "--input-list", "file:///tmp",
-        "--morphline-file", MORPHLINE_FILE,
-        "--output-dir", "file:/tmp/foo",
-        "--shards", "1",
-        };
-    assertArgumentParserException(args);
-  }
-
-  @Test
-  public void testZKHostNoSolrHomeDirOk() {
-    String[] args = new String[] {
-        "--input-list", "file:///tmp",
-        "--morphline-file", MORPHLINE_FILE,
-        "--output-dir", "file:/tmp/foo",
-        "--zk-host", "http://localhost:2185",
-        "--collection", "collection1",
-        };
-    assertNull(parser.parseArgs(args, conf, opts));
-    assertEmptySystemErrAndEmptySystemOut();
-  }
-
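-  // Asserts that the argument parser wrote nothing to the captured System.out and System.err streams.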
-  private void assertEmptySystemErrAndEmptySystemOut() {
-    assertEquals(0, bout.toByteArray().length);
-    assertEquals(0, berr.toByteArray().length);
-  }
-  
-  private void assertArgumentParserException(String[] args) throws UnsupportedEncodingException {
-    assertEquals("should have returned fail code", new Integer(1), parser.parseArgs(args, conf, opts));
-    assertEquals("no sys out expected:" + new String(bout.toByteArray(), StandardCharsets.UTF_8), 0, bout.toByteArray().length);
-    String usageText = new String(berr.toByteArray(), StandardCharsets.UTF_8);
-    assertTrue("should start with usage msg \"usage: hadoop \":" + usageText, usageText.startsWith("usage: hadoop "));
-  }
-  
-}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ac221b96/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineBasicMiniMRTest.java
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineBasicMiniMRTest.java b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineBasicMiniMRTest.java
deleted file mode 100644
index 6479a20..0000000
--- a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineBasicMiniMRTest.java
+++ /dev/null
@@ -1,415 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.hadoop;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.OutputStream;
-import java.io.OutputStreamWriter;
-import java.io.Writer;
-import java.lang.reflect.Array;
-import java.nio.charset.StandardCharsets;
-import java.util.Arrays;
-
-import org.apache.commons.io.FileUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.FileUtil;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.permission.FsPermission;
-import org.apache.hadoop.hdfs.MiniDFSCluster;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.security.authorize.ProxyUsers;
-import org.apache.hadoop.util.JarFinder;
-import org.apache.hadoop.util.ToolRunner;
-import org.apache.hadoop.yarn.conf.YarnConfiguration;
-import org.apache.lucene.util.Constants;
-import org.apache.lucene.util.LuceneTestCase.AwaitsFix;
-import org.apache.lucene.util.LuceneTestCase.Slow;
-import org.apache.solr.SolrTestCaseJ4;
-import org.apache.solr.cloud.AbstractZkTestCase;
-import org.apache.solr.hadoop.hack.MiniMRCluster;
-import org.apache.solr.morphlines.solr.AbstractSolrMorphlineTestBase;
-import org.apache.solr.util.BadHdfsThreadsFilter;
-import org.apache.solr.util.BadMrClusterThreadsFilter;
-import org.junit.After;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-import org.junit.Test;
-
-import com.carrotsearch.randomizedtesting.annotations.Nightly;
-import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction;
-import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction.Action;
-import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters;
-import com.carrotsearch.randomizedtesting.annotations.ThreadLeakLingering;
-import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies;
-import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies.Consequence;
-
-@ThreadLeakAction({Action.WARN})
-@ThreadLeakLingering(linger = 0)
-@ThreadLeakZombies(Consequence.CONTINUE)
-@ThreadLeakFilters(defaultFilters = true, filters = {
-    BadHdfsThreadsFilter.class, BadMrClusterThreadsFilter.class // hdfs currently leaks thread(s)
-})
-@Slow
-@Nightly
-@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-9076")
-public class MorphlineBasicMiniMRTest extends SolrTestCaseJ4 {
-  
-  private static final boolean ENABLE_LOCAL_JOB_RUNNER = false; // for debugging only
-  private static final String RESOURCES_DIR = getFile("morphlines-core.marker").getParent();  
-  private static final String DOCUMENTS_DIR = RESOURCES_DIR + "/test-documents";
-  private static final File MINIMR_CONF_DIR = new File(RESOURCES_DIR + "/solr/minimr");
-  
-  private static String SEARCH_ARCHIVES_JAR;
-
-  private static MiniDFSCluster dfsCluster = null;
-  private static MiniMRCluster mrCluster = null;
-  private static int numRuns = 0;
-
-  private final String inputAvroFile;
-  private final int count;
-  
-  private static String tempDir;
-  
-  private static File solrHomeDirectory;
-  
-  protected MapReduceIndexerTool createTool() {
-    return new MapReduceIndexerTool();
-  }
-
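-  // Randomly picks one of three sample Avro input files, together with the document count each is expected to produce.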
-  public MorphlineBasicMiniMRTest() {
-    int data = random().nextInt(3);
-    switch (data) {
-      case 0:
-        this.inputAvroFile = "sample-statuses-20120906-141433.avro";
-        this.count = 2;
-        break;
-      case 1:
-        this.inputAvroFile = "sample-statuses-20120521-100919.avro";
-        this.count = 20;
-        break;
-      case 2:
-        this.inputAvroFile = "sample-statuses-20120906-141433-medium.avro";
-        this.count = 2104;
-        break;
-      default:
-        throw new RuntimeException("Test setup is broken");
-    }
-
-  }
-
-  @BeforeClass
-  public static void setupClass() throws Exception {
-    solrHomeDirectory = createTempDir().toFile();
-    
-    assumeFalse("HDFS tests were disabled by -Dtests.disableHdfs",
-        Boolean.parseBoolean(System.getProperty("tests.disableHdfs", "false")));
-    
-    assumeFalse("FIXME: This test does not work with Windows because of native library requirements", Constants.WINDOWS);
-    
-    AbstractZkTestCase.SOLRHOME = solrHomeDirectory;
-    FileUtils.copyDirectory(MINIMR_CONF_DIR, solrHomeDirectory);
-    File dataDir = createTempDir().toFile();
-    tempDir = dataDir.getAbsolutePath();
-    new File(tempDir).mkdirs();
-    FileUtils.copyFile(new File(RESOURCES_DIR + "/custom-mimetypes.xml"), new File(tempDir + "/custom-mimetypes.xml"));
-    
-    AbstractSolrMorphlineTestBase.setupMorphline(tempDir, "test-morphlines/solrCellDocumentTypes", true);
-    
-    System.setProperty("hadoop.log.dir", new File(solrHomeDirectory, "logs").getAbsolutePath());
-    
-    int taskTrackers = 1;
-    int dataNodes = 2;
-//    String proxyUser = System.getProperty("user.name");
-//    String proxyGroup = "g";
-//    StringBuilder sb = new StringBuilder();
-//    sb.append("127.0.0.1,localhost");
-//    for (InetAddress i : InetAddress.getAllByName(InetAddress.getLocalHost().getHostName())) {
-//      sb.append(",").append(i.getCanonicalHostName());
-//    }
-    
-    new File(dataDir, "nm-local-dirs").mkdirs();
-    
-    System.setProperty("solr.hdfs.blockcache.enabled", "false");
-    
-    System.setProperty("test.build.dir", dataDir + File.separator + "hdfs" + File.separator + "test-build-dir");
-    System.setProperty("test.build.data", dataDir + File.separator + "hdfs" + File.separator + "build");
-    System.setProperty("test.cache.data", dataDir + File.separator + "hdfs" + File.separator + "cache");
-
-    // Initialize AFTER test.build.dir is set, since JarFinder uses it.
-    SEARCH_ARCHIVES_JAR = JarFinder.getJar(MapReduceIndexerTool.class);
-
-    JobConf conf = new JobConf();
-    conf.set("dfs.block.access.token.enable", "false");
-    conf.set("dfs.permissions", "true");
-    conf.set("hadoop.security.authentication", "simple");
-    conf.set(YarnConfiguration.NM_LOCAL_DIRS, dataDir.getPath() + File.separator +  "nm-local-dirs");
-    conf.set(YarnConfiguration.DEFAULT_NM_LOG_DIRS, dataDir + File.separator +  "nm-logs");
-    conf.set("testWorkDir", dataDir.getPath() + File.separator +  "testWorkDir");
-    conf.set("mapreduce.jobhistory.minicluster.fixed.ports", "false");
-    conf.set("mapreduce.jobhistory.admin.address", "0.0.0.0:0");
-
-    dfsCluster = new MiniDFSCluster(conf, dataNodes, true, null);
-    FileSystem fileSystem = dfsCluster.getFileSystem();
-    fileSystem.mkdirs(new Path("/tmp"));
-    fileSystem.mkdirs(new Path("/user"));
-    fileSystem.mkdirs(new Path("/hadoop/mapred/system"));
-    fileSystem.setPermission(new Path("/tmp"), FsPermission.valueOf("-rwxrwxrwx"));
-    fileSystem.setPermission(new Path("/user"), FsPermission.valueOf("-rwxrwxrwx"));
-    fileSystem.setPermission(new Path("/hadoop/mapred/system"), FsPermission.valueOf("-rwx------"));
-    String nnURI = fileSystem.getUri().toString();
-    int numDirs = 1;
-    String[] racks = null;
-    String[] hosts = null;
-
-    mrCluster = new MiniMRCluster(0, 0, taskTrackers, nnURI, numDirs, racks, hosts, null, conf);
-    ProxyUsers.refreshSuperUserGroupsConfiguration(conf);
-  }
-
-  @AfterClass
-  public static void teardownClass() throws Exception {
-    System.clearProperty("solr.hdfs.blockcache.enabled");
-    System.clearProperty("test.build.dir");
-    System.clearProperty("test.build.data");
-    System.clearProperty("test.cache.data");
-
-    if (mrCluster != null) {
-      mrCluster.shutdown();
-      mrCluster = null;
-    }
-    if (dfsCluster != null) {
-      dfsCluster.shutdown();
-      dfsCluster = null;
-    }
-    
-    FileSystem.closeAll();
-  }
-  
-  @After
-  public void tearDown() throws Exception {
-    System.clearProperty("hadoop.log.dir");
-    System.clearProperty("solr.hdfs.blockcache.enabled");
-    
-    super.tearDown();
-  }
-
-  private JobConf getJobConf() {
-    return mrCluster.createJobConf();
-  }
-
-  @Test
-  public void testPathParts() throws Exception { // see PathParts
-    FileSystem fs = dfsCluster.getFileSystem();
-    int dfsClusterPort = fs.getWorkingDirectory().toUri().getPort();
-    assertTrue(dfsClusterPort > 0);
-    JobConf jobConf = getJobConf();
-    Configuration simpleConf = new Configuration();
-    
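-    // Fully qualified hdfs:// URLs should be parsed identically under both the mini cluster job conf and a plain Configuration.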
-    for (Configuration conf : Arrays.asList(jobConf, simpleConf)) {
-      for (String queryAndFragment : Arrays.asList("", "?key=value#fragment")) {
-        for (String up : Arrays.asList("", "../")) {
-          String down = up.length() == 0 ? "foo/" : "";
-          String uploadURL = "hdfs://localhost:12345/user/foo/" + up + "bar.txt" + queryAndFragment;
-          PathParts parts = new PathParts(uploadURL, conf);
-          assertEquals(uploadURL, parts.getUploadURL());
-          assertEquals("/user/" + down + "bar.txt", parts.getURIPath());
-          assertEquals("bar.txt", parts.getName());
-          assertEquals("hdfs", parts.getScheme());
-          assertEquals("localhost", parts.getHost());
-          assertEquals(12345, parts.getPort());
-          assertEquals("hdfs://localhost:12345/user/" + down + "bar.txt", parts.getId());
-          assertEquals(parts.getId(), parts.getDownloadURL());
-          assertFileNotFound(parts);
-    
-          uploadURL = "hdfs://localhost/user/foo/" + up + "bar.txt" + queryAndFragment;
-          parts = new PathParts(uploadURL, conf);
-          assertEquals(uploadURL, parts.getUploadURL());
-          assertEquals("/user/" + down + "bar.txt", parts.getURIPath());
-          assertEquals("bar.txt", parts.getName());
-          assertEquals("hdfs", parts.getScheme());
-          assertEquals("localhost", parts.getHost());
-          assertEquals(8020, parts.getPort());
-          assertEquals("hdfs://localhost:8020/user/" + down + "bar.txt", parts.getId());
-          assertEquals(parts.getId(), parts.getDownloadURL());
-          assertFileNotFound(parts);
-        }
-      }
-    }    
-
-    for (Configuration conf : Arrays.asList(jobConf)) {
-      for (String queryAndFragment : Arrays.asList("", "?key=value#fragment")) {
-        for (String up : Arrays.asList("", "../")) {
-          // verify using absolute path
-          String down = up.length() == 0 ? "foo/" : "";
-          String uploadURL = "/user/foo/" + up + "bar.txt" + queryAndFragment;
-          PathParts parts = new PathParts(uploadURL, conf);
-          assertEquals(uploadURL, parts.getUploadURL());
-          assertEquals("/user/" + down + "bar.txt", parts.getURIPath());
-          assertEquals("bar.txt", parts.getName());
-          assertEquals("hdfs", parts.getScheme());
-          assertTrue("localhost".equals(parts.getHost()) || "localhost.localdomain".equals(parts.getHost()));
-          assertEquals(dfsClusterPort, parts.getPort());
-          assertTrue(parts.getId().equals("hdfs://localhost:" + dfsClusterPort + "/user/" + down + "bar.txt")
-                  || parts.getId().equals("hdfs://localhost.localdomain:" + dfsClusterPort + "/user/" + down + "bar.txt")
-          );
-          assertFileNotFound(parts);          
-          
-          // verify a relative path is interpreted relative to the user's home dir and resolved to an absolute path
-          uploadURL = "xuser/foo/" + up + "bar.txt" + queryAndFragment;
-          parts = new PathParts(uploadURL, conf);
-          assertEquals(uploadURL, parts.getUploadURL());
-          String homeDir = "/user/" + System.getProperty("user.name");
-          assertEquals(homeDir + "/xuser/" + down + "bar.txt", parts.getURIPath());
-          assertEquals("bar.txt", parts.getName());
-          assertEquals("hdfs", parts.getScheme());
-          assertTrue("localhost".equals(parts.getHost()) || "localhost.localdomain".equals(parts.getHost()));
-          assertEquals(dfsClusterPort, parts.getPort());
-          assertTrue(parts.getId().equals("hdfs://localhost:" + dfsClusterPort + homeDir + "/xuser/" + down + "bar.txt")
-                  || parts.getId().equals("hdfs://localhost.localdomain:" + dfsClusterPort + homeDir + "/xuser/" + down + "bar.txt")
-          );
-          assertFileNotFound(parts);
-        }
-      }
-    }
-    
-    try {
-      new PathParts("/user/foo/bar.txt", simpleConf);
-      fail("host/port resolution requires minimr conf, not a simple conf");
-    } catch (IllegalArgumentException e) {
-      ; // expected
-    }    
-  }
-
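-  // Expects getFileStatus() to throw because the referenced file has not been uploaded yet.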
-  private void assertFileNotFound(PathParts parts) {
-    try {
-      parts.getFileSystem().getFileStatus(parts.getUploadPath());
-      fail();
-    } catch (IOException e) {
-      ; // expected
-    }
-  }
-
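-  // End-to-end smoke test: runs MapReduceIndexerTool against the mini DFS/MR clusters and verifies the resulting document counts, then re-runs in dry-run mode.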
-  @Test
-  public void mrRun() throws Exception {
-    FileSystem fs = dfsCluster.getFileSystem();
-    Path inDir = fs.makeQualified(new Path("/user/testing/testMapperReducer/input"));
-    fs.delete(inDir, true);
-    String DATADIR = "/user/testing/testMapperReducer/data";
-    Path dataDir = fs.makeQualified(new Path(DATADIR));
-    fs.delete(dataDir, true);
-    Path outDir = fs.makeQualified(new Path("/user/testing/testMapperReducer/output"));
-    fs.delete(outDir, true);
-
-    assertTrue(fs.mkdirs(inDir));
-    Path INPATH = new Path(inDir, "input.txt");
-    OutputStream os = fs.create(INPATH);
-    Writer wr = new OutputStreamWriter(os, StandardCharsets.UTF_8);
-    wr.write(DATADIR + "/" + inputAvroFile);
-    wr.close();
-
-    assertTrue(fs.mkdirs(dataDir));
-    fs.copyFromLocalFile(new Path(DOCUMENTS_DIR, inputAvroFile), dataDir);
-    
-    JobConf jobConf = getJobConf();
-    jobConf.set("jobclient.output.filter", "ALL");
-    if (ENABLE_LOCAL_JOB_RUNNER) { // enable Hadoop LocalJobRunner; this makes it possible to run the job in a debugger and set breakpoints
-      jobConf.set("mapred.job.tracker", "local");
-    }
-    jobConf.setMaxMapAttempts(1);
-    jobConf.setMaxReduceAttempts(1);
-    jobConf.setJar(SEARCH_ARCHIVES_JAR);
-    
-    int shards = 2;
-    int maxReducers = Integer.MAX_VALUE;
-    if (ENABLE_LOCAL_JOB_RUNNER) {
-      // local job runner has a couple of limitations: only one reducer is supported and the DistributedCache doesn't work.
-      // see http://blog.cloudera.com/blog/2009/07/advice-on-qa-testing-your-mapreduce-jobs/
-      maxReducers = 1;
-      shards = 1;
-    }
-    
-    String[] args = new String[] {
-        "--morphline-file=" + tempDir + "/test-morphlines/solrCellDocumentTypes.conf",
-        "--morphline-id=morphline1",
-        "--solr-home-dir=" + MINIMR_CONF_DIR.getAbsolutePath(),
-        "--output-dir=" + outDir.toString(),
-        "--shards=" + shards,
-        "--verbose",
-        numRuns % 2 == 0 ? "--input-list=" + INPATH.toString() : dataDir.toString(),
-        numRuns % 3 == 0 ? "--reducers=" + shards : (numRuns % 3 == 1  ? "--reducers=-1" : "--reducers=" + Math.min(8, maxReducers))
-    };
-    if (numRuns % 3 == 2) {
-      args = concat(args, new String[] {"--fanout=2"});
-    }
-    if (numRuns == 0) {
-      // force (slow) MapReduce based randomization to get coverage for that as well
-      args = concat(new String[] {"-D", MapReduceIndexerTool.MAIN_MEMORY_RANDOMIZATION_THRESHOLD + "=-1"}, args); 
-    }
-    MapReduceIndexerTool tool = createTool();
-    int res = ToolRunner.run(jobConf, tool, args);
-    assertEquals(0, res);
-    Job job = tool.job;
-    assertTrue(job.isComplete());
-    assertTrue(job.isSuccessful());
-
-    if (numRuns % 3 != 2) {
-      // Only run this check if mtree merge is disabled.
-      // With mtree merge enabled the BatchWriter counters aren't available anymore because 
-      // variable "job" now refers to the merge job rather than the indexing job
-      assertEquals("Invalid counter " + SolrRecordWriter.class.getName() + "." + SolrCounters.DOCUMENTS_WRITTEN,
-          count, job.getCounters().findCounter(SolrCounters.class.getName(), SolrCounters.DOCUMENTS_WRITTEN.toString()).getValue());
-    }
-    
-    // Check the output is as expected
-    outDir = new Path(outDir, MapReduceIndexerTool.RESULTS_DIR);
-    Path[] outputFiles = FileUtil.stat2Paths(fs.listStatus(outDir));
-
-    System.out.println("outputfiles:" + Arrays.toString(outputFiles));
-
-    UtilsForTests.validateSolrServerDocumentCount(MINIMR_CONF_DIR, fs, outDir, count, shards);
-    
-    // run again in --dry-run mode:
-    tool = createTool();
-    args = concat(args, new String[] {"--dry-run"});
-    res = ToolRunner.run(jobConf, tool, args);
-    assertEquals(0, res);
-
-    numRuns++;
-  }
-  
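-  // Concatenates the given arrays into a single array whose component type is taken from the last argument.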
-  protected static <T> T[] concat(T[]... arrays) {
-    if (arrays.length <= 0) {
-      throw new IllegalArgumentException();
-    }
-    Class<?> clazz = null;
-    int length = 0;
-    for (T[] array : arrays) {
-      clazz = array.getClass();
-      length += array.length;
-    }
-    T[] result = (T[]) Array.newInstance(clazz.getComponentType(), length);
-    int pos = 0;
-    for (T[] array : arrays) {
-      System.arraycopy(array, 0, result, pos, array.length);
-      pos += array.length;
-    }
-    return result;
-  }
-
-}