You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sa...@apache.org on 2017/03/24 16:37:39 UTC
[58/62] lucene-solr:branch_6x: SOLR-9221: Remove Solr contribs:
map-reduce, morphlines-core and morphlines-cell
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ac221b96/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/dedup/SortingUpdateConflictResolver.java
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/dedup/SortingUpdateConflictResolver.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/dedup/SortingUpdateConflictResolver.java
deleted file mode 100644
index 24ea936..0000000
--- a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/dedup/SortingUpdateConflictResolver.java
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.hadoop.dedup;
-
-import java.util.ArrayList;
-import java.util.Collections;
-import java.util.Comparator;
-import java.util.Iterator;
-import java.util.List;
-
-import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Reducer.Context;
-import org.apache.solr.common.SolrInputDocument;
-import org.apache.solr.hadoop.HdfsFileFieldNames;
-
-/**
- * UpdateConflictResolver implementation that orders colliding updates ascending
- * from least recent to most recent (partial) update, based on a configurable
- * numeric Solr field, which defaults to the file_last_modified timestamp.
- */
-public class SortingUpdateConflictResolver implements UpdateConflictResolver, Configurable {
-
- private Configuration conf;
- private String orderByFieldName = ORDER_BY_FIELD_NAME_DEFAULT;
-
- public static final String ORDER_BY_FIELD_NAME_KEY =
- SortingUpdateConflictResolver.class.getName() + ".orderByFieldName";
-
- public static final String ORDER_BY_FIELD_NAME_DEFAULT = HdfsFileFieldNames.FILE_LAST_MODIFIED;
-
- @Override
- public void setConf(Configuration conf) {
- this.conf = conf;
- this.orderByFieldName = conf.get(ORDER_BY_FIELD_NAME_KEY, orderByFieldName);
- }
-
- @Override
- public Configuration getConf() {
- return conf;
- }
-
- protected String getOrderByFieldName() {
- return orderByFieldName;
- }
-
- @Override
- public Iterator<SolrInputDocument> orderUpdates(Text key, Iterator<SolrInputDocument> updates, Context ctx) {
- return sort(updates, getOrderByFieldName(), new SolrInputDocumentComparator.TimeStampComparator());
- }
-
- protected Iterator<SolrInputDocument> sort(Iterator<SolrInputDocument> updates, String fieldName, Comparator child) {
- // TODO: use an external merge sort in the pathological case where there are a huge amount of collisions
- List<SolrInputDocument> sortedUpdates = new ArrayList(1);
- while (updates.hasNext()) {
- sortedUpdates.add(updates.next());
- }
- if (sortedUpdates.size() > 1) { // conflicts are rare
- Collections.sort(sortedUpdates, new SolrInputDocumentComparator(fieldName, child));
- }
- return sortedUpdates.iterator();
- }
-
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ac221b96/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/dedup/UpdateConflictResolver.java
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/dedup/UpdateConflictResolver.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/dedup/UpdateConflictResolver.java
deleted file mode 100644
index 94e23e1..0000000
--- a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/dedup/UpdateConflictResolver.java
+++ /dev/null
@@ -1,71 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.hadoop.dedup;
-
-import java.util.Iterator;
-
-import org.apache.hadoop.conf.Configurable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.apache.hadoop.mapreduce.Reducer.Context;
-import org.apache.solr.common.SolrInputDocument;
-
-/**
- * Interface that enables deduplication and ordering of a series of document
- * updates for the same unique document key.
- *
- * For example, a MapReduce batch job might index multiple files in the same job
- * where some of the files contain old and new versions of the very same
- * document, using the same unique document key.
- *
- * Typically, implementations of this interface forbid collisions by throwing an
- * exception, or ignore all but the most recent document version, or, in the
- * general case, order colliding updates ascending from least recent to most
- * recent (partial) update.
- *
- * The caller of this interface (i.e. the Hadoop Reducer) will then apply the
- * updates to Solr in the order returned by the orderUpdates() method.
- *
- * Configuration: If an UpdateConflictResolver implementation also implements
- * {@link Configurable} then the Hadoop Reducer will call
- * {@link Configurable#setConf(org.apache.hadoop.conf.Configuration)} on
- * instance construction and pass the standard Hadoop configuration information.
- */
-public interface UpdateConflictResolver {
-
- /**
- * Given a list of all colliding document updates for the same unique document
- * key, this method returns zero or more documents in an application specific
- * order.
- *
- * The caller will then apply the updates for this key to Solr in the order
- * returned by the orderUpdate() method.
- *
- * @param uniqueKey
- * the document key common to all collidingUpdates mentioned below
- * @param collidingUpdates
- * all updates in the MapReduce job that have a key equal to
- * {@code uniqueKey} mentioned above. The input order is unspecified.
- * @param context
- * The <code>Context</code> passed from the {@link Reducer}
- * implementations.
- * @return the order in which the updates shall be applied to Solr
- */
- Iterator<SolrInputDocument> orderUpdates(
- Text uniqueKey, Iterator<SolrInputDocument> collidingUpdates, Context context);
-
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ac221b96/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/dedup/package-info.java
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/dedup/package-info.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/dedup/package-info.java
deleted file mode 100644
index a021282..0000000
--- a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/dedup/package-info.java
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Dedupe related code.
- */
-package org.apache.solr.hadoop.dedup;
-
-
-
-
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ac221b96/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/morphline/MorphlineCounters.java
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/morphline/MorphlineCounters.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/morphline/MorphlineCounters.java
deleted file mode 100644
index 5ba98ff..0000000
--- a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/morphline/MorphlineCounters.java
+++ /dev/null
@@ -1,47 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.hadoop.morphline;
-
-import org.apache.solr.hadoop.Utils;
-
-public enum MorphlineCounters {
-
- FILES_READ (getClassName(MorphlineMapper.class) + ": Number of files read"),
-
- FILE_BYTES_READ (getClassName(MorphlineMapper.class) + ": Number of file bytes read"),
-
- DOCS_READ (getClassName(MorphlineMapper.class) + ": Number of documents read"),
-
- PARSER_OUTPUT_BYTES (getClassName(MorphlineMapper.class) + ": Number of document bytes generated by Tika parser"),
-
- ERRORS (getClassName(MorphlineMapper.class) + ": Number of errors");
-
- private final String label;
-
- private MorphlineCounters(String label) {
- this.label = label;
- }
-
- public String toString() {
- return label;
- }
-
- private static String getClassName(Class clazz) {
- return Utils.getShortClassName(clazz);
- }
-
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ac221b96/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/morphline/MorphlineMapRunner.java
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/morphline/MorphlineMapRunner.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/morphline/MorphlineMapRunner.java
deleted file mode 100644
index 372c48b..0000000
--- a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/morphline/MorphlineMapRunner.java
+++ /dev/null
@@ -1,268 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.hadoop.morphline;
-
-import java.io.BufferedInputStream;
-import java.io.ByteArrayInputStream;
-import java.io.File;
-import java.io.IOException;
-import java.io.InputStream;
-import java.lang.invoke.MethodHandles;
-import java.util.HashMap;
-import java.util.Map;
-import java.util.TreeMap;
-import java.util.stream.Collectors;
-
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.hadoop.mapreduce.Mapper.Context;
-import org.apache.solr.client.solrj.SolrServerException;
-import org.apache.solr.hadoop.HdfsFileFieldNames;
-import org.apache.solr.hadoop.PathParts;
-import org.apache.solr.hadoop.Utils;
-import org.apache.solr.morphlines.solr.DocumentLoader;
-import org.apache.solr.morphlines.solr.SolrLocator;
-import org.apache.solr.morphlines.solr.SolrMorphlineContext;
-import org.apache.solr.schema.IndexSchema;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import org.kitesdk.morphline.api.Command;
-import org.kitesdk.morphline.api.MorphlineCompilationException;
-import org.kitesdk.morphline.api.MorphlineContext;
-import org.kitesdk.morphline.api.Record;
-import org.kitesdk.morphline.base.Compiler;
-import org.kitesdk.morphline.base.FaultTolerance;
-import org.kitesdk.morphline.base.Fields;
-import org.kitesdk.morphline.base.Metrics;
-import org.kitesdk.morphline.base.Notifications;
-import com.codahale.metrics.MetricRegistry;
-import com.codahale.metrics.Timer;
-import com.google.common.annotations.Beta;
-import com.typesafe.config.Config;
-import com.typesafe.config.ConfigFactory;
-
-/**
- * Internal helper for {@link MorphlineMapper} and dryRun mode; This API is for *INTERNAL* use only
- * and should not be considered public.
- */
-@Beta
-public final class MorphlineMapRunner {
-
- private MorphlineContext morphlineContext;
- private Command morphline;
- private IndexSchema schema;
- private Map<String, String> commandLineMorphlineHeaders;
- private boolean disableFileOpen;
- private String morphlineFileAndId;
- private final Timer elapsedTime;
-
- public static final String MORPHLINE_FILE_PARAM = "morphlineFile";
- public static final String MORPHLINE_ID_PARAM = "morphlineId";
-
- /**
- * Morphline variables can be passed from the CLI to the Morphline, e.g.:
- * hadoop ... -D morphlineVariable.zkHost=127.0.0.1:2181/solr
- */
- public static final String MORPHLINE_VARIABLE_PARAM = "morphlineVariable";
-
- /**
- * Headers, including MIME types, can also explicitly be passed by force from the CLI to Morphline, e.g:
- * hadoop ... -D morphlineField._attachment_mimetype=text/csv
- */
- public static final String MORPHLINE_FIELD_PREFIX = "morphlineField.";
-
- /**
- * Flag to disable reading of file contents if indexing just file metadata is sufficient.
- * This improves performance and confidentiality.
- */
- public static final String DISABLE_FILE_OPEN = "morphlineDisableFileOpen";
-
- private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
-
- MorphlineContext getMorphlineContext() {
- return morphlineContext;
- }
-
- IndexSchema getSchema() {
- return schema;
- }
-
- public MorphlineMapRunner(Configuration configuration, DocumentLoader loader, String solrHomeDir) throws IOException {
- if (LOG.isTraceEnabled()) {
- LOG.trace("CWD is {}", new File(".").getCanonicalPath());
- TreeMap map = new TreeMap();
- for (Map.Entry<String,String> entry : configuration) {
- map.put(entry.getKey(), entry.getValue());
- }
- LOG.trace("Configuration:\n" +
- map.entrySet().stream().map(Object::toString).collect(Collectors.joining("\n")));
- }
-
- FaultTolerance faultTolerance = new FaultTolerance(
- configuration.getBoolean(FaultTolerance.IS_PRODUCTION_MODE, false),
- configuration.getBoolean(FaultTolerance.IS_IGNORING_RECOVERABLE_EXCEPTIONS, false),
- configuration.get(FaultTolerance.RECOVERABLE_EXCEPTION_CLASSES, SolrServerException.class.getName())
- );
-
- morphlineContext = new SolrMorphlineContext.Builder()
- .setDocumentLoader(loader)
- .setExceptionHandler(faultTolerance)
- .setMetricRegistry(new MetricRegistry())
- .build();
-
- class MySolrLocator extends SolrLocator { // trick to access protected ctor
- public MySolrLocator(MorphlineContext ctx) {
- super(ctx);
- }
- }
-
- SolrLocator locator = new MySolrLocator(morphlineContext);
- locator.setSolrHomeDir(solrHomeDir);
- schema = locator.getIndexSchema();
-
- // rebuild context, now with schema
- morphlineContext = new SolrMorphlineContext.Builder()
- .setIndexSchema(schema)
- .setDocumentLoader(loader)
- .setExceptionHandler(faultTolerance)
- .setMetricRegistry(morphlineContext.getMetricRegistry())
- .build();
-
- String morphlineFile = configuration.get(MORPHLINE_FILE_PARAM);
- String morphlineId = configuration.get(MORPHLINE_ID_PARAM);
- if (morphlineFile == null || morphlineFile.trim().length() == 0) {
- throw new MorphlineCompilationException("Missing parameter: " + MORPHLINE_FILE_PARAM, null);
- }
- Map morphlineVariables = new HashMap();
- for (Map.Entry<String, String> entry : configuration) {
- String variablePrefix = MORPHLINE_VARIABLE_PARAM + ".";
- if (entry.getKey().startsWith(variablePrefix)) {
- morphlineVariables.put(entry.getKey().substring(variablePrefix.length()), entry.getValue());
- }
- }
- Config override = ConfigFactory.parseMap(morphlineVariables);
- morphline = new Compiler().compile(new File(morphlineFile), morphlineId, morphlineContext, null, override);
- morphlineFileAndId = morphlineFile + "@" + morphlineId;
-
- disableFileOpen = configuration.getBoolean(DISABLE_FILE_OPEN, false);
- LOG.debug("disableFileOpen: {}", disableFileOpen);
-
- commandLineMorphlineHeaders = new HashMap();
- for (Map.Entry<String,String> entry : configuration) {
- if (entry.getKey().startsWith(MORPHLINE_FIELD_PREFIX)) {
- commandLineMorphlineHeaders.put(entry.getKey().substring(MORPHLINE_FIELD_PREFIX.length()), entry.getValue());
- }
- }
- LOG.debug("Headers, including MIME types, passed by force from the CLI to morphline: {}", commandLineMorphlineHeaders);
-
- String metricName = MetricRegistry.name(Utils.getShortClassName(getClass()), Metrics.ELAPSED_TIME);
- this.elapsedTime = morphlineContext.getMetricRegistry().timer(metricName);
- Notifications.notifyBeginTransaction(morphline);
- }
-
- /**
- * Extract content from the path specified in the value. Key is useless.
- */
- public void map(String value, Configuration configuration, Context context) throws IOException {
- LOG.info("Processing file {}", value);
- InputStream in = null;
- Record record = null;
- Timer.Context timerContext = elapsedTime.time();
- try {
- PathParts parts = new PathParts(value.toString(), configuration);
- record = getRecord(parts);
- if (record == null) {
- return; // ignore
- }
- for (Map.Entry<String, String> entry : commandLineMorphlineHeaders.entrySet()) {
- record.replaceValues(entry.getKey(), entry.getValue());
- }
- long fileLength = parts.getFileStatus().getLen();
- if (disableFileOpen) {
- in = new ByteArrayInputStream(new byte[0]);
- } else {
- in = new BufferedInputStream(parts.getFileSystem().open(parts.getUploadPath()));
- }
- record.put(Fields.ATTACHMENT_BODY, in);
- Notifications.notifyStartSession(morphline);
- if (!morphline.process(record)) {
- LOG.warn("Morphline {} failed to process record: {}", morphlineFileAndId, record);
- }
- if (context != null) {
- context.getCounter(MorphlineCounters.class.getName(), MorphlineCounters.FILES_READ.toString()).increment(1);
- context.getCounter(MorphlineCounters.class.getName(), MorphlineCounters.FILE_BYTES_READ.toString()).increment(fileLength);
- }
- } catch (Exception e) {
- LOG.error("Unable to process file " + value, e);
- if (context != null) {
- context.getCounter(getClass().getName() + ".errors", e.getClass().getName()).increment(1);
- }
- morphlineContext.getExceptionHandler().handleException(e, record);
- } finally {
- timerContext.stop();
- if (in != null) {
- in.close();
- }
- }
- }
-
- protected Record getRecord(PathParts parts) {
- FileStatus stats;
- try {
- stats = parts.getFileStatus();
- } catch (IOException e) {
- stats = null;
- }
- if (stats == null) {
- LOG.warn("Ignoring file that somehow has become unavailable since the job was submitted: {}",
- parts.getUploadURL());
- return null;
- }
-
- Record headers = new Record();
- //headers.put(getSchema().getUniqueKeyField().getName(), parts.getId()); // use HDFS file path as docId if no docId is specified
- headers.put(Fields.BASE_ID, parts.getId()); // with sanitizeUniqueKey command, use HDFS file path as docId if no docId is specified
- headers.put(Fields.ATTACHMENT_NAME, parts.getName()); // Tika can use the file name in guessing the right MIME type
-
- // enable indexing and storing of file meta data in Solr
- headers.put(HdfsFileFieldNames.FILE_UPLOAD_URL, parts.getUploadURL());
- headers.put(HdfsFileFieldNames.FILE_DOWNLOAD_URL, parts.getDownloadURL());
- headers.put(HdfsFileFieldNames.FILE_SCHEME, parts.getScheme());
- headers.put(HdfsFileFieldNames.FILE_HOST, parts.getHost());
- headers.put(HdfsFileFieldNames.FILE_PORT, String.valueOf(parts.getPort()));
- headers.put(HdfsFileFieldNames.FILE_PATH, parts.getURIPath());
- headers.put(HdfsFileFieldNames.FILE_NAME, parts.getName());
- headers.put(HdfsFileFieldNames.FILE_LAST_MODIFIED, String.valueOf(stats.getModificationTime())); // FIXME also add in SpoolDirectorySource
- headers.put(HdfsFileFieldNames.FILE_LENGTH, String.valueOf(stats.getLen())); // FIXME also add in SpoolDirectorySource
- headers.put(HdfsFileFieldNames.FILE_OWNER, stats.getOwner());
- headers.put(HdfsFileFieldNames.FILE_GROUP, stats.getGroup());
- headers.put(HdfsFileFieldNames.FILE_PERMISSIONS_USER, stats.getPermission().getUserAction().SYMBOL);
- headers.put(HdfsFileFieldNames.FILE_PERMISSIONS_GROUP, stats.getPermission().getGroupAction().SYMBOL);
- headers.put(HdfsFileFieldNames.FILE_PERMISSIONS_OTHER, stats.getPermission().getOtherAction().SYMBOL);
- headers.put(HdfsFileFieldNames.FILE_PERMISSIONS_STICKYBIT, String.valueOf(stats.getPermission().getStickyBit()));
- // TODO: consider to add stats.getAccessTime(), stats.getReplication(), stats.isSymlink(), stats.getBlockSize()
-
- return headers;
- }
-
- public void cleanup() {
- Notifications.notifyCommitTransaction(morphline);
- Notifications.notifyShutdown(morphline);
- }
-
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ac221b96/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/morphline/MorphlineMapper.java
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/morphline/MorphlineMapper.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/morphline/MorphlineMapper.java
deleted file mode 100644
index 8ba2ea6..0000000
--- a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/morphline/MorphlineMapper.java
+++ /dev/null
@@ -1,193 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.hadoop.morphline;
-
-import java.io.IOException;
-import java.lang.invoke.MethodHandles;
-import java.util.Collection;
-import java.util.Map;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.solr.client.solrj.SolrServerException;
-import org.apache.solr.client.solrj.response.SolrPingResponse;
-import org.apache.solr.client.solrj.response.UpdateResponse;
-import org.apache.solr.common.SolrInputDocument;
-import org.apache.solr.common.SolrInputField;
-import org.apache.solr.hadoop.HeartBeater;
-import org.apache.solr.hadoop.SolrInputDocumentWritable;
-import org.apache.solr.hadoop.SolrMapper;
-import org.apache.solr.morphlines.solr.DocumentLoader;
-import org.apache.solr.schema.IndexSchema;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import com.codahale.metrics.Counter;
-import com.codahale.metrics.Counting;
-import com.codahale.metrics.Histogram;
-import com.codahale.metrics.Meter;
-import com.codahale.metrics.MetricRegistry;
-import com.codahale.metrics.Timer;
-
-/**
- * This class takes the input files, extracts the relevant content, transforms
- * it and hands SolrInputDocuments to a set of reducers.
- *
- * More specifically, it consumes a list of <offset, hdfsFilePath> input pairs.
- * For each such pair extracts a set of zero or more SolrInputDocuments and
- * sends them to a downstream Reducer. The key for the reducer is the unique id
- * of the SolrInputDocument specified in Solr schema.xml.
- */
-public class MorphlineMapper extends SolrMapper<LongWritable, Text> {
-
- private Context context;
- private MorphlineMapRunner runner;
- private HeartBeater heartBeater;
-
- private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
-
- protected IndexSchema getSchema() {
- return runner.getSchema();
- }
-
- protected Context getContext() {
- return context;
- }
-
- @Override
- protected void setup(Context context) throws IOException, InterruptedException {
- super.setup(context);
- this.context = context;
- heartBeater = new HeartBeater(context);
- this.runner = new MorphlineMapRunner(
- context.getConfiguration(), new MyDocumentLoader(), getSolrHomeDir().toString());
- }
-
- /**
- * Extract content from the path specified in the value. Key is useless.
- */
- @Override
- public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
- heartBeater.needHeartBeat();
- try {
- runner.map(value.toString(), context.getConfiguration(), context);
- } finally {
- heartBeater.cancelHeartBeat();
- }
- }
-
- @Override
- protected void cleanup(Context context) throws IOException, InterruptedException {
- heartBeater.close();
- runner.cleanup();
- addMetricsToMRCounters(runner.getMorphlineContext().getMetricRegistry(), context);
- super.cleanup(context);
- }
-
- private void addMetricsToMRCounters(MetricRegistry metricRegistry, Context context) {
- for (Map.Entry<String, Counter> entry : metricRegistry.getCounters().entrySet()) {
- addCounting(entry.getKey(), entry.getValue(), 1);
- }
- for (Map.Entry<String, Histogram> entry : metricRegistry.getHistograms().entrySet()) {
- addCounting(entry.getKey(), entry.getValue(), 1);
- }
- for (Map.Entry<String, Meter> entry : metricRegistry.getMeters().entrySet()) {
- addCounting(entry.getKey(), entry.getValue(), 1);
- }
- for (Map.Entry<String, Timer> entry : metricRegistry.getTimers().entrySet()) {
- long nanosPerMilliSec = 1000 * 1000;
- addCounting(entry.getKey(), entry.getValue(), nanosPerMilliSec);
- }
- }
-
- private void addCounting(String metricName, Counting value, long scale) {
- context.getCounter("morphline", metricName).increment(value.getCount() / scale);
- }
-
- ///////////////////////////////////////////////////////////////////////////////
- // Nested classes:
- ///////////////////////////////////////////////////////////////////////////////
- private final class MyDocumentLoader implements DocumentLoader {
-
- @Override
- public void beginTransaction() {
- }
-
- @Override
- public void load(SolrInputDocument doc) throws IOException, SolrServerException {
- String uniqueKeyFieldName = getSchema().getUniqueKeyField().getName();
- Object id = doc.getFieldValue(uniqueKeyFieldName);
- if (id == null) {
- throw new IllegalArgumentException("Missing value for (required) unique document key: " + uniqueKeyFieldName
- + " (see Solr schema.xml)");
- }
- try {
- context.write(new Text(id.toString()), new SolrInputDocumentWritable(doc));
- } catch (InterruptedException e) {
- throw new IOException("Interrupted while writing " + doc, e);
- }
-
- if (LOG.isDebugEnabled()) {
- long numParserOutputBytes = 0;
- for (SolrInputField field : doc.values()) {
- numParserOutputBytes += sizeOf(field.getValue());
- }
- context.getCounter(MorphlineCounters.class.getName(), MorphlineCounters.PARSER_OUTPUT_BYTES.toString()).increment(numParserOutputBytes);
- }
- context.getCounter(MorphlineCounters.class.getName(), MorphlineCounters.DOCS_READ.toString()).increment(1);
- }
-
- // just an approximation
- private long sizeOf(Object value) {
- if (value instanceof CharSequence) {
- return ((CharSequence) value).length();
- } else if (value instanceof Integer) {
- return 4;
- } else if (value instanceof Long) {
- return 8;
- } else if (value instanceof Collection) {
- long size = 0;
- for (Object val : (Collection) value) {
- size += sizeOf(val);
- }
- return size;
- } else {
- return String.valueOf(value).length();
- }
- }
-
- @Override
- public void commitTransaction() {
- }
-
- @Override
- public UpdateResponse rollbackTransaction() throws SolrServerException, IOException {
- return new UpdateResponse();
- }
-
- @Override
- public void shutdown() {
- }
-
- @Override
- public SolrPingResponse ping() throws SolrServerException, IOException {
- return new SolrPingResponse();
- }
-
- }
-
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ac221b96/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/morphline/package-info.java
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/morphline/package-info.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/morphline/package-info.java
deleted file mode 100644
index c754f79..0000000
--- a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/morphline/package-info.java
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * Morphlines related code.
- */
-package org.apache.solr.hadoop.morphline;
-
-
-
-
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ac221b96/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/package-info.java
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/package-info.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/package-info.java
deleted file mode 100644
index 043a150..0000000
--- a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/package-info.java
+++ /dev/null
@@ -1,25 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-/**
- * {@link org.apache.solr.hadoop.MapReduceIndexerTool} and related code.
- */
-package org.apache.solr.hadoop;
-
-
-
-
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ac221b96/solr/contrib/map-reduce/src/java/overview.html
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/java/overview.html b/solr/contrib/map-reduce/src/java/overview.html
deleted file mode 100644
index ad7c1c0..0000000
--- a/solr/contrib/map-reduce/src/java/overview.html
+++ /dev/null
@@ -1,21 +0,0 @@
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-<html>
-<body>
-Apache Solr Search Server: Solr MapReduce contrib
-</body>
-</html>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ac221b96/solr/contrib/map-reduce/src/test-files/README.txt
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/test-files/README.txt b/solr/contrib/map-reduce/src/test-files/README.txt
deleted file mode 100644
index 8905df2..0000000
--- a/solr/contrib/map-reduce/src/test-files/README.txt
+++ /dev/null
@@ -1 +0,0 @@
-The test-files by this module are located in the morphlines-core module.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ac221b96/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/AlphaNumericComparatorTest.java
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/AlphaNumericComparatorTest.java b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/AlphaNumericComparatorTest.java
deleted file mode 100644
index cab29e7..0000000
--- a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/AlphaNumericComparatorTest.java
+++ /dev/null
@@ -1,46 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.hadoop;
-
-import java.util.Comparator;
-
-import org.junit.Assert;
-import org.junit.Test;
-
-public class AlphaNumericComparatorTest extends Assert {
-
- @Test
- public void testBasic() {
- Comparator c = new AlphaNumericComparator();
- assertTrue(c.compare("a", "b") < 0);
- assertTrue(c.compare("shard1", "shard1") == 0);
- //assertTrue(c.compare("shard01", "shard1") == 0);
- assertTrue(c.compare("shard10", "shard10") == 0);
- assertTrue(c.compare("shard1", "shard2") < 0);
- assertTrue(c.compare("shard9", "shard10") < 0);
- assertTrue(c.compare("shard09", "shard10") < 0);
- assertTrue(c.compare("shard019", "shard10") > 0);
- assertTrue(c.compare("shard10", "shard11") < 0);
- assertTrue(c.compare("shard10z", "shard10z") == 0);
- assertTrue(c.compare("shard10z", "shard11z") < 0);
- assertTrue(c.compare("shard10a", "shard10z") < 0);
- assertTrue(c.compare("shard10z", "shard10a") > 0);
- assertTrue(c.compare("shard1z", "shard1z") == 0);
- assertTrue(c.compare("shard2", "shard1") > 0);
- }
-
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ac221b96/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/IdentityMapper.java
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/IdentityMapper.java b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/IdentityMapper.java
deleted file mode 100644
index 9ba70d5..0000000
--- a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/IdentityMapper.java
+++ /dev/null
@@ -1,38 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.hadoop;
-
-import java.io.IOException;
-import java.lang.invoke.MethodHandles;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public class IdentityMapper extends Mapper<LongWritable, Text, Text, NullWritable> {
-
- private static final Logger LOGGER = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
-
- @Override
- protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
- LOGGER.info("map key: {}, value: {}", key, value);
- context.write(value, NullWritable.get());
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ac221b96/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/IdentityReducer.java
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/IdentityReducer.java b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/IdentityReducer.java
deleted file mode 100644
index 1c248c6..0000000
--- a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/IdentityReducer.java
+++ /dev/null
@@ -1,37 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.hadoop;
-
-import java.io.IOException;
-import java.lang.invoke.MethodHandles;
-
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-public class IdentityReducer extends Reducer<Text, NullWritable, Text, NullWritable> {
-
- private static final Logger LOGGER = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
-
- @Override
- protected void reduce(Text key, Iterable<NullWritable> values, Context context) throws IOException, InterruptedException {
- LOGGER.info("reduce key: {}, value: {}", key, values);
- context.write(key, NullWritable.get());
- }
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ac221b96/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/LineRandomizerMapperReducerTest.java
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/LineRandomizerMapperReducerTest.java b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/LineRandomizerMapperReducerTest.java
deleted file mode 100644
index 2354fdd..0000000
--- a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/LineRandomizerMapperReducerTest.java
+++ /dev/null
@@ -1,94 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.hadoop;
-
-import java.io.IOException;
-import java.util.Arrays;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mrunit.mapreduce.MapReduceDriver;
-import org.apache.hadoop.mrunit.types.Pair;
-import org.junit.Assert;
-import org.junit.Before;
-import org.junit.Test;
-
-public class LineRandomizerMapperReducerTest extends Assert {
-
- private MapReduceDriver<LongWritable, Text, LongWritable, Text, Text, NullWritable> mapReduceDriver;
-
- @Before
- public void setUp() {
- LineRandomizerMapper mapper = new LineRandomizerMapper();
- LineRandomizerReducer reducer = new LineRandomizerReducer();
- mapReduceDriver = MapReduceDriver.newMapReduceDriver(mapper, reducer);
- }
-
- @Test
- public void testMapReduce1Item() throws IOException {
- mapReduceDriver.withInput(new LongWritable(0), new Text("hello"));
- mapReduceDriver.withOutput(new Text("hello"), NullWritable.get());
- mapReduceDriver.runTest();
- }
-
- @Test
- public void testMapReduce2Items() throws IOException {
- mapReduceDriver.withAll(Arrays.asList(
- new Pair<>(new LongWritable(0), new Text("hello")),
- new Pair<>(new LongWritable(1), new Text("world"))
- ));
- mapReduceDriver.withAllOutput(Arrays.asList(
- new Pair<>(new Text("world"), NullWritable.get()),
- new Pair<>(new Text("hello"), NullWritable.get())
- ));
- mapReduceDriver.runTest();
- }
-
- @Test
- public void testMapReduce3Items() throws IOException {
- mapReduceDriver.withAll(Arrays.asList(
- new Pair<>(new LongWritable(0), new Text("hello")),
- new Pair<>(new LongWritable(1), new Text("world")),
- new Pair<>(new LongWritable(2), new Text("nadja"))
- ));
- mapReduceDriver.withAllOutput(Arrays.asList(
- new Pair<>(new Text("nadja"), NullWritable.get()),
- new Pair<>(new Text("world"), NullWritable.get()),
- new Pair<>(new Text("hello"), NullWritable.get())
- ));
- mapReduceDriver.runTest();
- }
-
- @Test
- public void testMapReduce4Items() throws IOException {
- mapReduceDriver.withAll(Arrays.asList(
- new Pair<>(new LongWritable(0), new Text("hello")),
- new Pair<>(new LongWritable(1), new Text("world")),
- new Pair<>(new LongWritable(2), new Text("nadja")),
- new Pair<>(new LongWritable(3), new Text("basti"))
- ));
- mapReduceDriver.withAllOutput(Arrays.asList(
- new Pair<>(new Text("nadja"), NullWritable.get()),
- new Pair<>(new Text("world"), NullWritable.get()),
- new Pair<>(new Text("basti"), NullWritable.get()),
- new Pair<>(new Text("hello"), NullWritable.get())
- ));
- mapReduceDriver.runTest();
- }
-
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ac221b96/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MRUnitBase.java
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MRUnitBase.java b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MRUnitBase.java
deleted file mode 100644
index 558d662..0000000
--- a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MRUnitBase.java
+++ /dev/null
@@ -1,64 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.hadoop;
-
-import java.io.File;
-import java.io.IOException;
-import java.nio.file.Files;
-import java.util.Locale;
-
-import org.apache.commons.io.FileUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.solr.SolrTestCaseJ4;
-import org.apache.solr.hadoop.morphline.MorphlineMapRunner;
-import org.apache.solr.morphlines.solr.AbstractSolrMorphlineTestBase;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-
-public abstract class MRUnitBase extends SolrTestCaseJ4 {
-
- protected static final String RESOURCES_DIR = getFile("morphlines-core.marker").getParent();
- protected static final String DOCUMENTS_DIR = RESOURCES_DIR + "/test-documents";
- protected static File solrHomeZip;
-
- @BeforeClass
- public static void setupClass() throws Exception {
- assumeFalse("This test fails on UNIX with Turkish default locale (https://issues.apache.org/jira/browse/SOLR-6387)",
- new Locale("tr").getLanguage().equals(Locale.getDefault().getLanguage()));
- solrHomeZip = SolrOutputFormat.createSolrHomeZip(new File(RESOURCES_DIR + "/solr/mrunit"));
- assertNotNull(solrHomeZip);
- }
-
- @AfterClass
- public static void teardownClass() throws Exception {
- if (solrHomeZip != null) Files.delete(solrHomeZip.toPath());
- solrHomeZip = null;
- }
-
- protected void setupHadoopConfig(Configuration config) throws IOException {
-
- String tempDir = createTempDir().toFile().getAbsolutePath();
-
- FileUtils.copyFile(new File(RESOURCES_DIR + "/custom-mimetypes.xml"), new File(tempDir + "/custom-mimetypes.xml"));
-
- AbstractSolrMorphlineTestBase.setupMorphline(tempDir, "test-morphlines/solrCellDocumentTypes", true);
-
- config.set(MorphlineMapRunner.MORPHLINE_FILE_PARAM, tempDir + "/test-morphlines/solrCellDocumentTypes.conf");
- config.set(SolrOutputFormat.ZIP_NAME, solrHomeZip.getName());
- }
-
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ac221b96/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MapReduceIndexerToolArgumentParserTest.java
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MapReduceIndexerToolArgumentParserTest.java b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MapReduceIndexerToolArgumentParserTest.java
deleted file mode 100644
index 1aebcf7..0000000
--- a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MapReduceIndexerToolArgumentParserTest.java
+++ /dev/null
@@ -1,468 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.hadoop;
-
-import java.io.ByteArrayOutputStream;
-import java.io.File;
-import java.io.PrintStream;
-import java.io.UnsupportedEncodingException;
-import java.nio.charset.StandardCharsets;
-import java.util.Arrays;
-import java.util.Collections;
-import java.util.Locale;
-
-import org.apache.commons.io.FileUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.Path;
-import org.apache.lucene.util.Constants;
-import org.apache.solr.SolrTestCaseJ4;
-import org.apache.solr.cloud.AbstractZkTestCase;
-import org.apache.solr.hadoop.dedup.NoChangeUpdateConflictResolver;
-import org.apache.solr.hadoop.dedup.RetainMostRecentUpdateConflictResolver;
-import org.junit.After;
-import org.junit.Before;
-import org.junit.BeforeClass;
-import org.junit.Test;
-
-public class MapReduceIndexerToolArgumentParserTest extends SolrTestCaseJ4 {
-
- private Configuration conf;
- private MapReduceIndexerTool.MyArgumentParser parser;
- private MapReduceIndexerTool.Options opts;
- private PrintStream oldSystemOut;
- private PrintStream oldSystemErr;
- private ByteArrayOutputStream bout;
- private ByteArrayOutputStream berr;
-
- private static final String RESOURCES_DIR = getFile("morphlines-core.marker").getParent();
- private static final File MINIMR_INSTANCE_DIR = new File(RESOURCES_DIR + "/solr/minimr");
-
- private static final String MORPHLINE_FILE = RESOURCES_DIR + "/test-morphlines/solrCellDocumentTypes.conf";
-
- private final File solrHomeDirectory = createTempDir().toFile();
-
- @BeforeClass
- public static void beforeClass() {
- assumeFalse("Does not work on Windows, because it uses UNIX shell commands or POSIX paths", Constants.WINDOWS);
- assumeFalse("This test fails on UNIX with Turkish default locale (https://issues.apache.org/jira/browse/SOLR-6387)",
- new Locale("tr").getLanguage().equals(Locale.getDefault().getLanguage()));
- }
-
- @Before
- public void setUp() throws Exception {
- super.setUp();
- AbstractZkTestCase.SOLRHOME = solrHomeDirectory;
- FileUtils.copyDirectory(MINIMR_INSTANCE_DIR, solrHomeDirectory);
-
- conf = new Configuration();
- parser = new MapReduceIndexerTool.MyArgumentParser();
- opts = new MapReduceIndexerTool.Options();
- oldSystemOut = System.out;
- bout = new ByteArrayOutputStream();
- System.setOut(new PrintStream(bout, true, "UTF-8"));
- oldSystemErr = System.err;
- berr = new ByteArrayOutputStream();
- System.setErr(new PrintStream(berr, true, "UTF-8"));
- }
-
- @After
- public void tearDown() throws Exception {
- super.tearDown();
- System.setOut(oldSystemOut);
- System.setErr(oldSystemErr);
- }
-
- @Test
- public void testArgsParserTypicalUse() {
- String[] args = new String[] {
- "--input-list", "file:///tmp",
- "--morphline-file", MORPHLINE_FILE,
- "--morphline-id", "morphline_xyz",
- "--output-dir", "file:/tmp/foo",
- "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(),
- "--mappers", "10",
- "--reducers", "9",
- "--fanout", "8",
- "--max-segments", "7",
- "--shards", "1",
- "--verbose",
- "file:///home",
- "file:///dev",
- };
- Integer res = parser.parseArgs(args, conf, opts);
- assertNull(res != null ? res.toString() : "", res);
- assertEquals(Collections.singletonList(new Path("file:///tmp")), opts.inputLists);
- assertEquals(new Path("file:/tmp/foo"), opts.outputDir);
- assertEquals(new File(MINIMR_INSTANCE_DIR.getPath()), opts.solrHomeDir);
- assertEquals(10, opts.mappers);
- assertEquals(9, opts.reducers);
- assertEquals(8, opts.fanout);
- assertEquals(7, opts.maxSegments);
- assertEquals(new Integer(1), opts.shards);
- assertEquals(null, opts.fairSchedulerPool);
- assertTrue(opts.isVerbose);
- assertEquals(Arrays.asList(new Path("file:///home"), new Path("file:///dev")), opts.inputFiles);
- assertEquals(RetainMostRecentUpdateConflictResolver.class.getName(), opts.updateConflictResolver);
- assertEquals(MORPHLINE_FILE, opts.morphlineFile.getPath());
- assertEquals("morphline_xyz", opts.morphlineId);
- assertEmptySystemErrAndEmptySystemOut();
- }
-
- @Test
- public void testArgsParserMultipleSpecsOfSameKind() {
- String[] args = new String[] {
- "--input-list", "file:///tmp",
- "--input-list", "file:///",
- "--morphline-file", MORPHLINE_FILE,
- "--output-dir", "file:/tmp/foo",
- "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(),
- "--shards", "1",
- "file:///home",
- "file:///dev",
- };
- assertNull(parser.parseArgs(args, conf, opts));
- assertEquals(Arrays.asList(new Path("file:///tmp"), new Path("file:///")), opts.inputLists);
- assertEquals(Arrays.asList(new Path("file:///home"), new Path("file:///dev")), opts.inputFiles);
- assertEquals(new Path("file:/tmp/foo"), opts.outputDir);
- assertEquals(new File(MINIMR_INSTANCE_DIR.getPath()), opts.solrHomeDir);
- assertEmptySystemErrAndEmptySystemOut();
- }
-
- @Test
- public void testArgsParserTypicalUseWithEqualsSign() {
- String[] args = new String[] {
- "--input-list=file:///tmp",
- "--morphline-file", MORPHLINE_FILE,
- "--output-dir=file:/tmp/foo",
- "--solr-home-dir=" + MINIMR_INSTANCE_DIR.getPath(),
- "--mappers=10",
- "--shards", "1",
- "--verbose",
- "file:///home",
- "file:///dev",
- };
- assertNull(parser.parseArgs(args, conf, opts));
- assertEquals(Collections.singletonList(new Path("file:///tmp")), opts.inputLists);
- assertEquals(new Path("file:/tmp/foo"), opts.outputDir);
- assertEquals(new File(MINIMR_INSTANCE_DIR.getPath()), opts.solrHomeDir);
- assertEquals(10, opts.mappers);
- assertEquals(new Integer(1), opts.shards);
- assertEquals(null, opts.fairSchedulerPool);
- assertTrue(opts.isVerbose);
- assertEquals(Arrays.asList(new Path("file:///home"), new Path("file:///dev")), opts.inputFiles);
- assertEmptySystemErrAndEmptySystemOut();
- }
-
- @Test
- public void testArgsParserMultipleSpecsOfSameKindWithEqualsSign() {
- String[] args = new String[] {
- "--input-list=file:///tmp",
- "--input-list=file:///",
- "--morphline-file", MORPHLINE_FILE,
- "--output-dir=file:/tmp/foo",
- "--solr-home-dir=" + MINIMR_INSTANCE_DIR.getPath(),
- "--shards", "1",
- "file:///home",
- "file:///dev",
- };
- assertNull(parser.parseArgs(args, conf, opts));
- assertEquals(Arrays.asList(new Path("file:///tmp"), new Path("file:///")), opts.inputLists);
- assertEquals(Arrays.asList(new Path("file:///home"), new Path("file:///dev")), opts.inputFiles);
- assertEquals(new Path("file:/tmp/foo"), opts.outputDir);
- assertEquals(new File(MINIMR_INSTANCE_DIR.getPath()), opts.solrHomeDir);
- assertEmptySystemErrAndEmptySystemOut();
- }
-
- @Test
- public void testArgsParserHelp() throws UnsupportedEncodingException {
- String[] args = new String[] { "--help" };
- assertEquals(new Integer(0), parser.parseArgs(args, conf, opts));
- String helpText = new String(bout.toByteArray(), StandardCharsets.UTF_8);
- assertTrue(helpText.contains("MapReduce batch job driver that "));
- assertTrue(helpText.contains("bin/hadoop command"));
- assertEquals(0, berr.toByteArray().length);
- }
-
- @Test
- public void testArgsParserOk() {
- String[] args = new String[] {
- "--input-list", "file:///tmp",
- "--morphline-file", MORPHLINE_FILE,
- "--output-dir", "file:/tmp/foo",
- "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(),
- "--shards", "1",
- };
- assertNull(parser.parseArgs(args, conf, opts));
- assertEquals(new Integer(1), opts.shards);
- assertEmptySystemErrAndEmptySystemOut();
- }
-
- @Test
- public void testArgsParserUpdateConflictResolver() {
- String[] args = new String[] {
- "--input-list", "file:///tmp",
- "--morphline-file", MORPHLINE_FILE,
- "--output-dir", "file:/tmp/foo",
- "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(),
- "--shards", "1",
- "--update-conflict-resolver", NoChangeUpdateConflictResolver.class.getName(),
- };
- assertNull(parser.parseArgs(args, conf, opts));
- assertEquals(NoChangeUpdateConflictResolver.class.getName(), opts.updateConflictResolver);
- assertEmptySystemErrAndEmptySystemOut();
- }
-
- @Test
- public void testArgsParserUnknownArgName() throws Exception {
- String[] args = new String[] {
- "--xxxxxxxxinputlist", "file:///tmp",
- "--morphline-file", MORPHLINE_FILE,
- "--output-dir", "file:/tmp/foo",
- "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(),
- "--shards", "1",
- };
- assertArgumentParserException(args);
- }
-
- @Test
- public void testArgsParserFileNotFound1() throws Exception {
- String[] args = new String[] {
- "--input-list", "file:///tmp",
- "--morphline-file", MORPHLINE_FILE,
- "--output-dir", "file:/fileNotFound/foo",
- "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(),
- "--shards", "1",
- };
- assertArgumentParserException(args);
- }
-
- @Test
- public void testArgsParserFileNotFound2() throws Exception {
- String[] args = new String[] {
- "--input-list", "file:///tmp",
- "--morphline-file", MORPHLINE_FILE,
- "--output-dir", "file:/tmp/foo",
- "--solr-home-dir", "/fileNotFound",
- "--shards", "1",
- };
- assertArgumentParserException(args);
- }
-
- @Test
- public void testArgsParserIntOutOfRange() throws Exception {
- String[] args = new String[] {
- "--input-list", "file:///tmp",
- "--morphline-file", MORPHLINE_FILE,
- "--output-dir", "file:/tmp/foo",
- "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(),
- "--shards", "1",
- "--mappers", "-20"
- };
- assertArgumentParserException(args);
- }
-
- @Test
- public void testArgsParserIllegalFanout() throws Exception {
- String[] args = new String[] {
- "--input-list", "file:///tmp",
- "--morphline-file", MORPHLINE_FILE,
- "--output-dir", "file:/tmp/foo",
- "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(),
- "--shards", "1",
- "--fanout", "1" // must be >= 2
- };
- assertArgumentParserException(args);
- }
-
- @Test
- public void testArgsParserSolrHomeMustContainSolrConfigFile() throws Exception {
- String[] args = new String[] {
- "--input-list", "file:///tmp",
- "--morphline-file", MORPHLINE_FILE,
- "--output-dir", "file:/tmp/foo",
- "--shards", "1",
- "--solr-home-dir", "/",
- };
- assertArgumentParserException(args);
- }
-
- @Test
- public void testArgsShardUrlOk() {
- String[] args = new String[] {
- "--input-list", "file:///tmp",
- "--morphline-file", MORPHLINE_FILE,
- "--output-dir", "file:/tmp/foo",
- "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(),
- "--shard-url", "http://localhost:8983/solr/collection1",
- "--shard-url", "http://localhost:8983/solr/collection2",
- };
- assertNull(parser.parseArgs(args, conf, opts));
- assertEquals(Arrays.asList(
- Collections.singletonList("http://localhost:8983/solr/collection1"),
- Collections.singletonList("http://localhost:8983/solr/collection2")),
- opts.shardUrls);
- assertEquals(new Integer(2), opts.shards);
- assertEmptySystemErrAndEmptySystemOut();
- }
-
- @Test
- public void testArgsShardUrlMustHaveAParam() throws Exception {
- String[] args = new String[] {
- "--input-list", "file:///tmp",
- "--morphline-file", MORPHLINE_FILE,
- "--output-dir", "file:/tmp/foo",
- "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(),
- "--shard-url",
- };
- assertArgumentParserException(args);
- }
-
- @Test
- public void testArgsShardUrlAndShardsSucceeds() {
- String[] args = new String[] {
- "--input-list", "file:///tmp",
- "--morphline-file", MORPHLINE_FILE,
- "--output-dir", "file:/tmp/foo",
- "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(),
- "--shards", "1",
- "--shard-url", "http://localhost:8983/solr/collection1",
- };
- assertNull(parser.parseArgs(args, conf, opts));
- assertEmptySystemErrAndEmptySystemOut();
- }
-
- @Test
- public void testArgsShardUrlNoGoLive() {
- String[] args = new String[] {
- "--input-list", "file:///tmp",
- "--morphline-file", MORPHLINE_FILE,
- "--output-dir", "file:/tmp/foo",
- "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(),
- "--shard-url", "http://localhost:8983/solr/collection1"
- };
- assertNull(parser.parseArgs(args, conf, opts));
- assertEmptySystemErrAndEmptySystemOut();
- assertEquals(new Integer(1), opts.shards);
- }
-
- @Test
- public void testArgsShardUrlsAndZkhostAreMutuallyExclusive() throws Exception {
- String[] args = new String[] {
- "--input-list", "file:///tmp",
- "--morphline-file", MORPHLINE_FILE,
- "--output-dir", "file:/tmp/foo",
- "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(),
- "--shard-url", "http://localhost:8983/solr/collection1",
- "--shard-url", "http://localhost:8983/solr/collection1",
- "--zk-host", "http://localhost:2185",
- "--go-live"
- };
- assertArgumentParserException(args);
- }
-
- @Test
- public void testArgsGoLiveAndSolrUrl() {
- String[] args = new String[] {
- "--input-list", "file:///tmp",
- "--morphline-file", MORPHLINE_FILE,
- "--output-dir", "file:/tmp/foo",
- "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(),
- "--shard-url", "http://localhost:8983/solr/collection1",
- "--shard-url", "http://localhost:8983/solr/collection1",
- "--go-live"
- };
- Integer result = parser.parseArgs(args, conf, opts);
- assertNull(result);
- assertEmptySystemErrAndEmptySystemOut();
- }
-
- @Test
- public void testArgsZkHostNoGoLive() throws Exception {
- String[] args = new String[] {
- "--input-list", "file:///tmp",
- "--morphline-file", MORPHLINE_FILE,
- "--output-dir", "file:/tmp/foo",
- "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(),
- "--zk-host", "http://localhost:2185",
- };
- assertArgumentParserException(args);
- }
-
- @Test
- public void testArgsGoLiveZkHostNoCollection() throws Exception {
- String[] args = new String[] {
- "--input-list", "file:///tmp",
- "--morphline-file", MORPHLINE_FILE,
- "--output-dir", "file:/tmp/foo",
- "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(),
- "--zk-host", "http://localhost:2185",
- "--go-live"
- };
- assertArgumentParserException(args);
- }
-
- @Test
- public void testArgsGoLiveNoZkHostOrSolrUrl() throws Exception {
- String[] args = new String[] {
- "--input-list", "file:///tmp",
- "--morphline-file", MORPHLINE_FILE,
- "--output-dir", "file:/tmp/foo",
- "--solr-home-dir", MINIMR_INSTANCE_DIR.getPath(),
- "--go-live"
- };
- assertArgumentParserException(args);
- }
-
- @Test
- public void testNoSolrHomeDirOrZKHost() throws Exception {
- String[] args = new String[] {
- "--input-list", "file:///tmp",
- "--morphline-file", MORPHLINE_FILE,
- "--output-dir", "file:/tmp/foo",
- "--shards", "1",
- };
- assertArgumentParserException(args);
- }
-
- @Test
- public void testZKHostNoSolrHomeDirOk() {
- String[] args = new String[] {
- "--input-list", "file:///tmp",
- "--morphline-file", MORPHLINE_FILE,
- "--output-dir", "file:/tmp/foo",
- "--zk-host", "http://localhost:2185",
- "--collection", "collection1",
- };
- assertNull(parser.parseArgs(args, conf, opts));
- assertEmptySystemErrAndEmptySystemOut();
- }
-
- private void assertEmptySystemErrAndEmptySystemOut() {
- assertEquals(0, bout.toByteArray().length);
- assertEquals(0, berr.toByteArray().length);
- }
-
- private void assertArgumentParserException(String[] args) throws UnsupportedEncodingException {
- assertEquals("should have returned fail code", new Integer(1), parser.parseArgs(args, conf, opts));
- assertEquals("no sys out expected:" + new String(bout.toByteArray(), StandardCharsets.UTF_8), 0, bout.toByteArray().length);
- String usageText;
- usageText = new String(berr.toByteArray(), StandardCharsets.UTF_8);
-
- assertTrue("should start with usage msg \"usage: hadoop \":" + usageText, usageText.startsWith("usage: hadoop "));
- }
-
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/ac221b96/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineBasicMiniMRTest.java
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineBasicMiniMRTest.java b/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineBasicMiniMRTest.java
deleted file mode 100644
index 6479a20..0000000
--- a/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineBasicMiniMRTest.java
+++ /dev/null
@@ -1,415 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.hadoop;
-
-import java.io.File;
-import java.io.IOException;
-import java.io.OutputStream;
-import java.io.OutputStreamWriter;
-import java.io.Writer;
-import java.lang.reflect.Array;
-import java.nio.charset.StandardCharsets;
-import java.util.Arrays;
-
-import org.apache.commons.io.FileUtils;
-import org.apache.hadoop.conf.Configuration;
-import org.apache.hadoop.fs.FileSystem;
-import org.apache.hadoop.fs.FileUtil;
-import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.fs.permission.FsPermission;
-import org.apache.hadoop.hdfs.MiniDFSCluster;
-import org.apache.hadoop.mapred.JobConf;
-import org.apache.hadoop.mapreduce.Job;
-import org.apache.hadoop.security.authorize.ProxyUsers;
-import org.apache.hadoop.util.JarFinder;
-import org.apache.hadoop.util.ToolRunner;
-import org.apache.hadoop.yarn.conf.YarnConfiguration;
-import org.apache.lucene.util.Constants;
-import org.apache.lucene.util.LuceneTestCase.AwaitsFix;
-import org.apache.lucene.util.LuceneTestCase.Slow;
-import org.apache.solr.SolrTestCaseJ4;
-import org.apache.solr.cloud.AbstractZkTestCase;
-import org.apache.solr.hadoop.hack.MiniMRCluster;
-import org.apache.solr.morphlines.solr.AbstractSolrMorphlineTestBase;
-import org.apache.solr.util.BadHdfsThreadsFilter;
-import org.apache.solr.util.BadMrClusterThreadsFilter;
-import org.junit.After;
-import org.junit.AfterClass;
-import org.junit.BeforeClass;
-import org.junit.Test;
-
-import com.carrotsearch.randomizedtesting.annotations.Nightly;
-import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction;
-import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction.Action;
-import com.carrotsearch.randomizedtesting.annotations.ThreadLeakFilters;
-import com.carrotsearch.randomizedtesting.annotations.ThreadLeakLingering;
-import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies;
-import com.carrotsearch.randomizedtesting.annotations.ThreadLeakZombies.Consequence;
-
-@ThreadLeakAction({Action.WARN})
-@ThreadLeakLingering(linger = 0)
-@ThreadLeakZombies(Consequence.CONTINUE)
-@ThreadLeakFilters(defaultFilters = true, filters = {
- BadHdfsThreadsFilter.class, BadMrClusterThreadsFilter.class // hdfs currently leaks thread(s)
-})
-@Slow
-@Nightly
-@AwaitsFix(bugUrl = "https://issues.apache.org/jira/browse/SOLR-9076")
-public class MorphlineBasicMiniMRTest extends SolrTestCaseJ4 {
-
- private static final boolean ENABLE_LOCAL_JOB_RUNNER = false; // for debugging only
- private static final String RESOURCES_DIR = getFile("morphlines-core.marker").getParent();
- private static final String DOCUMENTS_DIR = RESOURCES_DIR + "/test-documents";
- private static final File MINIMR_CONF_DIR = new File(RESOURCES_DIR + "/solr/minimr");
-
- private static String SEARCH_ARCHIVES_JAR;
-
- private static MiniDFSCluster dfsCluster = null;
- private static MiniMRCluster mrCluster = null;
- private static int numRuns = 0;
-
- private final String inputAvroFile;
- private final int count;
-
- private static String tempDir;
-
- private static File solrHomeDirectory;
-
- protected MapReduceIndexerTool createTool() {
- return new MapReduceIndexerTool();
- }
-
- public MorphlineBasicMiniMRTest() {
- int data = random().nextInt(3);
- switch (data) {
- case 0:
- this.inputAvroFile = "sample-statuses-20120906-141433.avro";
- this.count = 2;
- break;
- case 1:
- this.inputAvroFile = "sample-statuses-20120521-100919.avro";
- this.count = 20;
- break;
- case 2:
- this.inputAvroFile = "sample-statuses-20120906-141433-medium.avro";
- this.count = 2104;
- break;
- default:
- throw new RuntimeException("Test setup is broken");
- }
-
- }
-
- @BeforeClass
- public static void setupClass() throws Exception {
- solrHomeDirectory = createTempDir().toFile();
-
- assumeFalse("HDFS tests were disabled by -Dtests.disableHdfs",
- Boolean.parseBoolean(System.getProperty("tests.disableHdfs", "false")));
-
- assumeFalse("FIXME: This test does not work with Windows because of native library requirements", Constants.WINDOWS);
-
- AbstractZkTestCase.SOLRHOME = solrHomeDirectory;
- FileUtils.copyDirectory(MINIMR_CONF_DIR, solrHomeDirectory);
- File dataDir = createTempDir().toFile();
- tempDir = dataDir.getAbsolutePath();
- new File(tempDir).mkdirs();
- FileUtils.copyFile(new File(RESOURCES_DIR + "/custom-mimetypes.xml"), new File(tempDir + "/custom-mimetypes.xml"));
-
- AbstractSolrMorphlineTestBase.setupMorphline(tempDir, "test-morphlines/solrCellDocumentTypes", true);
-
- System.setProperty("hadoop.log.dir", new File(solrHomeDirectory, "logs").getAbsolutePath());
-
- int taskTrackers = 1;
- int dataNodes = 2;
-// String proxyUser = System.getProperty("user.name");
-// String proxyGroup = "g";
-// StringBuilder sb = new StringBuilder();
-// sb.append("127.0.0.1,localhost");
-// for (InetAddress i : InetAddress.getAllByName(InetAddress.getLocalHost().getHostName())) {
-// sb.append(",").append(i.getCanonicalHostName());
-// }
-
- new File(dataDir, "nm-local-dirs").mkdirs();
-
- System.setProperty("solr.hdfs.blockcache.enabled", "false");
-
- System.setProperty("test.build.dir", dataDir + File.separator + "hdfs" + File.separator + "test-build-dir");
- System.setProperty("test.build.data", dataDir + File.separator + "hdfs" + File.separator + "build");
- System.setProperty("test.cache.data", dataDir + File.separator + "hdfs" + File.separator + "cache");
-
- // Initialize AFTER test.build.dir is set, JarFinder uses it.
- SEARCH_ARCHIVES_JAR = JarFinder.getJar(MapReduceIndexerTool.class);
-
- JobConf conf = new JobConf();
- conf.set("dfs.block.access.token.enable", "false");
- conf.set("dfs.permissions", "true");
- conf.set("hadoop.security.authentication", "simple");
- conf.set(YarnConfiguration.NM_LOCAL_DIRS, dataDir.getPath() + File.separator + "nm-local-dirs");
- conf.set(YarnConfiguration.DEFAULT_NM_LOG_DIRS, dataDir + File.separator + "nm-logs");
- conf.set("testWorkDir", dataDir.getPath() + File.separator + "testWorkDir");
- conf.set("mapreduce.jobhistory.minicluster.fixed.ports", "false");
- conf.set("mapreduce.jobhistory.admin.address", "0.0.0.0:0");
-
- dfsCluster = new MiniDFSCluster(conf, dataNodes, true, null);
- FileSystem fileSystem = dfsCluster.getFileSystem();
- fileSystem.mkdirs(new Path("/tmp"));
- fileSystem.mkdirs(new Path("/user"));
- fileSystem.mkdirs(new Path("/hadoop/mapred/system"));
- fileSystem.setPermission(new Path("/tmp"), FsPermission.valueOf("-rwxrwxrwx"));
- fileSystem.setPermission(new Path("/user"), FsPermission.valueOf("-rwxrwxrwx"));
- fileSystem.setPermission(new Path("/hadoop/mapred/system"), FsPermission.valueOf("-rwx------"));
- String nnURI = fileSystem.getUri().toString();
- int numDirs = 1;
- String[] racks = null;
- String[] hosts = null;
-
- mrCluster = new MiniMRCluster(0, 0, taskTrackers, nnURI, numDirs, racks, hosts, null, conf);
- ProxyUsers.refreshSuperUserGroupsConfiguration(conf);
- }
-
- @AfterClass
- public static void teardownClass() throws Exception {
- System.clearProperty("solr.hdfs.blockcache.enabled");
- System.clearProperty("test.build.dir");
- System.clearProperty("test.build.data");
- System.clearProperty("test.cache.data");
-
- if (mrCluster != null) {
- mrCluster.shutdown();
- mrCluster = null;
- }
- if (dfsCluster != null) {
- dfsCluster.shutdown();
- dfsCluster = null;
- }
-
- FileSystem.closeAll();
- }
-
- @After
- public void tearDown() throws Exception {
- System.clearProperty("hadoop.log.dir");
- System.clearProperty("solr.hdfs.blockcache.enabled");
-
- super.tearDown();
- }
-
- private JobConf getJobConf() {
- return mrCluster.createJobConf();
- }
-
- @Test
- public void testPathParts() throws Exception { // see PathParts
- FileSystem fs = dfsCluster.getFileSystem();
- int dfsClusterPort = fs.getWorkingDirectory().toUri().getPort();
- assertTrue(dfsClusterPort > 0);
- JobConf jobConf = getJobConf();
- Configuration simpleConf = new Configuration();
-
- for (Configuration conf : Arrays.asList(jobConf, simpleConf)) {
- for (String queryAndFragment : Arrays.asList("", "?key=value#fragment")) {
- for (String up : Arrays.asList("", "../")) {
- String down = up.length() == 0 ? "foo/" : "";
- String uploadURL = "hdfs://localhost:12345/user/foo/" + up + "bar.txt" + queryAndFragment;
- PathParts parts = new PathParts(uploadURL, conf);
- assertEquals(uploadURL, parts.getUploadURL());
- assertEquals("/user/" + down + "bar.txt", parts.getURIPath());
- assertEquals("bar.txt", parts.getName());
- assertEquals("hdfs", parts.getScheme());
- assertEquals("localhost", parts.getHost());
- assertEquals(12345, parts.getPort());
- assertEquals("hdfs://localhost:12345/user/" + down + "bar.txt", parts.getId());
- assertEquals(parts.getId(), parts.getDownloadURL());
- assertFileNotFound(parts);
-
- uploadURL = "hdfs://localhost/user/foo/" + up + "bar.txt" + queryAndFragment;
- parts = new PathParts(uploadURL, conf);
- assertEquals(uploadURL, parts.getUploadURL());
- assertEquals("/user/" + down + "bar.txt", parts.getURIPath());
- assertEquals("bar.txt", parts.getName());
- assertEquals("hdfs", parts.getScheme());
- assertEquals("localhost", parts.getHost());
- assertEquals(8020, parts.getPort());
- assertEquals("hdfs://localhost:8020/user/" + down + "bar.txt", parts.getId());
- assertEquals(parts.getId(), parts.getDownloadURL());
- assertFileNotFound(parts);
- }
- }
- }
-
- for (Configuration conf : Arrays.asList(jobConf)) {
- for (String queryAndFragment : Arrays.asList("", "?key=value#fragment")) {
- for (String up : Arrays.asList("", "../")) {
- // verify using absolute path
- String down = up.length() == 0 ? "foo/" : "";
- String uploadURL = "/user/foo/" + up + "bar.txt" + queryAndFragment;
- PathParts parts = new PathParts(uploadURL, conf);
- assertEquals(uploadURL, parts.getUploadURL());
- assertEquals("/user/" + down + "bar.txt", parts.getURIPath());
- assertEquals("bar.txt", parts.getName());
- assertEquals("hdfs", parts.getScheme());
- assertTrue("localhost".equals(parts.getHost()) || "localhost.localdomain".equals(parts.getHost()));
- assertEquals(dfsClusterPort, parts.getPort());
- assertTrue(parts.getId().equals("hdfs://localhost:" + dfsClusterPort + "/user/" + down + "bar.txt")
- || parts.getId().equals("hdfs://localhost.localdomain:" + dfsClusterPort + "/user/" + down + "bar.txt")
- );
- assertFileNotFound(parts);
-
- // verify relative path is interpreted to be relative to user's home dir and resolved to an absolute path
- uploadURL = "xuser/foo/" + up + "bar.txt" + queryAndFragment;
- parts = new PathParts(uploadURL, conf);
- assertEquals(uploadURL, parts.getUploadURL());
- String homeDir = "/user/" + System.getProperty("user.name");
- assertEquals(homeDir + "/xuser/" + down + "bar.txt", parts.getURIPath());
- assertEquals("bar.txt", parts.getName());
- assertEquals("hdfs", parts.getScheme());
- assertTrue("localhost".equals(parts.getHost()) || "localhost.localdomain".equals(parts.getHost()));
- assertEquals(dfsClusterPort, parts.getPort());
- assertTrue(parts.getId().equals("hdfs://localhost:" + dfsClusterPort + homeDir + "/xuser/" + down + "bar.txt")
- || parts.getId().equals("hdfs://localhost.localdomain:" + dfsClusterPort + homeDir + "/xuser/" + down + "bar.txt")
- );
- assertFileNotFound(parts);
- }
- }
- }
-
- try {
- new PathParts("/user/foo/bar.txt", simpleConf);
- fail("host/port resolution requires minimr conf, not a simple conf");
- } catch (IllegalArgumentException e) {
- ; // expected
- }
- }
-
- private void assertFileNotFound(PathParts parts) {
- try {
- parts.getFileSystem().getFileStatus(parts.getUploadPath());
- fail();
- } catch (IOException e) {
- ; // expected
- }
- }
-
- @Test
- public void mrRun() throws Exception {
- FileSystem fs = dfsCluster.getFileSystem();
- Path inDir = fs.makeQualified(new Path("/user/testing/testMapperReducer/input"));
- fs.delete(inDir, true);
- String DATADIR = "/user/testing/testMapperReducer/data";
- Path dataDir = fs.makeQualified(new Path(DATADIR));
- fs.delete(dataDir, true);
- Path outDir = fs.makeQualified(new Path("/user/testing/testMapperReducer/output"));
- fs.delete(outDir, true);
-
- assertTrue(fs.mkdirs(inDir));
- Path INPATH = new Path(inDir, "input.txt");
- OutputStream os = fs.create(INPATH);
- Writer wr = new OutputStreamWriter(os, StandardCharsets.UTF_8);
- wr.write(DATADIR + "/" + inputAvroFile);
- wr.close();
-
- assertTrue(fs.mkdirs(dataDir));
- fs.copyFromLocalFile(new Path(DOCUMENTS_DIR, inputAvroFile), dataDir);
-
- JobConf jobConf = getJobConf();
- jobConf.set("jobclient.output.filter", "ALL");
- if (ENABLE_LOCAL_JOB_RUNNER) { // enable Hadoop LocalJobRunner; this enables to run in debugger and set breakpoints
- jobConf.set("mapred.job.tracker", "local");
- }
- jobConf.setMaxMapAttempts(1);
- jobConf.setMaxReduceAttempts(1);
- jobConf.setJar(SEARCH_ARCHIVES_JAR);
-
- int shards = 2;
- int maxReducers = Integer.MAX_VALUE;
- if (ENABLE_LOCAL_JOB_RUNNER) {
- // local job runner has a couple of limitations: only one reducer is supported and the DistributedCache doesn't work.
- // see http://blog.cloudera.com/blog/2009/07/advice-on-qa-testing-your-mapreduce-jobs/
- maxReducers = 1;
- shards = 1;
- }
-
- String[] args = new String[] {
- "--morphline-file=" + tempDir + "/test-morphlines/solrCellDocumentTypes.conf",
- "--morphline-id=morphline1",
- "--solr-home-dir=" + MINIMR_CONF_DIR.getAbsolutePath(),
- "--output-dir=" + outDir.toString(),
- "--shards=" + shards,
- "--verbose",
- numRuns % 2 == 0 ? "--input-list=" + INPATH.toString() : dataDir.toString(),
- numRuns % 3 == 0 ? "--reducers=" + shards : (numRuns % 3 == 1 ? "--reducers=-1" : "--reducers=" + Math.min(8, maxReducers))
- };
- if (numRuns % 3 == 2) {
- args = concat(args, new String[] {"--fanout=2"});
- }
- if (numRuns == 0) {
- // force (slow) MapReduce based randomization to get coverage for that as well
- args = concat(new String[] {"-D", MapReduceIndexerTool.MAIN_MEMORY_RANDOMIZATION_THRESHOLD + "=-1"}, args);
- }
- MapReduceIndexerTool tool = createTool();
- int res = ToolRunner.run(jobConf, tool, args);
- assertEquals(0, res);
- Job job = tool.job;
- assertTrue(job.isComplete());
- assertTrue(job.isSuccessful());
-
- if (numRuns % 3 != 2) {
- // Only run this check if mtree merge is disabled.
- // With mtree merge enabled the BatchWriter counters aren't available anymore because
- // variable "job" now refers to the merge job rather than the indexing job
- assertEquals("Invalid counter " + SolrRecordWriter.class.getName() + "." + SolrCounters.DOCUMENTS_WRITTEN,
- count, job.getCounters().findCounter(SolrCounters.class.getName(), SolrCounters.DOCUMENTS_WRITTEN.toString()).getValue());
- }
-
- // Check the output is as expected
- outDir = new Path(outDir, MapReduceIndexerTool.RESULTS_DIR);
- Path[] outputFiles = FileUtil.stat2Paths(fs.listStatus(outDir));
-
- System.out.println("outputfiles:" + Arrays.toString(outputFiles));
-
- UtilsForTests.validateSolrServerDocumentCount(MINIMR_CONF_DIR, fs, outDir, count, shards);
-
- // run again with --dryrun mode:
- tool = createTool();
- args = concat(args, new String[] {"--dry-run"});
- res = ToolRunner.run(jobConf, tool, args);
- assertEquals(0, res);
-
- numRuns++;
- }
-
- protected static <T> T[] concat(T[]... arrays) {
- if (arrays.length <= 0) {
- throw new IllegalArgumentException();
- }
- Class clazz = null;
- int length = 0;
- for (T[] array : arrays) {
- clazz = array.getClass();
- length += array.length;
- }
- T[] result = (T[]) Array.newInstance(clazz.getComponentType(), length);
- int pos = 0;
- for (T[] array : arrays) {
- System.arraycopy(array, 0, result, pos, array.length);
- pos += array.length;
- }
- return result;
- }
-
-}