You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sa...@apache.org on 2017/03/24 16:37:11 UTC
[30/62] lucene-solr:master: SOLR-9221: Remove Solr contribs: map-reduce, morphlines-core and morphlines-cell

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/BatchWriter.java
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/BatchWriter.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/BatchWriter.java
deleted file mode 100644
index 24458d5..0000000
--- a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/BatchWriter.java
+++ /dev/null
@@ -1,243 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.hadoop;
-
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.TaskID;
-import org.apache.solr.client.solrj.SolrServerException;
-import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
-import org.apache.solr.client.solrj.response.UpdateResponse;
-import org.apache.solr.common.SolrInputDocument;
-import org.apache.solr.common.util.ExecutorUtil;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.IOException;
-import java.lang.invoke.MethodHandles;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.List;
-import java.util.Locale;
-import java.util.concurrent.LinkedBlockingQueue;
-import java.util.concurrent.ThreadPoolExecutor;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicInteger;
-
-/**
- * Enables adding batches of documents to an EmbeddedSolrServer.
- *
- * <p>With {@code writerThreads == 0} every batch is written synchronously on the
- * calling thread. Otherwise batches are handed to a thread pool with a bounded
- * queue; when the queue is full the batch runs on the calling thread
- * (caller-runs policy). A failure raised while writing a batch is stored in
- * {@link #batchWriteException} and rethrown by a later {@link #queueBatch} call.
- */
-class BatchWriter {
-
-  private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
-
-  /** Number of nanoseconds in one second, used to convert elapsed times for logging. */
-  private static final float NANOS_PER_SECOND = 1.0e9f;
-
-  private final EmbeddedSolrServer solr;
-
-  /** Most recent failure recorded by {@link #runUpdate}; cleared by {@link #throwIf()}. */
-  private volatile Exception batchWriteException = null;
-
-  public Exception getBatchWriteException() {
-    return batchWriteException;
-  }
-
-  public void setBatchWriteException(Exception batchWriteException) {
-    this.batchWriteException = batchWriteException;
-  }
-
-  /** The number of writing threads. */
-  final int writerThreads;
-
-  /** Queue Size */
-  final int queueSize;
-
-  /** Pool that executes batches; {@code null} in the single threaded case. */
-  private final ThreadPoolExecutor batchPool;
-
-  /** Task id passed to the counter updates made by {@link #runUpdate}. */
-  private final TaskID taskId;
-
-  /** The number of batches that are currently inside {@link Batch#run()}. */
-  AtomicInteger executingBatches = new AtomicInteger(0);
-
-  /**
-   * A unit of work holding a private copy of the documents to write and, once
-   * it has run, the response returned by {@link BatchWriter#runUpdate}.
-   */
-  final class Batch implements Runnable {
-
-    private List<SolrInputDocument> documents;
-    private UpdateResponse result;
-
-    /** Copies {@code batch} so later changes to the caller's collection are not seen. */
-    public Batch(Collection<SolrInputDocument> batch) {
-      documents = new ArrayList<>(batch);
-    }
-
-    /** Writes the documents and keeps {@link #executingBatches} up to date while doing so. */
-    @Override
-    public void run() {
-      try {
-        executingBatches.getAndIncrement();
-        result = runUpdate(documents);
-      } finally {
-        executingBatches.getAndDecrement();
-      }
-    }
-
-    protected List<SolrInputDocument> getDocuments() {
-      return documents;
-    }
-
-    protected void setDocuments(List<SolrInputDocument> documents) {
-      this.documents = documents;
-    }
-
-    protected UpdateResponse getResult() {
-      return result;
-    }
-
-    protected void setResult(UpdateResponse result) {
-      this.result = result;
-    }
-
-    /** Replaces the content of this batch with a copy of {@code documents} and forgets the result. */
-    protected void reset(List<SolrInputDocument> documents) {
-      if (this.documents == null) {
-        this.documents = new ArrayList<>(documents);
-      } else {
-        this.documents.clear();
-        this.documents.addAll(documents);
-      }
-      result = null;
-    }
-
-    /** Replaces the content of this batch with the single {@code document} and forgets the result. */
-    protected void reset(SolrInputDocument document) {
-      if (this.documents == null) {
-        this.documents = new ArrayList<>();
-      } else {
-        this.documents.clear();
-      }
-      this.documents.add(document);
-      result = null;
-    }
-  }
-
-  /**
-   * Adds {@code batchToWrite} to Solr and updates the task counters.
-   *
-   * <p>Nothing is thrown from here: any {@link Throwable} is logged, counted and
-   * remembered via {@link #setBatchWriteException} so that {@link #throwIf()} can
-   * report it later.
-   *
-   * @param batchToWrite the documents to add
-   * @return the Solr response, or {@code null} if the update failed
-   */
-  protected UpdateResponse runUpdate(List<SolrInputDocument> batchToWrite) {
-    try {
-      UpdateResponse result = solr.add(batchToWrite);
-      SolrRecordWriter.incrementCounter(taskId, SolrCounters.class.getName(), SolrCounters.BATCHES_WRITTEN.toString(), 1);
-      SolrRecordWriter.incrementCounter(taskId, SolrCounters.class.getName(), SolrCounters.DOCUMENTS_WRITTEN.toString(), batchToWrite.size());
-      if (LOG.isDebugEnabled()) {
-        SolrRecordWriter.incrementCounter(taskId, SolrCounters.class.getName(), SolrCounters.BATCH_WRITE_TIME.toString(), result.getElapsedTime());
-      }
-      return result;
-    } catch (Throwable e) {
-      // The stored state is typed as Exception, so anything else is wrapped.
-      if (e instanceof Exception) {
-        setBatchWriteException((Exception) e);
-      } else {
-        setBatchWriteException(new Exception(e));
-      }
-      SolrRecordWriter.incrementCounter(taskId, getClass().getName() + ".errors", e.getClass().getName(), 1);
-      LOG.error("Unable to process batch", e);
-      return null;
-    }
-  }
-
-  /**
-   * Creates the writer and, unless {@code writerThreads} is zero, the thread
-   * pool that executes the batches.
-   *
-   * @param solr the embedded server that receives the documents
-   * @param batchSize not used by this class
-   * @param tid task id passed to the counter updates
-   * @param writerThreads number of pool threads; {@code 0} selects synchronous writes
-   * @param queueSize capacity of the queue in front of the pool
-   */
-  public BatchWriter(EmbeddedSolrServer solr, int batchSize, TaskID tid,
-      int writerThreads, int queueSize) {
-    this.solr = solr;
-    this.writerThreads = writerThreads;
-    this.queueSize = queueSize;
-    this.taskId = tid;
-
-    if (writerThreads != 0) {
-      batchPool = new ExecutorUtil.MDCAwareThreadPoolExecutor(writerThreads, writerThreads, 5,
-          TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>(queueSize),
-          new ThreadPoolExecutor.CallerRunsPolicy());
-    } else { // single threaded case
-      batchPool = null;
-    }
-  }
-
-  /**
-   * Queues a copy of {@code batch} for writing, or writes it immediately in the
-   * single threaded case.
-   *
-   * @param batch the documents to write
-   * @throws IOException if an earlier (or, when single threaded, this) batch failed
-   * @throws SolrServerException if an earlier (or, when single threaded, this) batch
-   *         failed with a Solr error
-   */
-  public void queueBatch(Collection<SolrInputDocument> batch)
-      throws IOException, SolrServerException {
-
-    throwIf();
-    Batch b = new Batch(batch);
-    if (batchPool != null) {
-      batchPool.execute(b);
-    } else { // single threaded case
-      b.run();
-      throwIf();
-    }
-  }
-
-  /**
-   * Waits for all queued batches, then commits, optimizes, commits again and
-   * closes the Solr server.
-   *
-   * <p>NOTE(review): this method does not call {@link #throwIf()}, so a failure
-   * recorded by the last pooled batches appears to go unreported - confirm
-   * whether callers check {@link #getBatchWriteException()} themselves.
-   *
-   * @param context used for status updates, configuration and the merge time counter
-   */
-  public synchronized void close(TaskAttemptContext context)
-      throws InterruptedException, SolrServerException, IOException {
-
-    if (batchPool != null) {
-      context.setStatus("Waiting for batches to complete");
-      batchPool.shutdown();
-
-      while (!batchPool.isTerminated()) {
-        LOG.info(String.format(Locale.ENGLISH,
-            "Waiting for %d items and %d threads to finish executing", batchPool
-                .getQueue().size(), batchPool.getActiveCount()));
-        batchPool.awaitTermination(5, TimeUnit.SECONDS);
-      }
-    }
-    context.setStatus("Committing Solr Phase 1");
-    solr.commit(true, false);
-    context.setStatus("Optimizing Solr");
-    int maxSegments = context.getConfiguration().getInt(SolrOutputFormat.SOLR_RECORD_WRITER_MAX_SEGMENTS, 1);
-    LOG.info("Optimizing Solr: forcing merge down to {} segments", maxSegments);
-    long start = System.nanoTime();
-    solr.optimize(true, false, maxSegments);
-    long elapsedNanos = System.nanoTime() - start;
-    context.getCounter(SolrCounters.class.getName(), SolrCounters.PHYSICAL_REDUCER_MERGE_TIME.toString()).increment(elapsedNanos);
-    // In Java '^' is bitwise XOR (10^9 == 3), so a real power-of-ten constant is used.
-    float secs = elapsedNanos / NANOS_PER_SECOND;
-    LOG.info("Optimizing Solr: done forcing merge down to {} segments in {} secs", maxSegments, secs);
-    context.setStatus("Committing Solr Phase 2");
-    solr.commit(true, false);
-    context.setStatus("Shutting down Solr");
-    solr.close();
-  }
-
-  /**
-   * Throw a legal exception if a previous batch write had an exception. The
-   * previous state is cleared. Uses {@link #batchWriteException} for the state
-   * from the last exception.
-   *
-   * This will lose individual exceptions if the exceptions happen rapidly,
-   * because only the most recent one is kept.
-   *
-   * @throws IOException On low level IO error
-   * @throws SolrServerException On Solr Exception
-   */
-  private void throwIf() throws IOException, SolrServerException {
-
-    final Exception last = batchWriteException;
-    batchWriteException = null;
-
-    if (last == null) {
-      return;
-    }
-    if (last instanceof SolrServerException) {
-      throw (SolrServerException) last;
-    }
-    if (last instanceof IOException) {
-      throw (IOException) last;
-    }
-    throw new IOException("Batch Write Failure", last);
-  }
-}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/DataInputInputStream.java
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/DataInputInputStream.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/DataInputInputStream.java
deleted file mode 100644
index d1d46bc..0000000
--- a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/DataInputInputStream.java
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.hadoop;
-
-import java.io.DataInput;
-import java.io.IOException;
-import java.io.InputStream;
-
-import org.apache.hadoop.classification.InterfaceAudience;
-
-/**
- * An InputStream that wraps a DataInput.
- *
- * <p>{@link DataInput} reports the end of its data with an
- * {@link java.io.EOFException}; {@link #read()} translates that into the
- * {@code -1} return value required by the {@link InputStream#read()} contract.
- *
- * @see DataOutputOutputStream
- */
-@InterfaceAudience.Private
-public class DataInputInputStream extends InputStream {
-
-  /** Source of every byte returned by this stream. */
-  private final DataInput in;
-
-  /**
-   * Construct an InputStream from the given DataInput. If 'in'
-   * is already an InputStream, simply returns it. Otherwise, wraps
-   * it in an InputStream.
-   * @param in the DataInput to wrap
-   * @return an InputStream instance that reads from 'in'
-   */
-  public static InputStream constructInputStream(DataInput in) {
-    if (in instanceof InputStream) {
-      return (InputStream)in;
-    } else {
-      return new DataInputInputStream(in);
-    }
-  }
-
-
-  /**
-   * Wraps {@code in} unconditionally.
-   * @param in the DataInput to read from
-   */
-  public DataInputInputStream(DataInput in) {
-    this.in = in;
-  }
-
-  /**
-   * Reads the next byte from the wrapped DataInput.
-   * @return the byte as a value in the range 0-255, or {@code -1} once the
-   *         wrapped DataInput has no more data
-   * @throws IOException if the wrapped DataInput fails for any other reason
-   */
-  @Override
-  public int read() throws IOException {
-    try {
-      return in.readUnsignedByte();
-    } catch (java.io.EOFException endOfInput) {
-      // InputStream signals end of stream with -1 instead of an exception.
-      return -1;
-    }
-  }
-}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/DataOutputOutputStream.java
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/DataOutputOutputStream.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/DataOutputOutputStream.java
deleted file mode 100644
index 389c52a..0000000
--- a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/DataOutputOutputStream.java
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.hadoop;
-
-import java.io.DataOutput;
-import java.io.IOException;
-import java.io.OutputStream;
-
-import org.apache.hadoop.classification.InterfaceAudience;
-
-/**
- * Adapter that lets a {@link DataOutput} be used wherever an
- * {@link OutputStream} is expected.
- */
-@InterfaceAudience.Private
-public class DataOutputOutputStream extends OutputStream {
-
-  /** Destination that receives everything written to this stream. */
-  private final DataOutput out;
-
-  /** Instances are only created through {@link #constructOutputStream(DataOutput)}. */
-  private DataOutputOutputStream(DataOutput out) {
-    this.out = out;
-  }
-
-  /**
-   * Returns an OutputStream view of the given DataOutput. An argument that
-   * already is an OutputStream is returned unchanged; anything else is
-   * wrapped in a new adapter.
-   * @param out the DataOutput to wrap
-   * @return an OutputStream instance that outputs to 'out'
-   */
-  public static OutputStream constructOutputStream(DataOutput out) {
-    return (out instanceof OutputStream)
-        ? (OutputStream) out
-        : new DataOutputOutputStream(out);
-  }
-
-  /** Forwards the whole array to the wrapped DataOutput. */
-  @Override
-  public void write(byte[] b) throws IOException {
-    out.write(b);
-  }
-
-  /** Forwards {@code len} bytes of {@code b}, starting at {@code off}, to the wrapped DataOutput. */
-  @Override
-  public void write(byte[] b, int off, int len) throws IOException {
-    out.write(b, off, len);
-  }
-
-  /** Forwards a single byte to the wrapped DataOutput via {@code writeByte}. */
-  @Override
-  public void write(int b) throws IOException {
-    out.writeByte(b);
-  }
-}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/DryRunDocumentLoader.java
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/DryRunDocumentLoader.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/DryRunDocumentLoader.java
deleted file mode 100644
index bacf1d0..0000000
--- a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/DryRunDocumentLoader.java
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.hadoop;
-
-import org.apache.solr.client.solrj.response.SolrPingResponse;
-import org.apache.solr.client.solrj.response.UpdateResponse;
-import org.apache.solr.common.SolrInputDocument;
-import org.apache.solr.morphlines.solr.DocumentLoader;
-
-/**
- * Prints documents to stdout instead of loading them into Solr for quicker turnaround during early
- * trial & debug sessions.
- */
-final class DryRunDocumentLoader implements DocumentLoader {
-
-  @Override
-  public void beginTransaction() {
-  }
-
-  @Override
-  public void load(SolrInputDocument doc) {
-    System.out.println("dryrun: " + doc);
-  }
-
-  @Override
-  public void commitTransaction() {
-  }
-
-  @Override
-  public UpdateResponse rollbackTransaction() {
-    return new UpdateResponse();
-  }
-
-  @Override
-  public void shutdown() {
-  }
-
-  @Override
-  public SolrPingResponse ping() {
-    return new SolrPingResponse();
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/GoLive.java
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/GoLive.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/GoLive.java
deleted file mode 100644
index 5b1c343..0000000
--- a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/GoLive.java
+++ /dev/null
@@ -1,182 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.hadoop;
-
-import java.io.IOException;
-import java.lang.invoke.MethodHandles;
-import java.util.Arrays;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-import java.util.concurrent.Callable;
-import java.util.concurrent.CompletionService;
-import java.util.concurrent.ExecutionException;
-import java.util.concurrent.ExecutorCompletionService;
-import java.util.concurrent.Future;
-import java.util.concurrent.LinkedBlockingQueue;
-import java.util.concurrent.ThreadPoolExecutor;
-import java.util.concurrent.TimeUnit;
-
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.solr.client.solrj.SolrServerException;
-import org.apache.solr.client.solrj.impl.CloudSolrClient;
-import org.apache.solr.client.solrj.impl.HttpSolrClient;
-import org.apache.solr.client.solrj.request.CoreAdminRequest;
-import org.apache.solr.common.util.ExecutorUtil;
-import org.apache.solr.hadoop.MapReduceIndexerTool.Options;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * The optional (parallel) GoLive phase merges the output shards of the previous
- * phase into a set of live customer facing Solr servers, typically a SolrCloud.
- */
-class GoLive {
-
-  private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
-  
-  // TODO: handle clusters with replicas
-  public boolean goLive(Options options, FileStatus[] outDirs) {
-    LOG.info("Live merging of output shards into Solr cluster...");
-    boolean success = false;
-    long start = System.nanoTime();
-    int concurrentMerges = options.goLiveThreads;
-    ThreadPoolExecutor executor = new ExecutorUtil.MDCAwareThreadPoolExecutor(concurrentMerges,
-        concurrentMerges, 1, TimeUnit.SECONDS,
-        new LinkedBlockingQueue<Runnable>());
-    
-    try {
-      CompletionService<Request> completionService = new ExecutorCompletionService<>(executor);
-      Set<Future<Request>> pending = new HashSet<>();
-      int cnt = -1;
-      for (final FileStatus dir : outDirs) {
-        
-        LOG.debug("processing: " + dir.getPath());
-
-        cnt++;
-        List<String> urls = options.shardUrls.get(cnt);
-        
-        for (String url : urls) {
-          
-          String baseUrl = url;
-          if (baseUrl.endsWith("/")) {
-            baseUrl = baseUrl.substring(0, baseUrl.length() - 1);
-          }
-          
-          int lastPathIndex = baseUrl.lastIndexOf("/");
-          if (lastPathIndex == -1) {
-            LOG.error("Found unexpected shardurl, live merge failed: " + baseUrl);
-            return false;
-          }
-          
-          final String name = baseUrl.substring(lastPathIndex + 1);
-          baseUrl = baseUrl.substring(0, lastPathIndex);
-          final String mergeUrl = baseUrl;
-          
-          Callable<Request> task = () -> {
-            Request req = new Request();
-            LOG.info("Live merge " + dir.getPath() + " into " + mergeUrl);
-            try (final HttpSolrClient client = new HttpSolrClient.Builder(mergeUrl).build()) {
-              CoreAdminRequest.MergeIndexes mergeRequest = new CoreAdminRequest.MergeIndexes();
-              mergeRequest.setCoreName(name);
-              mergeRequest.setIndexDirs(Arrays.asList(dir.getPath().toString() + "/data/index"));
-              mergeRequest.process(client);
-              req.success = true;
-            } catch (SolrServerException | IOException e) {
-              req.e = e;
-            }
-            return req;
-          };
-          pending.add(completionService.submit(task));
-        }
-      }
-      
-      while (pending != null && pending.size() > 0) {
-        try {
-          Future<Request> future = completionService.take();
-          if (future == null) break;
-          pending.remove(future);
-          
-          try {
-            Request req = future.get();
-            
-            if (!req.success) {
-              // failed
-              LOG.error("A live merge command failed", req.e);
-              return false;
-            }
-            
-          } catch (ExecutionException e) {
-            LOG.error("Error sending live merge command", e);
-            return false;
-          }
-          
-        } catch (InterruptedException e) {
-          Thread.currentThread().interrupt();
-          LOG.error("Live merge process interrupted", e);
-          return false;
-        }
-      }
-      
-      cnt = -1;
-      
-      
-      try {
-        LOG.info("Committing live merge...");
-        if (options.zkHost != null) {
-          try (CloudSolrClient server = new CloudSolrClient.Builder().withZkHost(options.zkHost).build()) {
-            server.setDefaultCollection(options.collection);
-            server.commit();
-          }
-        } else {
-          for (List<String> urls : options.shardUrls) {
-            for (String url : urls) {
-              // TODO: we should do these concurrently
-              try (HttpSolrClient server = new HttpSolrClient.Builder(url).build()) {
-                server.commit();
-              }
-            }
-          }
-        }
-        LOG.info("Done committing live merge");
-      } catch (Exception e) {
-        LOG.error("Error sending commits to live Solr cluster", e);
-        return false;
-      }
-
-      success = true;
-      return true;
-    } finally {
-      ExecutorUtil.shutdownAndAwaitTermination(executor);
-      float secs = (System.nanoTime() - start) / (float)(10^9);
-      LOG.info("Live merging of index shards into Solr cluster took " + secs + " secs");
-      if (success) {
-        LOG.info("Live merging completed successfully");
-      } else {
-        LOG.info("Live merging failed");
-      }
-    }
-    
-    // if an output dir does not exist, we should fail and do no merge?
-  }
-  
-  private static final class Request {
-    Exception e;
-    boolean success = false;
-  }
-
-}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/HdfsFileFieldNames.java
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/HdfsFileFieldNames.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/HdfsFileFieldNames.java
deleted file mode 100644
index c9eaef6..0000000
--- a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/HdfsFileFieldNames.java
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.hadoop;
-
-
-/**
- * Names of the Solr fields that carry the metadata of an HDFS file.
- *
- * <p>Fields declared in an interface are implicitly {@code public static final},
- * so the modifiers are not repeated on each constant.
- */
-public interface HdfsFileFieldNames {
-
-  // Location of the file
-  String FILE_UPLOAD_URL = "file_upload_url";
-  String FILE_DOWNLOAD_URL = "file_download_url";
-  String FILE_SCHEME = "file_scheme";
-  String FILE_HOST = "file_host";
-  String FILE_PORT = "file_port";
-  String FILE_PATH = "file_path";
-  String FILE_NAME = "file_name";
-
-  // Size and modification time
-  String FILE_LENGTH = "file_length";
-  String FILE_LAST_MODIFIED = "file_last_modified";
-
-  // Ownership and permissions
-  String FILE_OWNER = "file_owner";
-  String FILE_GROUP = "file_group";
-  String FILE_PERMISSIONS_USER = "file_permissions_user";
-  String FILE_PERMISSIONS_GROUP = "file_permissions_group";
-  String FILE_PERMISSIONS_OTHER = "file_permissions_other";
-  String FILE_PERMISSIONS_STICKYBIT = "file_permissions_stickybit";
-
-}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/HeartBeater.java
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/HeartBeater.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/HeartBeater.java
deleted file mode 100644
index 66cd3dc..0000000
--- a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/HeartBeater.java
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.hadoop;
-
-import java.lang.invoke.MethodHandles;
-import java.util.Locale;
-import java.util.concurrent.CountDownLatch;
-import java.util.concurrent.TimeUnit;
-
-import org.apache.hadoop.mapreduce.TaskInputOutputContext;
-import org.apache.hadoop.util.Progressable;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * This class runs a background thread that once every 60 seconds checks to see if
- * a progress report is needed. If a report is needed it is issued.
- *
- * A simple counter {@link #threadsNeedingHeartBeat} handles the number of
- * threads requesting a heart beat.
- *
- * The expected usage pattern is
- *
- * <pre>
- *  try {
- *       heartBeater.needHeartBeat();
- *       do something that may take a while
- *    } finally {
- *       heartBeater.cancelHeartBeat();
- *    }
- * </pre>
- *
- * The thread is a daemon thread and is started by the constructor; call
- * {@link #close()} to make it exit.
- */
-public class HeartBeater extends Thread {
-
-  private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
-
-  /**
-   * count of threads asking for heart beat, at 0 no heart beat done. This could
-   * be an atomic long but then mismatches in need/cancel could result in
-   * negative counts. It is only modified while holding this object's monitor.
-   */
-  private volatile int threadsNeedingHeartBeat = 0;
-
-  /**
-   * Target of the progress reports. Volatile because {@link #setProgress} and
-   * {@link #setStatus} access it without holding this object's monitor while
-   * the heart beat thread reads it.
-   */
-  private volatile Progressable progress;
-
-  /**
-   * The amount of time to wait between checks for the need to issue a heart
-   * beat. In milliseconds.
-   */
-  private final long waitTimeMs = TimeUnit.SECONDS.toMillis(60);
-
-  /** Counted down by {@link #close()} to tell the heart beat thread to exit. */
-  private final CountDownLatch isClosing = new CountDownLatch(1);
-
-  /**
-   * Create the heart beat object, mark the thread as a daemon and start it.
-   * When the count in {@link #threadsNeedingHeartBeat} is positive, the
-   * heart beat will be issued on the progress object every 60 seconds.
-   *
-   * @param progress the object that receives the progress reports; must not be null
-   */
-  public HeartBeater(Progressable progress) {
-    setDaemon(true);
-    this.progress = progress;
-    LOG.info("Heart beat reporting class is " + progress.getClass().getName());
-    start();
-  }
-
-  public Progressable getProgress() {
-    return progress;
-  }
-
-  public void setProgress(Progressable progress) {
-    this.progress = progress;
-  }
-
-  /**
-   * Loops until {@link #close()} is called: reports progress if at least one
-   * thread asked for heart beats, then waits up to {@link #waitTimeMs}.
-   * Anything thrown inside the loop is logged and the loop continues.
-   */
-  @Override
-  public void run() {
-    LOG.info("HeartBeat thread running");
-    while (true) {
-      try {
-        synchronized (this) {
-          if (threadsNeedingHeartBeat > 0) {
-            progress.progress();
-            if (LOG.isInfoEnabled()) {
-              LOG.info(String.format(Locale.ENGLISH, "Issuing heart beat for %d threads",
-                  threadsNeedingHeartBeat));
-            }
-          } else {
-            if (LOG.isInfoEnabled()) {
-              LOG.info(String.format(Locale.ENGLISH, "heartbeat skipped count %d",
-                  threadsNeedingHeartBeat));
-            }
-          }
-        }
-        // await returns true once close() has counted the latch down.
-        if (isClosing.await(waitTimeMs, TimeUnit.MILLISECONDS)) {
-          return;
-        }
-      } catch (Throwable e) {
-        LOG.error("HeartBeat throwable", e);
-      }
-    }
-  }
-
-  /**
-   * inform the background thread that heartbeats are to be issued. Issue a
-   * heart beat also
-   */
-  public synchronized void needHeartBeat() {
-    threadsNeedingHeartBeat++;
-    // Issue a progress report right away,
-    // just in case the cancel comes before the background thread issues a
-    // report.
-    // If enough cases like this happen the 600 second timeout can occur
-    progress.progress();
-  }
-
-  /**
-   * inform the background thread that this heartbeat request is not needed.
-   * This must be called at some point after each {@link #needHeartBeat()}
-   * request. A call without a matching request only logs a warning.
-   */
-  public synchronized void cancelHeartBeat() {
-    if (threadsNeedingHeartBeat > 0) {
-      threadsNeedingHeartBeat--;
-    } else {
-      // The exception is never thrown; it only carries the caller's stack trace into the log.
-      LOG.warn("extra call to cancelHeartBeat", new Exception("Dummy"));
-    }
-  }
-
-  /**
-   * Forwards {@code status} to the progress object if it is a
-   * {@link TaskInputOutputContext}; otherwise does nothing.
-   */
-  public void setStatus(String status) {
-    // Read the field once so the type check and the cast see the same object.
-    final Progressable current = progress;
-    if (current instanceof TaskInputOutputContext) {
-      ((TaskInputOutputContext<?,?,?,?>) current).setStatus(status);
-    }
-  }
-
-  /** Releases any resources */
-  public void close() {
-    isClosing.countDown();
-  }
-}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/LineRandomizerMapper.java
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/LineRandomizerMapper.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/LineRandomizerMapper.java
deleted file mode 100644
index 6f2638c..0000000
--- a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/LineRandomizerMapper.java
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.hadoop;
-
-import java.io.IOException;
-import java.lang.invoke.MethodHandles;
-import java.util.Random;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * MR Mapper that randomizing a list of URLs.
- * 
- * Mapper input is (offset, URL) pairs. Each such pair indicates a file to
- * index.
- * 
- * Mapper output is (randomPosition, URL) pairs. The reducer receives these
- * pairs sorted by randomPosition.
- */
-public class LineRandomizerMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
-
-  private Random random;
-  
-  private static final Logger LOGGER = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
-
-  @Override
-  protected void setup(Context context) throws IOException, InterruptedException {
-    super.setup(context);
-    random = createRandom(context);
-  }
-
-  @Override
-  protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
-    LOGGER.debug("map key: {}, value: {}", key, value);
-    context.write(new LongWritable(random.nextLong()), value);
-  }
-  
-  private Random createRandom(Context context) {
-    long taskId = 0;
-    if (context.getTaskAttemptID() != null) { // MRUnit returns null
-      LOGGER.debug("context.getTaskAttemptID().getId(): {}", context.getTaskAttemptID().getId());
-      LOGGER.debug("context.getTaskAttemptID().getTaskID().getId(): {}", context.getTaskAttemptID().getTaskID().getId());
-      taskId = context.getTaskAttemptID().getTaskID().getId(); // taskId = 0, 1, ..., N
-    }
-    // create a good random seed, yet ensure deterministic PRNG sequence for easy reproducability
-    return new Random(421439783L * (taskId + 1));
-  }
-
-}
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/LineRandomizerReducer.java
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/LineRandomizerReducer.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/LineRandomizerReducer.java
deleted file mode 100644
index cbe194a..0000000
--- a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/LineRandomizerReducer.java
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.hadoop;
-
-import java.io.IOException;
-import java.lang.invoke.MethodHandles;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * MR Reducer that randomizing a list of URLs.
- * 
- * Reducer input is (randomPosition, URL) pairs. Each such pair indicates a file
- * to index.
- * 
- * Reducer output is a list of URLs, each URL in a random position.
- */
-public class LineRandomizerReducer extends Reducer<LongWritable, Text, Text, NullWritable> {
-
-  private static final Logger LOGGER = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
-
-  @Override
-  protected void reduce(LongWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
-    for (Text value : values) {
-      LOGGER.debug("reduce key: {}, value: {}", key, value);
-      context.write(value, NullWritable.get());
-    }
-  }
-}
\ No newline at end of file