You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sa...@apache.org on 2017/03/24 16:37:11 UTC
[30/62] lucene-solr:master: SOLR-9221: Remove Solr contribs:
map-reduce, morphlines-core and morphlines-cell
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/BatchWriter.java
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/BatchWriter.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/BatchWriter.java
deleted file mode 100644
index 24458d5..0000000
--- a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/BatchWriter.java
+++ /dev/null
@@ -1,243 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.hadoop;
-
-import org.apache.hadoop.mapreduce.TaskAttemptContext;
-import org.apache.hadoop.mapreduce.TaskID;
-import org.apache.solr.client.solrj.SolrServerException;
-import org.apache.solr.client.solrj.embedded.EmbeddedSolrServer;
-import org.apache.solr.client.solrj.response.UpdateResponse;
-import org.apache.solr.common.SolrInputDocument;
-import org.apache.solr.common.util.ExecutorUtil;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-import java.io.IOException;
-import java.lang.invoke.MethodHandles;
-import java.util.ArrayList;
-import java.util.Collection;
-import java.util.List;
-import java.util.Locale;
-import java.util.concurrent.LinkedBlockingQueue;
-import java.util.concurrent.ThreadPoolExecutor;
-import java.util.concurrent.TimeUnit;
-import java.util.concurrent.atomic.AtomicInteger;
-
-/**
- * Enables adding batches of documents to an EmbeddedSolrServer.
- */
-class BatchWriter {
-
- private final EmbeddedSolrServer solr;
- private volatile Exception batchWriteException = null;
-
- private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
-
- public Exception getBatchWriteException() {
- return batchWriteException;
- }
-
- public void setBatchWriteException(Exception batchWriteException) {
- this.batchWriteException = batchWriteException;
- }
-
- /** The number of writing threads. */
- final int writerThreads;
-
- /** Queue Size */
- final int queueSize;
-
- private final ThreadPoolExecutor batchPool;
-
- private TaskID taskId = null;
-
- /**
- * The number of in progress batches, must be zero before the close can
- * actually start closing
- */
- AtomicInteger executingBatches = new AtomicInteger(0);
-
- /**
- * A unit of work wrapping one batch of documents; running it passes them to
- * runUpdate and stores the resulting UpdateResponse.
- *
- */
-
- final class Batch implements Runnable {
-
- private List<SolrInputDocument> documents;
- private UpdateResponse result;
-
- public Batch(Collection<SolrInputDocument> batch) {
- documents = new ArrayList<>(batch);
- }
-
- public void run() {
- try {
- executingBatches.getAndIncrement();
- result = runUpdate(documents);
- } finally {
- executingBatches.getAndDecrement();
- }
- }
-
- protected List<SolrInputDocument> getDocuments() {
- return documents;
- }
-
- protected void setDocuments(List<SolrInputDocument> documents) {
- this.documents = documents;
- }
-
- protected UpdateResponse getResult() {
- return result;
- }
-
- protected void setResult(UpdateResponse result) {
- this.result = result;
- }
-
- protected void reset(List<SolrInputDocument> documents) {
- if (this.documents == null) {
- this.documents = new ArrayList<>(documents);
- } else {
- this.documents.clear();
- this.documents.addAll(documents);
- }
- result = null;
- }
-
- protected void reset(SolrInputDocument document) {
- if (this.documents == null) {
- this.documents = new ArrayList<>();
- } else {
- this.documents.clear();
- }
- this.documents.add(document);
- result = null;
- }
- }
-
- protected UpdateResponse runUpdate(List<SolrInputDocument> batchToWrite) {
- try {
- UpdateResponse result = solr.add(batchToWrite);
- SolrRecordWriter.incrementCounter(taskId, SolrCounters.class.getName(), SolrCounters.BATCHES_WRITTEN.toString(), 1);
- SolrRecordWriter.incrementCounter(taskId, SolrCounters.class.getName(), SolrCounters.DOCUMENTS_WRITTEN.toString(), batchToWrite.size());
- if (LOG.isDebugEnabled()) {
- SolrRecordWriter.incrementCounter(taskId, SolrCounters.class.getName(), SolrCounters.BATCH_WRITE_TIME.toString(), result.getElapsedTime());
- }
- return result;
- } catch (Throwable e) {
- if (e instanceof Exception) {
- setBatchWriteException((Exception) e);
- } else {
- setBatchWriteException(new Exception(e));
- }
- SolrRecordWriter.incrementCounter(taskId, getClass().getName() + ".errors", e.getClass().getName(), 1);
- LOG.error("Unable to process batch", e);
- return null;
- }
- }
-
-
- public BatchWriter(EmbeddedSolrServer solr, int batchSize, TaskID tid,
- int writerThreads, int queueSize) {
- this.solr = solr;
- this.writerThreads = writerThreads;
- this.queueSize = queueSize;
- taskId = tid;
-
- // we need to obtain the settings before the constructor
- if (writerThreads != 0) {
- batchPool = new ExecutorUtil.MDCAwareThreadPoolExecutor(writerThreads, writerThreads, 5,
- TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>(queueSize),
- new ThreadPoolExecutor.CallerRunsPolicy());
- } else { // single threaded case
- batchPool = null;
- }
- }
-
- public void queueBatch(Collection<SolrInputDocument> batch)
- throws IOException, SolrServerException {
-
- throwIf();
- Batch b = new Batch(batch);
- if (batchPool != null) {
- batchPool.execute(b);
- } else { // single threaded case
- b.run();
- throwIf();
- }
- }
-
- public synchronized void close(TaskAttemptContext context)
- throws InterruptedException, SolrServerException, IOException {
-
- if (batchPool != null) {
- context.setStatus("Waiting for batches to complete");
- batchPool.shutdown();
-
- while (!batchPool.isTerminated()) {
- LOG.info(String.format(Locale.ENGLISH,
- "Waiting for %d items and %d threads to finish executing", batchPool
- .getQueue().size(), batchPool.getActiveCount()));
- batchPool.awaitTermination(5, TimeUnit.SECONDS);
- }
- }
- context.setStatus("Committing Solr Phase 1");
- solr.commit(true, false);
- context.setStatus("Optimizing Solr");
- int maxSegments = context.getConfiguration().getInt(SolrOutputFormat.SOLR_RECORD_WRITER_MAX_SEGMENTS, 1);
- LOG.info("Optimizing Solr: forcing merge down to {} segments", maxSegments);
- long start = System.nanoTime();
- solr.optimize(true, false, maxSegments);
- context.getCounter(SolrCounters.class.getName(), SolrCounters.PHYSICAL_REDUCER_MERGE_TIME.toString()).increment(System.nanoTime() - start);
- float secs = (System.nanoTime() - start) / (float)(10^9);
- LOG.info("Optimizing Solr: done forcing merge down to {} segments in {} secs", maxSegments, secs);
- context.setStatus("Committing Solr Phase 2");
- solr.commit(true, false);
- context.setStatus("Shutting down Solr");
- solr.close();
- }
-
- /**
- * Throw a legal exception if a previous batch write had an exception. The
- * previous state is cleared. Uses {@link #batchWriteException} for the state
- * from the last exception.
- *
- * This will lose individual exceptions if the exceptions happen rapidly.
- *
- * @throws IOException On low level IO error
- * @throws SolrServerException On Solr Exception
- */
- private void throwIf() throws IOException, SolrServerException {
-
- final Exception last = batchWriteException;
- batchWriteException = null;
-
- if (last == null) {
- return;
- }
- if (last instanceof SolrServerException) {
- throw (SolrServerException) last;
- }
- if (last instanceof IOException) {
- throw (IOException) last;
- }
- throw new IOException("Batch Write Failure", last);
- }
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/DataInputInputStream.java
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/DataInputInputStream.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/DataInputInputStream.java
deleted file mode 100644
index d1d46bc..0000000
--- a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/DataInputInputStream.java
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.hadoop;
-
-import java.io.DataInput;
-import java.io.IOException;
-import java.io.InputStream;
-
-import org.apache.hadoop.classification.InterfaceAudience;
-
-/**
- * An InputStream that wraps a DataInput.
- * @see DataOutputOutputStream
- */
-@InterfaceAudience.Private
-public class DataInputInputStream extends InputStream {
-
- private DataInput in;
-
- /**
- * Construct an InputStream from the given DataInput. If 'in'
- * is already an InputStream, simply returns it. Otherwise, wraps
- * it in an InputStream.
- * @param in the DataInput to wrap
- * @return an InputStream instance that reads from 'in'
- */
- public static InputStream constructInputStream(DataInput in) {
- if (in instanceof InputStream) {
- return (InputStream)in;
- } else {
- return new DataInputInputStream(in);
- }
- }
-
-
- public DataInputInputStream(DataInput in) {
- this.in = in;
- }
-
- @Override
- public int read() throws IOException {
- return in.readUnsignedByte();
- }
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/DataOutputOutputStream.java
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/DataOutputOutputStream.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/DataOutputOutputStream.java
deleted file mode 100644
index 389c52a..0000000
--- a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/DataOutputOutputStream.java
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.hadoop;
-
-import java.io.DataOutput;
-import java.io.IOException;
-import java.io.OutputStream;
-
-import org.apache.hadoop.classification.InterfaceAudience;
-
-/**
- * OutputStream implementation that wraps a DataOutput.
- */
-@InterfaceAudience.Private
-public class DataOutputOutputStream extends OutputStream {
-
- private final DataOutput out;
-
- /**
- * Construct an OutputStream from the given DataOutput. If 'out'
- * is already an OutputStream, simply returns it. Otherwise, wraps
- * it in an OutputStream.
- * @param out the DataOutput to wrap
- * @return an OutputStream instance that outputs to 'out'
- */
- public static OutputStream constructOutputStream(DataOutput out) {
- if (out instanceof OutputStream) {
- return (OutputStream)out;
- } else {
- return new DataOutputOutputStream(out);
- }
- }
-
- private DataOutputOutputStream(DataOutput out) {
- this.out = out;
- }
-
- @Override
- public void write(int b) throws IOException {
- out.writeByte(b);
- }
-
- @Override
- public void write(byte[] b, int off, int len) throws IOException {
- out.write(b, off, len);
- }
-
- @Override
- public void write(byte[] b) throws IOException {
- out.write(b);
- }
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/DryRunDocumentLoader.java
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/DryRunDocumentLoader.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/DryRunDocumentLoader.java
deleted file mode 100644
index bacf1d0..0000000
--- a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/DryRunDocumentLoader.java
+++ /dev/null
@@ -1,57 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.hadoop;
-
-import org.apache.solr.client.solrj.response.SolrPingResponse;
-import org.apache.solr.client.solrj.response.UpdateResponse;
-import org.apache.solr.common.SolrInputDocument;
-import org.apache.solr.morphlines.solr.DocumentLoader;
-
-/**
- * Prints documents to stdout instead of loading them into Solr for quicker turnaround during early
- * trial & debug sessions.
- */
-final class DryRunDocumentLoader implements DocumentLoader {
-
- @Override
- public void beginTransaction() {
- }
-
- @Override
- public void load(SolrInputDocument doc) {
- System.out.println("dryrun: " + doc);
- }
-
- @Override
- public void commitTransaction() {
- }
-
- @Override
- public UpdateResponse rollbackTransaction() {
- return new UpdateResponse();
- }
-
- @Override
- public void shutdown() {
- }
-
- @Override
- public SolrPingResponse ping() {
- return new SolrPingResponse();
- }
-
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/GoLive.java
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/GoLive.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/GoLive.java
deleted file mode 100644
index 5b1c343..0000000
--- a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/GoLive.java
+++ /dev/null
@@ -1,182 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.hadoop;
-
-import java.io.IOException;
-import java.lang.invoke.MethodHandles;
-import java.util.Arrays;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-import java.util.concurrent.Callable;
-import java.util.concurrent.CompletionService;
-import java.util.concurrent.ExecutionException;
-import java.util.concurrent.ExecutorCompletionService;
-import java.util.concurrent.Future;
-import java.util.concurrent.LinkedBlockingQueue;
-import java.util.concurrent.ThreadPoolExecutor;
-import java.util.concurrent.TimeUnit;
-
-import org.apache.hadoop.fs.FileStatus;
-import org.apache.solr.client.solrj.SolrServerException;
-import org.apache.solr.client.solrj.impl.CloudSolrClient;
-import org.apache.solr.client.solrj.impl.HttpSolrClient;
-import org.apache.solr.client.solrj.request.CoreAdminRequest;
-import org.apache.solr.common.util.ExecutorUtil;
-import org.apache.solr.hadoop.MapReduceIndexerTool.Options;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * The optional (parallel) GoLive phase merges the output shards of the previous
- * phase into a set of live customer facing Solr servers, typically a SolrCloud.
- */
-class GoLive {
-
- private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
-
- // TODO: handle clusters with replicas
- public boolean goLive(Options options, FileStatus[] outDirs) {
- LOG.info("Live merging of output shards into Solr cluster...");
- boolean success = false;
- long start = System.nanoTime();
- int concurrentMerges = options.goLiveThreads;
- ThreadPoolExecutor executor = new ExecutorUtil.MDCAwareThreadPoolExecutor(concurrentMerges,
- concurrentMerges, 1, TimeUnit.SECONDS,
- new LinkedBlockingQueue<Runnable>());
-
- try {
- CompletionService<Request> completionService = new ExecutorCompletionService<>(executor);
- Set<Future<Request>> pending = new HashSet<>();
- int cnt = -1;
- for (final FileStatus dir : outDirs) {
-
- LOG.debug("processing: " + dir.getPath());
-
- cnt++;
- List<String> urls = options.shardUrls.get(cnt);
-
- for (String url : urls) {
-
- String baseUrl = url;
- if (baseUrl.endsWith("/")) {
- baseUrl = baseUrl.substring(0, baseUrl.length() - 1);
- }
-
- int lastPathIndex = baseUrl.lastIndexOf("/");
- if (lastPathIndex == -1) {
- LOG.error("Found unexpected shardurl, live merge failed: " + baseUrl);
- return false;
- }
-
- final String name = baseUrl.substring(lastPathIndex + 1);
- baseUrl = baseUrl.substring(0, lastPathIndex);
- final String mergeUrl = baseUrl;
-
- Callable<Request> task = () -> {
- Request req = new Request();
- LOG.info("Live merge " + dir.getPath() + " into " + mergeUrl);
- try (final HttpSolrClient client = new HttpSolrClient.Builder(mergeUrl).build()) {
- CoreAdminRequest.MergeIndexes mergeRequest = new CoreAdminRequest.MergeIndexes();
- mergeRequest.setCoreName(name);
- mergeRequest.setIndexDirs(Arrays.asList(dir.getPath().toString() + "/data/index"));
- mergeRequest.process(client);
- req.success = true;
- } catch (SolrServerException | IOException e) {
- req.e = e;
- }
- return req;
- };
- pending.add(completionService.submit(task));
- }
- }
-
- while (pending != null && pending.size() > 0) {
- try {
- Future<Request> future = completionService.take();
- if (future == null) break;
- pending.remove(future);
-
- try {
- Request req = future.get();
-
- if (!req.success) {
- // failed
- LOG.error("A live merge command failed", req.e);
- return false;
- }
-
- } catch (ExecutionException e) {
- LOG.error("Error sending live merge command", e);
- return false;
- }
-
- } catch (InterruptedException e) {
- Thread.currentThread().interrupt();
- LOG.error("Live merge process interrupted", e);
- return false;
- }
- }
-
- cnt = -1;
-
-
- try {
- LOG.info("Committing live merge...");
- if (options.zkHost != null) {
- try (CloudSolrClient server = new CloudSolrClient.Builder().withZkHost(options.zkHost).build()) {
- server.setDefaultCollection(options.collection);
- server.commit();
- }
- } else {
- for (List<String> urls : options.shardUrls) {
- for (String url : urls) {
- // TODO: we should do these concurrently
- try (HttpSolrClient server = new HttpSolrClient.Builder(url).build()) {
- server.commit();
- }
- }
- }
- }
- LOG.info("Done committing live merge");
- } catch (Exception e) {
- LOG.error("Error sending commits to live Solr cluster", e);
- return false;
- }
-
- success = true;
- return true;
- } finally {
- ExecutorUtil.shutdownAndAwaitTermination(executor);
- float secs = (System.nanoTime() - start) / (float)(10^9);
- LOG.info("Live merging of index shards into Solr cluster took " + secs + " secs");
- if (success) {
- LOG.info("Live merging completed successfully");
- } else {
- LOG.info("Live merging failed");
- }
- }
-
- // if an output dir does not exist, we should fail and do no merge?
- }
-
- private static final class Request {
- Exception e;
- boolean success = false;
- }
-
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/HdfsFileFieldNames.java
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/HdfsFileFieldNames.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/HdfsFileFieldNames.java
deleted file mode 100644
index c9eaef6..0000000
--- a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/HdfsFileFieldNames.java
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.hadoop;
-
-
-/**
- * Solr field names for metadata of an HDFS file.
- */
-public interface HdfsFileFieldNames {
-
- public static final String FILE_UPLOAD_URL = "file_upload_url";
- public static final String FILE_DOWNLOAD_URL = "file_download_url";
- public static final String FILE_SCHEME = "file_scheme";
- public static final String FILE_HOST = "file_host";
- public static final String FILE_PORT = "file_port";
- public static final String FILE_PATH = "file_path";
- public static final String FILE_NAME = "file_name";
- public static final String FILE_LENGTH = "file_length";
- public static final String FILE_LAST_MODIFIED = "file_last_modified";
- public static final String FILE_OWNER = "file_owner";
- public static final String FILE_GROUP = "file_group";
- public static final String FILE_PERMISSIONS_USER = "file_permissions_user";
- public static final String FILE_PERMISSIONS_GROUP = "file_permissions_group";
- public static final String FILE_PERMISSIONS_OTHER = "file_permissions_other";
- public static final String FILE_PERMISSIONS_STICKYBIT = "file_permissions_stickybit";
-
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/HeartBeater.java
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/HeartBeater.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/HeartBeater.java
deleted file mode 100644
index 66cd3dc..0000000
--- a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/HeartBeater.java
+++ /dev/null
@@ -1,159 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.hadoop;
-
-import java.lang.invoke.MethodHandles;
-import java.util.Locale;
-import java.util.concurrent.CountDownLatch;
-import java.util.concurrent.TimeUnit;
-
-import org.apache.hadoop.mapreduce.TaskInputOutputContext;
-import org.apache.hadoop.util.Progressable;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * This class runs a background thread that once every 60 seconds checks to see if
- * a progress report is needed. If a report is needed it is issued.
- *
- * A simple counter {@link #threadsNeedingHeartBeat} handles the number of
- * threads requesting a heart beat.
- *
- * The expected usage pattern is
- *
- * <pre>
- * try {
- * heartBeater.needHeartBeat();
- * do something that may take a while
- * } finally {
- * heartBeater.cancelHeartBeat();
- * }
- * </pre>
- *
- *
- */
-public class HeartBeater extends Thread {
-
- private static final Logger LOG = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
-
- /**
- * count of threads asking for heart beat, at 0 no heart beat done. This could
- * be an atomic long but then mismatches in need/cancel could result in
- * negative counts.
- */
- private volatile int threadsNeedingHeartBeat = 0;
-
- private Progressable progress;
-
- /**
- * The amount of time to wait between checks for the need to issue a heart
- * beat. In milliseconds.
- */
- private final long waitTimeMs = TimeUnit.MILLISECONDS.convert(60, TimeUnit.SECONDS);
-
- private final CountDownLatch isClosing = new CountDownLatch(1);
-
- /**
- * Create the heart beat thread, set it to daemon mode, and start the
- * thread. When the count in {@link #threadsNeedingHeartBeat} is positive, the
- * heart beat will be issued on the progress object every 60 seconds.
- */
- public HeartBeater(Progressable progress) {
- setDaemon(true);
- this.progress = progress;
- LOG.info("Heart beat reporting class is " + progress.getClass().getName());
- start();
- }
-
- public Progressable getProgress() {
- return progress;
- }
-
- public void setProgress(Progressable progress) {
- this.progress = progress;
- }
-
- @Override
- public void run() {
- LOG.info("HeartBeat thread running");
- while (true) {
- try {
- synchronized (this) {
- if (threadsNeedingHeartBeat > 0) {
- progress.progress();
- if (LOG.isInfoEnabled()) {
- LOG.info(String.format(Locale.ENGLISH, "Issuing heart beat for %d threads",
- threadsNeedingHeartBeat));
- }
- } else {
- if (LOG.isInfoEnabled()) {
- LOG.info(String.format(Locale.ENGLISH, "heartbeat skipped count %d",
- threadsNeedingHeartBeat));
- }
- }
- }
- if (isClosing.await(waitTimeMs, TimeUnit.MILLISECONDS)) {
- return;
- }
- } catch (Throwable e) {
- LOG.error("HeartBeat throwable", e);
- }
- }
- }
-
- /**
- * inform the background thread that heartbeats are to be issued. Issue a
- * heart beat also
- */
- public synchronized void needHeartBeat() {
- threadsNeedingHeartBeat++;
- // Issue a progress report right away,
- // just in case the cancel comes before the background thread issues a
- // report.
- // If enough cases like this happen the 600 second timeout can occur
- progress.progress();
- if (threadsNeedingHeartBeat == 1) {
- // this.notify(); // wake up the heartbeater
- }
- }
-
- /**
- * inform the background thread that this heartbeat request is not needed.
- * This must be called at some point after each {@link #needHeartBeat()}
- * request.
- */
- public synchronized void cancelHeartBeat() {
- if (threadsNeedingHeartBeat > 0) {
- threadsNeedingHeartBeat--;
- } else {
- Exception e = new Exception("Dummy");
- e.fillInStackTrace();
- LOG.warn("extra call to cancelHeartBeat", e);
- }
- }
-
- public void setStatus(String status) {
- if (progress instanceof TaskInputOutputContext) {
- ((TaskInputOutputContext<?,?,?,?>) progress).setStatus(status);
- }
- }
-
- /** Releases any resources */
- public void close() {
- isClosing.countDown();
- }
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/LineRandomizerMapper.java
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/LineRandomizerMapper.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/LineRandomizerMapper.java
deleted file mode 100644
index 6f2638c..0000000
--- a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/LineRandomizerMapper.java
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.hadoop;
-
-import java.io.IOException;
-import java.lang.invoke.MethodHandles;
-import java.util.Random;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Mapper;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * MR Mapper that randomizes a list of URLs.
- *
- * Mapper input is (offset, URL) pairs. Each such pair indicates a file to
- * index.
- *
- * Mapper output is (randomPosition, URL) pairs. The reducer receives these
- * pairs sorted by randomPosition.
- */
-public class LineRandomizerMapper extends Mapper<LongWritable, Text, LongWritable, Text> {
-
- private Random random;
-
- private static final Logger LOGGER = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
-
- @Override
- protected void setup(Context context) throws IOException, InterruptedException {
- super.setup(context);
- random = createRandom(context);
- }
-
- @Override
- protected void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException {
- LOGGER.debug("map key: {}, value: {}", key, value);
- context.write(new LongWritable(random.nextLong()), value);
- }
-
- private Random createRandom(Context context) {
- long taskId = 0;
- if (context.getTaskAttemptID() != null) { // MRUnit returns null
- LOGGER.debug("context.getTaskAttemptID().getId(): {}", context.getTaskAttemptID().getId());
- LOGGER.debug("context.getTaskAttemptID().getTaskID().getId(): {}", context.getTaskAttemptID().getTaskID().getId());
- taskId = context.getTaskAttemptID().getTaskID().getId(); // taskId = 0, 1, ..., N
- }
- // create a good random seed, yet ensure deterministic PRNG sequence for easy reproducibility
- return new Random(421439783L * (taskId + 1));
- }
-
-}
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/LineRandomizerReducer.java
----------------------------------------------------------------------
diff --git a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/LineRandomizerReducer.java b/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/LineRandomizerReducer.java
deleted file mode 100644
index cbe194a..0000000
--- a/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/LineRandomizerReducer.java
+++ /dev/null
@@ -1,48 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.hadoop;
-
-import java.io.IOException;
-import java.lang.invoke.MethodHandles;
-
-import org.apache.hadoop.io.LongWritable;
-import org.apache.hadoop.io.NullWritable;
-import org.apache.hadoop.io.Text;
-import org.apache.hadoop.mapreduce.Reducer;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
-/**
- * MR Reducer that randomizes a list of URLs.
- *
- * Reducer input is (randomPosition, URL) pairs. Each such pair indicates a file
- * to index.
- *
- * Reducer output is a list of URLs, each URL in a random position.
- */
-public class LineRandomizerReducer extends Reducer<LongWritable, Text, Text, NullWritable> {
-
- private static final Logger LOGGER = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
-
- @Override
- protected void reduce(LongWritable key, Iterable<Text> values, Context context) throws IOException, InterruptedException {
- for (Text value : values) {
- LOGGER.debug("reduce key: {}, value: {}", key, value);
- context.write(value, NullWritable.get());
- }
- }
-}
\ No newline at end of file