You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by ma...@apache.org on 2013/12/05 00:28:51 UTC
svn commit: r1547962 - in /lucene/dev/trunk/solr:
contrib/map-reduce/src/java/org/apache/solr/hadoop/
contrib/map-reduce/src/test/org/apache/solr/hadoop/
example/scripts/map-reduce/
Author: markrmiller
Date: Wed Dec 4 23:28:50 2013
New Revision: 1547962
URL: http://svn.apache.org/r1547962
Log:
SOLR-1301: Clean up.
Added:
lucene/dev/trunk/solr/example/scripts/map-reduce/set-map-reduce-classpath.sh (with props)
Removed:
lucene/dev/trunk/solr/example/scripts/map-reduce/map-reduce-indexer.bat
lucene/dev/trunk/solr/example/scripts/map-reduce/map-reduce-indexer.sh
Modified:
lucene/dev/trunk/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/MapReduceIndexerTool.java
lucene/dev/trunk/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineBasicMiniMRTest.java
lucene/dev/trunk/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineGoLiveMiniMRTest.java
Modified: lucene/dev/trunk/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/MapReduceIndexerTool.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/MapReduceIndexerTool.java?rev=1547962&r1=1547961&r2=1547962&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/MapReduceIndexerTool.java (original)
+++ lucene/dev/trunk/solr/contrib/map-reduce/src/java/org/apache/solr/hadoop/MapReduceIndexerTool.java Wed Dec 4 23:28:50 2013
@@ -136,7 +136,7 @@ public class MapReduceIndexerTool extend
showNonSolrCloud = Arrays.asList(args).contains(SHOW_NON_SOLR_CLOUD); // intercept it first
ArgumentParser parser = ArgumentParsers
- .newArgumentParser("hadoop [GenericOptions]... jar search-mr-*-job.jar " + MapReduceIndexerTool.class.getName(), false)
+ .newArgumentParser("hadoop [GenericOptions]... jar solr-map-reduce-*.jar ", false)
.defaultHelp(true)
.description(
"MapReduce batch job driver that takes a morphline and creates a set of Solr index shards from a set of input files " +
@@ -197,7 +197,7 @@ public class MapReduceIndexerTool extend
"# (Re)index an Avro based Twitter tweet file:\n" +
"sudo -u hdfs hadoop \\\n" +
" --config /etc/hadoop/conf.cloudera.mapreduce1 \\\n" +
- " jar target/search-mr-*-job.jar " + MapReduceIndexerTool.class.getName() + " \\\n" +
+ " jar target/solr-map-reduce-*.jar \\\n" +
" -D 'mapred.child.java.opts=-Xmx500m' \\\n" +
// " -D 'mapreduce.child.java.opts=-Xmx500m' \\\n" +
" --log4j src/test/resources/log4j.properties \\\n" +
@@ -213,7 +213,7 @@ public class MapReduceIndexerTool extend
"# 3) file was last modified less than 100000 minutes ago\n" +
"# 4) file size is between 1 MB and 1 GB\n" +
"# Also include extra library jar file containing JSON tweet Java parser:\n" +
- "hadoop jar target/search-mr-*-job.jar " + "com.cloudera.cdk.morphline.hadoop.find.HdfsFindTool" + " \\\n" +
+ "hadoop jar target/solr-map-reduce-*.jar " + "com.cloudera.cdk.morphline.hadoop.find.HdfsFindTool" + " \\\n" +
" -find hdfs:///user/$USER/solrloadtest/twitter/tweets \\\n" +
" -type f \\\n" +
" -name 'sample-statuses*.gz' \\\n" +
@@ -222,7 +222,7 @@ public class MapReduceIndexerTool extend
" -size +1000000c \\\n" +
"| sudo -u hdfs hadoop \\\n" +
" --config /etc/hadoop/conf.cloudera.mapreduce1 \\\n" +
- " jar target/search-mr-*-job.jar " + MapReduceIndexerTool.class.getName() + " \\\n" +
+ " jar target/solr-map-reduce-*.jar \\\n" +
" -D 'mapred.child.java.opts=-Xmx500m' \\\n" +
// " -D 'mapreduce.child.java.opts=-Xmx500m' \\\n" +
" --log4j src/test/resources/log4j.properties \\\n" +
@@ -236,7 +236,7 @@ public class MapReduceIndexerTool extend
"# (explicitly specify Solr URLs - for a SolrCloud cluster see next example):\n" +
"sudo -u hdfs hadoop \\\n" +
" --config /etc/hadoop/conf.cloudera.mapreduce1 \\\n" +
- " jar target/search-mr-*-job.jar " + MapReduceIndexerTool.class.getName() + " \\\n" +
+ " jar target/solr-map-reduce-*.jar \\\n" +
" -D 'mapred.child.java.opts=-Xmx500m' \\\n" +
// " -D 'mapreduce.child.java.opts=-Xmx500m' \\\n" +
" --log4j src/test/resources/log4j.properties \\\n" +
@@ -252,7 +252,7 @@ public class MapReduceIndexerTool extend
"# (discover shards and Solr URLs through ZooKeeper):\n" +
"sudo -u hdfs hadoop \\\n" +
" --config /etc/hadoop/conf.cloudera.mapreduce1 \\\n" +
- " jar target/search-mr-*-job.jar " + MapReduceIndexerTool.class.getName() + " \\\n" +
+ " jar target/solr-map-reduce-*.jar \\\n" +
" -D 'mapred.child.java.opts=-Xmx500m' \\\n" +
// " -D 'mapreduce.child.java.opts=-Xmx500m' \\\n" +
" --log4j src/test/resources/log4j.properties \\\n" +
Modified: lucene/dev/trunk/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineBasicMiniMRTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineBasicMiniMRTest.java?rev=1547962&r1=1547961&r2=1547962&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineBasicMiniMRTest.java (original)
+++ lucene/dev/trunk/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineBasicMiniMRTest.java Wed Dec 4 23:28:50 2013
@@ -38,17 +38,14 @@ import org.apache.hadoop.util.JarFinder;
import org.apache.hadoop.util.ToolRunner;
import org.apache.hadoop.yarn.conf.YarnConfiguration;
import org.apache.lucene.util.Constants;
-import org.apache.lucene.util.LuceneTestCase;
import org.apache.lucene.util.LuceneTestCase.Slow;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.cloud.AbstractZkTestCase;
import org.apache.solr.hadoop.hack.MiniMRCluster;
-import org.apache.solr.handler.extraction.ExtractingParams;
import org.apache.solr.util.ExternalPaths;
import org.junit.After;
import org.junit.AfterClass;
import org.junit.BeforeClass;
-import org.junit.Ignore;
import org.junit.Test;
import com.carrotsearch.randomizedtesting.annotations.ThreadLeakAction;
@@ -324,7 +321,6 @@ public class MorphlineBasicMiniMRTest ex
jobConf.setMaxMapAttempts(1);
jobConf.setMaxReduceAttempts(1);
jobConf.setJar(SEARCH_ARCHIVES_JAR);
- jobConf.setBoolean(ExtractingParams.IGNORE_TIKA_EXCEPTION, false);
int shards = 2;
int maxReducers = Integer.MAX_VALUE;
Modified: lucene/dev/trunk/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineGoLiveMiniMRTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineGoLiveMiniMRTest.java?rev=1547962&r1=1547961&r2=1547962&view=diff
==============================================================================
--- lucene/dev/trunk/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineGoLiveMiniMRTest.java (original)
+++ lucene/dev/trunk/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineGoLiveMiniMRTest.java Wed Dec 4 23:28:50 2013
@@ -67,7 +67,6 @@ import org.apache.solr.common.params.Mod
import org.apache.solr.common.util.NamedList;
import org.apache.solr.hadoop.hack.MiniMRClientCluster;
import org.apache.solr.hadoop.hack.MiniMRClientClusterFactory;
-import org.apache.solr.handler.extraction.ExtractingParams;
import org.apache.solr.util.ExternalPaths;
import org.junit.After;
import org.junit.AfterClass;
@@ -367,7 +366,6 @@ public class MorphlineGoLiveMiniMRTest e
jobConf.setMaxMapAttempts(1);
jobConf.setMaxReduceAttempts(1);
jobConf.setJar(SEARCH_ARCHIVES_JAR);
- jobConf.setBoolean(ExtractingParams.IGNORE_TIKA_EXCEPTION, false);
MapReduceIndexerTool tool;
int res;
Added: lucene/dev/trunk/solr/example/scripts/map-reduce/set-map-reduce-classpath.sh
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/scripts/map-reduce/set-map-reduce-classpath.sh?rev=1547962&view=auto
==============================================================================
--- lucene/dev/trunk/solr/example/scripts/map-reduce/set-map-reduce-classpath.sh (added)
+++ lucene/dev/trunk/solr/example/scripts/map-reduce/set-map-reduce-classpath.sh Wed Dec 4 23:28:50 2013
@@ -0,0 +1,11 @@
+#!/usr/bin/env bash
+
+# Exports HADOOP_CLASSPATH with the Solr dist jars and the map-reduce,
+# morphlines and extraction contrib libs, located relative to this script.
+# NOTE(review): presumably meant to be sourced (". set-map-reduce-classpath.sh")
+# so that the export is visible in the calling shell - confirm intended usage.
+
+# Absolute directory of this script. BASH_SOURCE (rather than $0) still points
+# at this file when the script is sourced instead of executed.
+sdir="$(cd "$(dirname "${BASH_SOURCE[0]}")" && pwd)"
+export HADOOP_CLASSPATH="$sdir/../../../dist/*:$sdir/../../../contrib/map-reduce/lib/*:$sdir/../../../contrib/morphlines-core/lib/*:$sdir/../../../contrib/morphlines-cell/lib/*:$sdir/../../../contrib/extraction/lib/*:$sdir/../../solr-webapp/webapp/WEB-INF/lib/*:$sdir/../../lib/ext/*"
\ No newline at end of file