You are viewing a plain-text version of this message; the link to the canonical version was not preserved in this copy.
Posted to commits@lucene.apache.org by ma...@apache.org on 2014/01/15 22:57:58 UTC
svn commit: r1558580 - in /lucene/dev/branches/branch_4x: ./ solr/
solr/contrib/ solr/contrib/map-reduce/
solr/contrib/map-reduce/src/test-files/test-documents/
solr/contrib/map-reduce/src/test-files/test-morphlines/
solr/contrib/map-reduce/src/test/or...
Author: markrmiller
Date: Wed Jan 15 21:57:57 2014
New Revision: 1558580
URL: http://svn.apache.org/r1558580
Log:
SOLR-1301: Merge Morphlines modules up to Kite 0.10 and CDK 0.9
Added:
lucene/dev/branches/branch_4x/solr/contrib/map-reduce/src/test-files/test-documents/cars.csv
- copied unchanged from r1552398, lucene/dev/trunk/solr/contrib/map-reduce/src/test-files/test-documents/cars.csv
lucene/dev/branches/branch_4x/solr/contrib/map-reduce/src/test-files/test-documents/cars.csv.gz
- copied unchanged from r1552398, lucene/dev/trunk/solr/contrib/map-reduce/src/test-files/test-documents/cars.csv.gz
lucene/dev/branches/branch_4x/solr/contrib/map-reduce/src/test-files/test-documents/cars.tar.gz
- copied unchanged from r1552398, lucene/dev/trunk/solr/contrib/map-reduce/src/test-files/test-documents/cars.tar.gz
lucene/dev/branches/branch_4x/solr/contrib/map-reduce/src/test-files/test-documents/email.eml
- copied unchanged from r1552398, lucene/dev/trunk/solr/contrib/map-reduce/src/test-files/test-documents/email.eml
lucene/dev/branches/branch_4x/solr/contrib/map-reduce/src/test-files/test-documents/testRFC822
- copied unchanged from r1552398, lucene/dev/trunk/solr/contrib/map-reduce/src/test-files/test-documents/testRFC822
Modified:
lucene/dev/branches/branch_4x/ (props changed)
lucene/dev/branches/branch_4x/solr/ (props changed)
lucene/dev/branches/branch_4x/solr/contrib/ (props changed)
lucene/dev/branches/branch_4x/solr/contrib/map-reduce/build.xml
lucene/dev/branches/branch_4x/solr/contrib/map-reduce/src/test-files/test-morphlines/solrCellDocumentTypes.conf
lucene/dev/branches/branch_4x/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MRUnitBase.java
lucene/dev/branches/branch_4x/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineBasicMiniMRTest.java
lucene/dev/branches/branch_4x/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineGoLiveMiniMRTest.java
lucene/dev/branches/branch_4x/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/SolrCellBuilder.java
lucene/dev/branches/branch_4x/solr/contrib/morphlines-cell/src/test/org/apache/solr/morphlines/cell/SolrCellMorphlineTest.java
lucene/dev/branches/branch_4x/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/SolrLocator.java
lucene/dev/branches/branch_4x/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/AbstractSolrMorphlineTestBase.java
lucene/dev/branches/branch_4x/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineTest.java
Modified: lucene/dev/branches/branch_4x/solr/contrib/map-reduce/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/contrib/map-reduce/build.xml?rev=1558580&r1=1558579&r2=1558580&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/contrib/map-reduce/build.xml (original)
+++ lucene/dev/branches/branch_4x/solr/contrib/map-reduce/build.xml Wed Jan 15 21:57:57 2014
@@ -92,6 +92,7 @@
<path id="test.classpath">
<path refid="solr.test.base.classpath"/>
<path refid="classpath.additions"/>
+ <pathelement location="${common-solr.dir}/build/contrib/solr-morphlines-core/classes/test"/>
<fileset dir="${common-solr.dir}/contrib/morphlines-core/test-lib" excludes="${common.classpath.excludes}"/>
</path>
Modified: lucene/dev/branches/branch_4x/solr/contrib/map-reduce/src/test-files/test-morphlines/solrCellDocumentTypes.conf
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/contrib/map-reduce/src/test-files/test-morphlines/solrCellDocumentTypes.conf?rev=1558580&r1=1558579&r2=1558580&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/contrib/map-reduce/src/test-files/test-morphlines/solrCellDocumentTypes.conf (original)
+++ lucene/dev/branches/branch_4x/solr/contrib/map-reduce/src/test-files/test-morphlines/solrCellDocumentTypes.conf Wed Jan 15 21:57:57 2014
@@ -78,6 +78,32 @@ morphlines : [
]
}
+ {
+ commands : [
+ {
+ readCSV {
+ supportedMimeTypes : [text/csv]
+ charset : UTF-8
+ ignoreFirstLine : false
+ columns : [ user_screen_name, text ]
+ }
+ }
+
+ {
+ generateUUID {
+ field : id
+ preserveExisting : false
+ }
+ }
+
+ {
+ sanitizeUnknownSolrFields {
+ solrLocator : ${SOLR_LOCATOR}
+ }
+ }
+ ]
+ }
+
{
commands : [
{
@@ -180,6 +206,7 @@ morphlines : [
# the parser is chosen that is closest to the bottom in this list:
parsers : [
{ parser : org.apache.tika.parser.asm.ClassParser }
+ # { parser : org.apache.tika.parser.AutoDetectParser }
# { parser : org.gagravarr.tika.OggParser, additionalSupportedMimeTypes : [audio/ogg] }
{ parser : org.gagravarr.tika.FlacParser }
{ parser : org.apache.tika.parser.audio.AudioParser }
@@ -218,6 +245,7 @@ morphlines : [
{ parser : org.apache.tika.parser.xml.DcXMLParser }
{ parser : org.apache.tika.parser.xml.FictionBookParser }
{ parser : org.apache.tika.parser.chm.ChmParser }
+ #{ parser : org.apache.tika.parser.AutoDetectParser }
]
}
}
Modified: lucene/dev/branches/branch_4x/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MRUnitBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MRUnitBase.java?rev=1558580&r1=1558579&r2=1558580&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MRUnitBase.java (original)
+++ lucene/dev/branches/branch_4x/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MRUnitBase.java Wed Jan 15 21:57:57 2014
@@ -23,6 +23,7 @@ import org.apache.commons.io.FileUtils;
import org.apache.hadoop.conf.Configuration;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.hadoop.morphline.MorphlineMapRunner;
+import org.apache.solr.morphlines.solr.AbstractSolrMorphlineTestBase;
import org.apache.solr.util.ExternalPaths;
import org.junit.AfterClass;
import org.junit.BeforeClass;
@@ -50,17 +51,10 @@ public abstract class MRUnitBase extends
new File(tempDir).mkdirs();
FileUtils.copyFile(new File(RESOURCES_DIR + "/custom-mimetypes.xml"), new File(tempDir + "/custom-mimetypes.xml"));
- setupMorphline(tempDir, "test-morphlines/solrCellDocumentTypes");
+ AbstractSolrMorphlineTestBase.setupMorphline(tempDir, "test-morphlines/solrCellDocumentTypes", true);
config.set(MorphlineMapRunner.MORPHLINE_FILE_PARAM, tempDir + "/test-morphlines/solrCellDocumentTypes.conf");
config.set(SolrOutputFormat.ZIP_NAME, solrHomeZip.getName());
}
- public static void setupMorphline(String tempDir, String file) throws IOException {
- String morphlineText = FileUtils.readFileToString(new File(RESOURCES_DIR + "/" + file + ".conf"), "UTF-8");
- morphlineText = morphlineText.replaceAll("RESOURCES_DIR", new File(tempDir).getAbsolutePath());
- morphlineText = morphlineText.replaceAll("\\$\\{SOLR_LOCATOR\\}", "{ collection : collection1 }");
-
- FileUtils.writeStringToFile(new File(tempDir + "/" + file + ".conf"), morphlineText, "UTF-8");
- }
}
Modified: lucene/dev/branches/branch_4x/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineBasicMiniMRTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineBasicMiniMRTest.java?rev=1558580&r1=1558579&r2=1558580&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineBasicMiniMRTest.java (original)
+++ lucene/dev/branches/branch_4x/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineBasicMiniMRTest.java Wed Jan 15 21:57:57 2014
@@ -42,6 +42,7 @@ import org.apache.lucene.util.LuceneTest
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.cloud.AbstractZkTestCase;
import org.apache.solr.hadoop.hack.MiniMRCluster;
+import org.apache.solr.morphlines.solr.AbstractSolrMorphlineTestBase;
import org.apache.solr.util.ExternalPaths;
import org.junit.After;
import org.junit.AfterClass;
@@ -125,7 +126,7 @@ public class MorphlineBasicMiniMRTest ex
new File(tempDir).mkdirs();
FileUtils.copyFile(new File(RESOURCES_DIR + "/custom-mimetypes.xml"), new File(tempDir + "/custom-mimetypes.xml"));
- MRUnitBase.setupMorphline(tempDir, "test-morphlines/solrCellDocumentTypes");
+ AbstractSolrMorphlineTestBase.setupMorphline(tempDir, "test-morphlines/solrCellDocumentTypes", true);
System.setProperty("hadoop.log.dir", new File(solrHomeDirectory, "logs").getAbsolutePath());
Modified: lucene/dev/branches/branch_4x/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineGoLiveMiniMRTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineGoLiveMiniMRTest.java?rev=1558580&r1=1558579&r2=1558580&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineGoLiveMiniMRTest.java (original)
+++ lucene/dev/branches/branch_4x/solr/contrib/map-reduce/src/test/org/apache/solr/hadoop/MorphlineGoLiveMiniMRTest.java Wed Jan 15 21:57:57 2014
@@ -67,6 +67,7 @@ import org.apache.solr.common.params.Mod
import org.apache.solr.common.util.NamedList;
import org.apache.solr.hadoop.hack.MiniMRClientCluster;
import org.apache.solr.hadoop.hack.MiniMRClientClusterFactory;
+import org.apache.solr.morphlines.solr.AbstractSolrMorphlineTestBase;
import org.apache.solr.util.ExternalPaths;
import org.junit.After;
import org.junit.AfterClass;
@@ -142,7 +143,7 @@ public class MorphlineGoLiveMiniMRTest e
new File(tempDir).mkdirs();
FileUtils.copyFile(new File(RESOURCES_DIR + "/custom-mimetypes.xml"), new File(tempDir + "/custom-mimetypes.xml"));
- MRUnitBase.setupMorphline(tempDir, "test-morphlines/solrCellDocumentTypes");
+ AbstractSolrMorphlineTestBase.setupMorphline(tempDir, "test-morphlines/solrCellDocumentTypes", true);
System.setProperty("hadoop.log.dir", new File(dataDir, "logs").getAbsolutePath());
Modified: lucene/dev/branches/branch_4x/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/SolrCellBuilder.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/SolrCellBuilder.java?rev=1558580&r1=1558579&r2=1558580&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/SolrCellBuilder.java (original)
+++ lucene/dev/branches/branch_4x/solr/contrib/morphlines-cell/src/java/org/apache/solr/morphlines/cell/SolrCellBuilder.java Wed Jan 15 21:57:57 2014
@@ -223,16 +223,12 @@ public final class SolrCellBuilder imple
ParseContext parseContext = new ParseContext();
- // necessary for gzipped files or tar files, etc! copied from TikaCLI
- parseContext.set(Parser.class, parser);
-
Metadata metadata = new Metadata();
for (Entry<String, Object> entry : record.getFields().entries()) {
metadata.add(entry.getKey(), entry.getValue().toString());
}
SolrContentHandler handler = solrContentHandlerFactory.createSolrContentHandler(metadata, solrParams, schema);
-
try {
inputStream = TikaInputStream.get(inputStream);
Modified: lucene/dev/branches/branch_4x/solr/contrib/morphlines-cell/src/test/org/apache/solr/morphlines/cell/SolrCellMorphlineTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/contrib/morphlines-cell/src/test/org/apache/solr/morphlines/cell/SolrCellMorphlineTest.java?rev=1558580&r1=1558579&r2=1558580&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/contrib/morphlines-cell/src/test/org/apache/solr/morphlines/cell/SolrCellMorphlineTest.java (original)
+++ lucene/dev/branches/branch_4x/solr/contrib/morphlines-cell/src/test/org/apache/solr/morphlines/cell/SolrCellMorphlineTest.java Wed Jan 15 21:57:57 2014
@@ -18,6 +18,7 @@ package org.apache.solr.morphlines.cell;
import java.io.File;
import java.util.HashMap;
+import java.util.LinkedHashMap;
import java.util.Map;
import org.apache.commons.io.FileUtils;
@@ -37,7 +38,7 @@ import org.junit.Test;
public class SolrCellMorphlineTest extends AbstractSolrMorphlineTestBase {
private Map<String,Integer> expectedRecords = new HashMap<String,Integer>();
-
+ private Map<String, Map<String, Object>> expectedRecordContents = new HashMap<String, Map<String, Object>>();
@BeforeClass
public static void beforeClass2() {
assumeFalse("FIXME: This test fails under Java 8 due to the Saxon dependency - see SOLR-1301", Constants.JRE_IS_MINIMUM_JAVA8);
@@ -47,16 +48,17 @@ public class SolrCellMorphlineTest exten
@Before
public void setUp() throws Exception {
super.setUp();
+
String path = RESOURCES_DIR + "/test-documents";
expectedRecords.put(path + "/sample-statuses-20120906-141433.avro", 2);
expectedRecords.put(path + "/sample-statuses-20120906-141433", 2);
expectedRecords.put(path + "/sample-statuses-20120906-141433.gz", 2);
expectedRecords.put(path + "/sample-statuses-20120906-141433.bz2", 2);
- expectedRecords.put(path + "/cars.csv", 5);
- expectedRecords.put(path + "/cars.csv.gz", 5);
+ expectedRecords.put(path + "/cars.csv", 6);
+ expectedRecords.put(path + "/cars.csv.gz", 6);
expectedRecords.put(path + "/cars.tar.gz", 4);
- expectedRecords.put(path + "/cars.tsv", 5);
- expectedRecords.put(path + "/cars.ssv", 5);
+ expectedRecords.put(path + "/cars.tsv", 6);
+ expectedRecords.put(path + "/cars.ssv", 6);
expectedRecords.put(path + "/test-documents.7z", 9);
expectedRecords.put(path + "/test-documents.cpio", 9);
expectedRecords.put(path + "/test-documents.tar", 9);
@@ -65,12 +67,80 @@ public class SolrCellMorphlineTest exten
expectedRecords.put(path + "/test-documents.zip", 9);
expectedRecords.put(path + "/multiline-stacktrace.log", 4);
+ {
+ Map<String, Object> record = new LinkedHashMap();
+ record.put("ignored__attachment_mimetype", "image/jpeg");
+ record.put("ignored_exif_isospeedratings", "400");
+ record.put("ignored_meta_creation_date", "2009-08-11T09:09:45");
+ record.put("ignored_tiff_model", "Canon EOS 40D");
+ record.put("text", NON_EMPTY_FIELD);
+ expectedRecordContents.put("/testJPEG_EXIF.jpg", record);
+ expectedRecordContents.put("/testJPEG_EXIF.jpg.tar", record);
+ expectedRecordContents.put("/testJPEG_EXIF.jpg.tar.gz", record);
+ }
+
+ {
+ String file = path + "/testWORD_various.doc";
+ Map<String, Object> record = new LinkedHashMap();
+ record.put("ignored__attachment_mimetype", "application/msword");
+ record.put("ignored_author", "Michael McCandless");
+ record.put("ignored_creation_date", "2011-09-02T10:11:00Z");
+ record.put("ignored_title", "");
+ record.put("ignored_keywords", "Keyword1 Keyword2");
+ record.put("ignored_subject", "Subject is here");
+ record.put("text", NON_EMPTY_FIELD);
+ expectedRecordContents.put(file, record);
+ }
+
+ {
+ String file = path + "/testPDF.pdf";
+ Map<String, Object> record = new LinkedHashMap();
+ record.put("ignored__attachment_mimetype", "application/pdf");
+ record.put("ignored_author", "Bertrand Delacrétaz");
+ record.put("ignored_creation_date", "2007-09-15T09:02:31Z");
+ record.put("ignored_title", "Apache Tika - Apache Tika");
+ record.put("ignored_xmp_creatortool", "Firefox");
+ record.put("text", NON_EMPTY_FIELD);
+ expectedRecordContents.put(file, record);
+ }
+
+ {
+ String file = path + "/email.eml";
+ Map<String, Object> record = new LinkedHashMap();
+ String name = "Patrick Foo <fo...@cloudera.com>";
+ record.put("ignored__attachment_mimetype", "message/rfc822");
+ record.put("ignored_author", name);
+ //record.put("ignored_content_length", "1068");
+ record.put("ignored_creation_date", "2013-11-27T20:01:23Z");
+ record.put("ignored_message_from", name);
+ record.put("ignored_message_to", name);
+ record.put("ignored_creator", name);
+ record.put("ignored_dc_creator", name);
+ record.put("ignored_dc_title", "Test EML");
+ record.put("ignored_dcterms_created", "2013-11-27T20:01:23Z");
+ record.put("ignored_meta_author", name);
+ record.put("ignored_meta_creation_date", "2013-11-27T20:01:23Z");
+ record.put("ignored_subject", "Test EML");
+ record.put("text", NON_EMPTY_FIELD);
+ expectedRecordContents.put(file, record);
+ }
+
+ {
+ String file = path + "/testEXCEL.xlsx";
+ Map<String, Object> record = new LinkedHashMap();
+ record.put("ignored__attachment_mimetype", "application/vnd.openxmlformats-officedocument.spreadsheetml.sheet");
+ record.put("ignored_author", "Keith Bennett");
+ record.put("ignored_creation_date", "2007-10-01T16:13:56Z");
+ record.put("ignored_title", "Simple Excel document");
+ record.put("text", NON_EMPTY_FIELD);
+ expectedRecordContents.put(file, record);
+ }
+
FileUtils.copyFile(new File(RESOURCES_DIR + "/custom-mimetypes.xml"), new File(tempDir + "/custom-mimetypes.xml"));
}
@Test
public void testSolrCellJPGCompressed() throws Exception {
-
morphline = createMorphline("test-morphlines/solrCellJPGCompressed");
String path = RESOURCES_DIR + "/test-documents";
String[] files = new String[] {
@@ -79,7 +149,7 @@ public class SolrCellMorphlineTest exten
path + "/testJPEG_EXIF.jpg.tar.gz",
//path + "/jpeg2000.jp2",
};
- testDocumentTypesInternal(files, expectedRecords);
+ testDocumentTypesInternal(files, expectedRecords, expectedRecordContents);
}
@Test
@@ -89,13 +159,14 @@ public class SolrCellMorphlineTest exten
String[] files = new String[] {
path + "/testXML2.xml",
};
- testDocumentTypesInternal(files, expectedRecords);
+ testDocumentTypesInternal(files, expectedRecords, expectedRecordContents);
}
@Test
public void testSolrCellDocumentTypes() throws Exception {
-
- morphline = createMorphline("test-morphlines/solrCellDocumentTypes");
+ AbstractSolrMorphlineTestBase.setupMorphline(tempDir, "test-morphlines/solrCellDocumentTypes", false);
+
+ morphline = createMorphline(new File(tempDir).getAbsolutePath() + "/test-morphlines/solrCellDocumentTypes");
String path = RESOURCES_DIR + "/test-documents";
String[] files = new String[] {
path + "/testBMPfp.txt",
@@ -107,22 +178,26 @@ public class SolrCellMorphlineTest exten
path + "/testJPEG_EXIF.jpg.gz",
path + "/testJPEG_EXIF.jpg.tar.gz",
path + "/testXML.xml",
-// path + "/cars.csv",
+ path + "/cars.csv",
// path + "/cars.tsv",
// path + "/cars.ssv",
-// path + "/cars.csv.gz",
-// path + "/cars.tar.gz",
+ path + "/cars.csv.gz",
+ path + "/cars.tar.gz",
path + "/sample-statuses-20120906-141433.avro",
path + "/sample-statuses-20120906-141433",
path + "/sample-statuses-20120906-141433.gz",
path + "/sample-statuses-20120906-141433.bz2",
+ path + "/email.eml",
};
- testDocumentTypesInternal(files, expectedRecords);
+ testDocumentTypesInternal(files, expectedRecords, expectedRecordContents);
}
@Test
public void testSolrCellDocumentTypes2() throws Exception {
- morphline = createMorphline("test-morphlines/solrCellDocumentTypes");
+
+ AbstractSolrMorphlineTestBase.setupMorphline(tempDir, "test-morphlines/solrCellDocumentTypes", false);
+
+ morphline = createMorphline(new File(tempDir).getAbsolutePath() + "/test-morphlines/solrCellDocumentTypes");
String path = RESOURCES_DIR + "/test-documents";
String[] files = new String[] {
path + "/testPPT_various.ppt",
@@ -137,7 +212,7 @@ public class SolrCellMorphlineTest exten
path + "/complex.mbox",
path + "/test-outlook.msg",
path + "/testEMLX.emlx",
-// path + "/testRFC822",
+ path + "/testRFC822",
path + "/rsstest.rss",
// path + "/testDITA.dita",
@@ -176,7 +251,7 @@ public class SolrCellMorphlineTest exten
// path + "/testWINMAIL.dat",
// path + "/testWMF.wmf",
};
- testDocumentTypesInternal(files, expectedRecords);
+ testDocumentTypesInternal(files, expectedRecords, expectedRecordContents);
}
/**
Modified: lucene/dev/branches/branch_4x/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/SolrLocator.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/SolrLocator.java?rev=1558580&r1=1558579&r2=1558580&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/SolrLocator.java (original)
+++ lucene/dev/branches/branch_4x/solr/contrib/morphlines-core/src/java/org/apache/solr/morphlines/solr/SolrLocator.java Wed Jan 15 21:57:57 2014
@@ -20,11 +20,13 @@ import org.kitesdk.morphline.api.Morphli
import org.kitesdk.morphline.api.MorphlineContext;
import org.kitesdk.morphline.api.MorphlineRuntimeException;
import org.kitesdk.morphline.base.Configs;
+
import com.google.common.base.Preconditions;
import com.typesafe.config.Config;
import com.typesafe.config.ConfigFactory;
import com.typesafe.config.ConfigRenderOptions;
import com.typesafe.config.ConfigUtil;
+
import org.apache.solr.client.solrj.SolrServer;
import org.apache.solr.client.solrj.impl.CloudSolrServer;
import org.apache.solr.common.cloud.SolrZkClient;
@@ -39,8 +41,10 @@ import org.xml.sax.InputSource;
import org.xml.sax.SAXException;
import javax.xml.parsers.ParserConfigurationException;
+
import java.io.File;
import java.io.IOException;
+import java.net.MalformedURLException;
/**
* Set of configuration parameters that identify the location and schema of a Solr server or
@@ -57,8 +61,6 @@ public class SolrLocator {
private String solrHomeDir;
private int batchSize = 1000;
- private static final String SOLR_HOME_PROPERTY_NAME = "solr.solr.home";
-
private static final Logger LOG = LoggerFactory.getLogger(SolrLocator.class);
protected SolrLocator(MorphlineContext context) {
@@ -120,7 +122,6 @@ public class SolrLocator {
// If solrHomeDir isn't defined and zkHost and collectionName are defined
// then download schema.xml and solrconfig.xml, etc from zk and use that as solrHomeDir
- String oldSolrHomeDir = null;
String mySolrHomeDir = solrHomeDir;
if (solrHomeDir == null || solrHomeDir.length() == 0) {
if (zkHost == null || zkHost.length() == 0) {
@@ -150,20 +151,13 @@ public class SolrLocator {
}
}
- oldSolrHomeDir = System.setProperty(SOLR_HOME_PROPERTY_NAME, mySolrHomeDir);
+ LOG.debug("SolrLocator loading IndexSchema from dir {}", mySolrHomeDir);
try {
- SolrConfig solrConfig = new SolrConfig(); // TODO use SolrResourceLoader ala TikaMapper?
- // SolrConfig solrConfig = new SolrConfig("solrconfig.xml");
- // SolrConfig solrConfig = new
- // SolrConfig("/cloud/apache-solr-4.0.0-BETA/example/solr/collection1",
- // "solrconfig.xml", null);
- // SolrConfig solrConfig = new
- // SolrConfig("/cloud/apache-solr-4.0.0-BETA/example/solr/collection1/conf/solrconfig.xml");
- SolrResourceLoader loader = solrConfig.getResourceLoader();
-
+ SolrResourceLoader loader = new SolrResourceLoader(mySolrHomeDir);
+ SolrConfig solrConfig = new SolrConfig(loader, "solrconfig.xml", null);
InputSource is = new InputSource(loader.openSchema("schema.xml"));
- is.setSystemId(SystemIdResolver.createSystemIdFromResourceName("schema.xml"));
-
+ is.setSystemId(SystemIdResolver.createSystemIdFromResourceName("schema.xml"));
+
IndexSchema schema = new IndexSchema(solrConfig, "schema.xml", is);
validateSchema(schema);
return schema;
@@ -173,14 +167,6 @@ public class SolrLocator {
throw new MorphlineRuntimeException(e);
} catch (SAXException e) {
throw new MorphlineRuntimeException(e);
- } finally { // restore old global state
- if (solrHomeDir != null) {
- if (oldSolrHomeDir == null) {
- System.clearProperty(SOLR_HOME_PROPERTY_NAME);
- } else {
- System.setProperty(SOLR_HOME_PROPERTY_NAME, oldSolrHomeDir);
- }
- }
}
}
Modified: lucene/dev/branches/branch_4x/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/AbstractSolrMorphlineTestBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/AbstractSolrMorphlineTestBase.java?rev=1558580&r1=1558579&r2=1558580&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/AbstractSolrMorphlineTestBase.java (original)
+++ lucene/dev/branches/branch_4x/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/AbstractSolrMorphlineTestBase.java Wed Jan 15 21:57:57 2014
@@ -19,6 +19,7 @@ package org.apache.solr.morphlines.solr;
import java.io.ByteArrayInputStream;
import java.io.File;
import java.io.IOException;
+import java.util.Arrays;
import java.util.Collection;
import java.util.HashMap;
import java.util.HashSet;
@@ -28,7 +29,6 @@ import java.util.Map.Entry;
import java.util.concurrent.atomic.AtomicInteger;
import org.apache.commons.io.FileUtils;
-import org.apache.commons.lang.StringEscapeUtils;
import org.apache.solr.SolrTestCaseJ4;
import org.apache.solr.client.solrj.SolrQuery;
import org.apache.solr.client.solrj.SolrServer;
@@ -41,9 +41,6 @@ import org.apache.solr.util.ExternalPath
import org.junit.After;
import org.junit.Before;
import org.junit.BeforeClass;
-import org.slf4j.Logger;
-import org.slf4j.LoggerFactory;
-
import org.kitesdk.morphline.api.Collector;
import org.kitesdk.morphline.api.Command;
import org.kitesdk.morphline.api.MorphlineContext;
@@ -53,6 +50,9 @@ import org.kitesdk.morphline.base.FaultT
import org.kitesdk.morphline.base.Fields;
import org.kitesdk.morphline.base.Notifications;
import org.kitesdk.morphline.stdlib.PipeBuilder;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
import com.codahale.metrics.MetricRegistry;
import com.google.common.io.Files;
import com.typesafe.config.Config;
@@ -73,6 +73,8 @@ public class AbstractSolrMorphlineTestBa
protected static final AtomicInteger SEQ_NUM = new AtomicInteger();
protected static final AtomicInteger SEQ_NUM2 = new AtomicInteger();
+ protected static final Object NON_EMPTY_FIELD = new Object();
+
private static final Logger LOGGER = LoggerFactory.getLogger(AbstractSolrMorphlineTestBase.class);
protected String tempDir;
@@ -113,7 +115,7 @@ public class AbstractSolrMorphlineTestBa
testServer = new SolrServerDocumentLoader(solrServer, batchSize);
deleteAllDocuments();
- tempDir = TEMP_DIR + "/test-morphlines-" + System.currentTimeMillis();
+ tempDir = new File(TEMP_DIR + "/test-morphlines-" + System.currentTimeMillis()).getAbsolutePath();
new File(tempDir).mkdirs();
}
@@ -124,7 +126,11 @@ public class AbstractSolrMorphlineTestBa
super.tearDown();
}
- protected void testDocumentTypesInternal(String[] files, Map<String,Integer> expectedRecords) throws Exception {
+ protected void testDocumentTypesInternal(
+ String[] files,
+ Map<String,Integer> expectedRecords,
+ Map<String, Map<String, Object>> expectedRecordContents) throws Exception {
+
deleteAllDocuments();
int numDocs = 0;
for (int i = 0; i < 1; i++) {
@@ -137,6 +143,7 @@ public class AbstractSolrMorphlineTestBa
event.getFields().put(Fields.ATTACHMENT_BODY, new ByteArrayInputStream(body));
event.getFields().put(Fields.ATTACHMENT_NAME, f.getName());
event.getFields().put(Fields.BASE_ID, f.getName());
+ collector.reset();
load(event);
Integer count = expectedRecords.get(file);
if (count != null) {
@@ -145,6 +152,20 @@ public class AbstractSolrMorphlineTestBa
numDocs++;
}
assertEquals("unexpected results in " + file, numDocs, queryResultSetSize("*:*"));
+ Map<String, Object> expectedContents = expectedRecordContents.get(file);
+ if (expectedContents != null) {
+ Record actual = collector.getFirstRecord();
+ for (Map.Entry<String, Object> entry : expectedContents.entrySet()) {
+ if (entry.getValue() == NON_EMPTY_FIELD) {
+ assertNotNull(entry.getKey());
+ assertTrue(actual.getFirstValue(entry.getKey()).toString().length() > 0);
+ } else if (entry.getValue() == null) {
+ assertEquals("key:" + entry.getKey(), 0, actual.get(entry.getKey()).size());
+ } else {
+ assertEquals("key:" + entry.getKey(), Arrays.asList(entry.getValue()), actual.get(entry.getKey()));
+ }
+ }
+ }
}
}
assertEquals(numDocs, queryResultSetSize("*:*"));
@@ -180,17 +201,7 @@ public class AbstractSolrMorphlineTestBa
s.commit();
}
-
- public static void setupMorphline(String tempDir, String file) throws IOException {
- String morphlineText = FileUtils.readFileToString(new File(RESOURCES_DIR + "/" + file + ".conf"), "UTF-8");
- morphlineText = morphlineText.replace("RESOURCES_DIR", StringEscapeUtils.escapeJavaScript(new File(tempDir).getAbsolutePath()));
-
- FileUtils.writeStringToFile(new File(tempDir + "/" + file + ".conf"), morphlineText, "UTF-8");
- }
-
protected Command createMorphline(String file) throws IOException {
- setupMorphline(tempDir, file);
-
return new PipeBuilder().build(parse(file), null, collector, createMorphlineContext());
}
@@ -206,7 +217,13 @@ public class AbstractSolrMorphlineTestBa
private Config parse(String file) throws IOException {
SolrLocator locator = new SolrLocator(createMorphlineContext());
locator.setSolrHomeDir(testSolrHome + "/collection1");
- Config config = new Compiler().parse(new File(tempDir + "/" + file + ".conf"), locator.toConfig("SOLR_LOCATOR"));
+ File morphlineFile;
+ if (new File(file).isAbsolute()) {
+ morphlineFile = new File(file + ".conf");
+ } else {
+ morphlineFile = new File(RESOURCES_DIR + "/" + file + ".conf");
+ }
+ Config config = new Compiler().parse(morphlineFile, locator.toConfig("SOLR_LOCATOR"));
config = config.getConfigList("morphlines").get(0);
return config;
}
@@ -266,4 +283,15 @@ public class AbstractSolrMorphlineTestBa
public HashSet<String> getFieldValues() { return fieldValues; }
public CompareType getCompareType() { return compareType; }
}
+
+ public static void setupMorphline(String tempDir, String file, boolean replaceSolrLocator) throws IOException {
+ String morphlineText = FileUtils.readFileToString(new File(RESOURCES_DIR + "/" + file + ".conf"), "UTF-8");
+ morphlineText = morphlineText.replaceAll("RESOURCES_DIR", new File(tempDir).getAbsolutePath());
+ if (replaceSolrLocator) {
+ morphlineText = morphlineText.replaceAll("\\$\\{SOLR_LOCATOR\\}",
+ "{ collection : collection1 }");
+ }
+ new File(tempDir + "/" + file + ".conf").getParentFile().mkdirs();
+ FileUtils.writeStringToFile(new File(tempDir + "/" + file + ".conf"), morphlineText, "UTF-8");
+ }
}
Modified: lucene/dev/branches/branch_4x/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineTest.java?rev=1558580&r1=1558579&r2=1558580&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineTest.java (original)
+++ lucene/dev/branches/branch_4x/solr/contrib/morphlines-core/src/test/org/apache/solr/morphlines/solr/SolrMorphlineTest.java Wed Jan 15 21:57:57 2014
@@ -57,18 +57,21 @@ public class SolrMorphlineTest extends A
@Test
public void testTokenizeText() throws Exception {
- morphline = createMorphline("test-morphlines/tokenizeText");
- Record record = new Record();
- record.put(Fields.MESSAGE, "Hello World!");
- record.put(Fields.MESSAGE, "\nFoo@Bar.com #%()123");
- Record expected = record.copy();
- expected.getFields().putAll("tokens", Arrays.asList("hello", "world", "foo", "bar.com", "123"));
- startSession();
- Notifications.notifyBeginTransaction(morphline);
- assertTrue(morphline.process(record));
- assertEquals(1, collector.getNumStartEvents());
- Notifications.notifyCommitTransaction(morphline);
- assertEquals(expected, collector.getFirstRecord());
+ morphline = createMorphline("test-morphlines/tokenizeText");
+ for (int i = 0; i < 3; i++) {
+ Record record = new Record();
+ record.put(Fields.MESSAGE, "Hello World!");
+ record.put(Fields.MESSAGE, "\nFoo@Bar.com #%()123");
+ Record expected = record.copy();
+ expected.getFields().putAll("tokens", Arrays.asList("hello", "world", "foo", "bar.com", "123"));
+ collector.reset();
+ startSession();
+ Notifications.notifyBeginTransaction(morphline);
+ assertTrue(morphline.process(record));
+ assertEquals(1, collector.getNumStartEvents());
+ Notifications.notifyCommitTransaction(morphline);
+ assertEquals(expected, collector.getFirstRecord());
+ }
}
}