You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by da...@apache.org on 2018/11/02 10:30:03 UTC
[5/7] lucene-solr:jira/gradle: Adding solr:analysis-extras module
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6c070b4a/solr/contrib/analysis-extras/src/test/java/org/apache/solr/update/processor/TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory.java
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/test/java/org/apache/solr/update/processor/TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory.java b/solr/contrib/analysis-extras/src/test/java/org/apache/solr/update/processor/TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory.java
new file mode 100644
index 0000000..851fea0
--- /dev/null
+++ b/solr/contrib/analysis-extras/src/test/java/org/apache/solr/update/processor/TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory.java
@@ -0,0 +1,195 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.update.processor;
+
+import java.io.File;
+import java.util.Arrays;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.solr.common.SolrInputDocument;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory extends UpdateProcessorTestBase {
+
+ @BeforeClass
+ public static void beforeClass() throws Exception {
+ File testHome = createTempDir().toFile();
+ FileUtils.copyDirectory(getFile("analysis-extras/solr"), testHome);
+ initCore("solrconfig-opennlp-extract.xml", "schema-opennlp-extract.xml", testHome.getAbsolutePath());
+ }
+
+ @Test
+ public void testSimpleExtract() throws Exception {
+ SolrInputDocument doc = processAdd("extract-single",
+ doc(f("id", "1"),
+ f("source1_s", "Take this to Mr. Flashman.")));
+ assertEquals("dest_s should have stringValue", "Flashman", doc.getFieldValue("dest_s"));
+ }
+
+ @Test
+ public void testMultiExtract() throws Exception {
+ SolrInputDocument doc = processAdd("extract-multi",
+ doc(f("id", "1"),
+ f("source1_s", "Hello Flashman."),
+ f("source2_s", "Calling Flashman.")));
+
+ assertEquals(Arrays.asList("Flashman", "Flashman"), doc.getFieldValues("dest_s"));
+ }
+
+ @Test
+ public void testArrayExtract() throws Exception {
+ SolrInputDocument doc = processAdd("extract-array",
+ doc(f("id", "1"),
+ f("source1_s", "Currently we have Flashman. Not much else."),
+ f("source2_s", "Flashman. Is. Not. There.")));
+
+ assertEquals(Arrays.asList("Flashman", "Flashman"), doc.getFieldValues("dest_s"));
+ }
+
+ @Test
+ public void testSelectorExtract() throws Exception {
+ SolrInputDocument doc = processAdd("extract-selector",
+ doc(f("id", "1"),
+ f("source0_s", "Flashman. Or not."),
+ f("source1_s", "Serendipitously, he was. I mean, Flashman. And yet."),
+ f("source2_s", "Correct, Flashman.")));
+
+ assertEquals(Arrays.asList("Flashman", "Flashman"), doc.getFieldValues("dest_s"));
+ }
+
+ public void testMultipleExtracts() throws Exception {
+ // test example from the javadocs
+ SolrInputDocument doc = processAdd("multiple-extract",
+ doc(f("id", "1"),
+ f("text", "From Flashman. To Panman."),
+ f("title", "It's Captain Flashman.", "Privately, Flashman."),
+ f("subtitle", "Ineluctably, Flashman."),
+ f("corrolary_txt", "Forsooth thou bringeth Flashman."),
+ f("notes_txt", "Yes Flashman."),
+ f("summary", "Many aspire to be Flashman in London."),
+ f("descs", "Courage, Flashman.", "Ain't he Flashman."),
+ f("descriptions", "Flashman. Flashman. Flashman.")));
+
+ assertEquals(Arrays.asList("Flashman", "Flashman"), doc.getFieldValues("people_s"));
+ assertEquals(Arrays.asList("Flashman", "Flashman", "Flashman"), doc.getFieldValues("titular_people"));
+ assertEquals(Arrays.asList("Flashman", "Flashman"), doc.getFieldValues("key_desc_people"));
+ assertEquals(Arrays.asList("Flashman", "Flashman", "Flashman"), doc.getFieldValues("key_description_people"));
+ assertEquals("Flashman", doc.getFieldValue("summary_person_s")); // {EntityType} field name interpolation
+ assertEquals("London", doc.getFieldValue("summary_location_s")); // {EntityType} field name interpolation
+ }
+
+ public void testEquivalentExtraction() throws Exception {
+ SolrInputDocument d;
+
+ // regardless of chain, all of these checks should be equivalent
+ for (String chain : Arrays.asList("extract-single", "extract-single-regex",
+ "extract-multi", "extract-multi-regex",
+ "extract-array", "extract-array-regex",
+ "extract-selector", "extract-selector-regex")) {
+
+ // simple extract
+ d = processAdd(chain,
+ doc(f("id", "1111"),
+ f("source0_s", "Totally Flashman."), // not extracted
+ f("source1_s", "One nation under Flashman.", "Good Flashman.")));
+ assertNotNull(chain, d);
+ assertEquals(chain, Arrays.asList("Flashman", "Flashman"), d.getFieldValues("dest_s"));
+
+ // append to existing values
+ d = processAdd(chain,
+ doc(f("id", "1111"),
+ field("dest_s", "orig1", "orig2"),
+ f("source0_s", "Flashman. In totality."), // not extracted
+ f("source1_s", "Two nations under Flashman.", "Meh Flashman.")));
+ assertNotNull(chain, d);
+ assertEquals(chain, Arrays.asList("orig1", "orig2", "Flashman", "Flashman"), d.getFieldValues("dest_s"));
+ }
+
+ // should be equivalent for any chain matching source1_s and source2_s (but not source0_s)
+ for (String chain : Arrays.asList("extract-multi", "extract-multi-regex",
+ "extract-array", "extract-array-regex",
+ "extract-selector", "extract-selector-regex")) {
+
+ // simple extract
+ d = processAdd(chain,
+ doc(f("id", "1111"),
+ f("source0_s", "Not Flashman."), // not extracted
+ f("source1_s", "Could have had a Flashman.", "Bad Flashman."),
+ f("source2_s", "Indubitably Flashman.")));
+ assertNotNull(chain, d);
+ assertEquals(chain, Arrays.asList("Flashman", "Flashman", "Flashman"), d.getFieldValues("dest_s"));
+
+ // append to existing values
+ d = processAdd(chain,
+ doc(f("id", "1111"),
+ field("dest_s", "orig1", "orig2"),
+ f("source0_s", "Never Flashman."), // not extracted
+ f("source1_s", "Seeking Flashman.", "Evil incarnate Flashman."),
+ f("source2_s", "Perfunctorily Flashman.")));
+ assertNotNull(chain, d);
+ assertEquals(chain, Arrays.asList("orig1", "orig2", "Flashman", "Flashman", "Flashman"), d.getFieldValues("dest_s"));
+ }
+
+ // any chain that copies source1_s to dest_s should be equivalent for these assertions
+ for (String chain : Arrays.asList("extract-single", "extract-single-regex",
+ "extract-multi", "extract-multi-regex",
+ "extract-array", "extract-array-regex",
+ "extract-selector", "extract-selector-regex")) {
+
+ // simple extract
+ d = processAdd(chain,
+ doc(f("id", "1111"),
+ f("source1_s", "Always Flashman.", "Flashman. Noone else.")));
+ assertNotNull(chain, d);
+ assertEquals(chain, Arrays.asList("Flashman", "Flashman"), d.getFieldValues("dest_s"));
+
+ // append to existing values
+ d = processAdd(chain,
+ doc(f("id", "1111"),
+ field("dest_s", "orig1", "orig2"),
+ f("source1_s", "Flashman. And, scene.", "Contemporary Flashman. Yeesh.")));
+ assertNotNull(chain, d);
+ assertEquals(chain, Arrays.asList("orig1", "orig2", "Flashman", "Flashman"), d.getFieldValues("dest_s"));
+ }
+ }
+
+ public void testExtractFieldRegexReplaceAll() throws Exception {
+ SolrInputDocument d = processAdd("extract-regex-replaceall",
+ doc(f("id", "1111"),
+ f("foo_x2_s", "Infrequently Flashman.", "In the words of Flashman."),
+ f("foo_x3_x7_s", "Flashman. Whoa.")));
+
+ assertNotNull(d);
+ assertEquals(Arrays.asList("Flashman", "Flashman"), d.getFieldValues("foo_y2_s"));
+ assertEquals("Flashman", d.getFieldValue("foo_y3_y7_s"));
+ }
+
+ public void testExtractFieldRegexReplaceAllWithEntityType() throws Exception {
+ SolrInputDocument d = processAdd("extract-regex-replaceall-with-entity-type",
+ doc(f("id", "1111"),
+ f("foo_x2_s", "Infrequently Flashman in London.", "In the words of Flashman in London."),
+ f("foo_x3_x7_s", "Flashman in London. Whoa.")));
+
+ assertNotNull(d);
+ assertEquals(d.getFieldNames().toString(), Arrays.asList("Flashman", "Flashman"), d.getFieldValues("foo_person_y2_s"));
+ assertEquals(d.getFieldNames().toString(), Arrays.asList("London", "London"), d.getFieldValues("foo_location_y2_s"));
+ assertEquals(d.getFieldNames().toString(),"Flashman", d.getFieldValue("foo_person_y3_person_y7_s"));
+ assertEquals(d.getFieldNames().toString(),"London", d.getFieldValue("foo_location_y3_location_y7_s"));
+ }
+}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6c070b4a/solr/contrib/analysis-extras/src/test/org/apache/solr/analysis/TestFoldingMultitermExtrasQuery.java
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/test/org/apache/solr/analysis/TestFoldingMultitermExtrasQuery.java b/solr/contrib/analysis-extras/src/test/org/apache/solr/analysis/TestFoldingMultitermExtrasQuery.java
deleted file mode 100644
index b2cdbc2..0000000
--- a/solr/contrib/analysis-extras/src/test/org/apache/solr/analysis/TestFoldingMultitermExtrasQuery.java
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.analysis;
-
-import java.io.File;
-
-import org.apache.commons.io.FileUtils;
-import org.apache.solr.SolrTestCaseJ4;
-import org.junit.BeforeClass;
-import org.junit.Test;
-
-// See: https://issues.apache.org/jira/browse/SOLR-12028 Tests cannot remove files on Windows machines occasionally
-public class TestFoldingMultitermExtrasQuery extends SolrTestCaseJ4 {
-
- public String getCoreName() {
- return "basic";
- }
-
- @BeforeClass
- public static void beforeTests() throws Exception {
- File testHome = createTempDir().toFile();
- FileUtils.copyDirectory(getFile("analysis-extras/solr"), testHome);
- initCore("solrconfig-icucollate.xml","schema-folding-extra.xml", testHome.getAbsolutePath());
-
- int idx = 1;
- // ICUFoldingFilterFactory
- assertU(adoc("id", Integer.toString(idx++), "content_icufolding", "BadMagicICUFolding"));
- assertU(adoc("id", Integer.toString(idx++), "content_icufolding", "Ruß"));
- assertU(adoc("id", Integer.toString(idx++), "content_icufolding", "ΜΆΪΟΣ"));
- assertU(adoc("id", Integer.toString(idx++), "content_icufolding", "Μάϊος"));
- assertU(adoc("id", Integer.toString(idx++), "content_icufolding", "résumé"));
- assertU(adoc("id", Integer.toString(idx++), "content_icufolding", "re\u0301sume\u0301"));
- assertU(adoc("id", Integer.toString(idx++), "content_icufolding", "ELİF"));
- assertU(adoc("id", Integer.toString(idx++), "content_icufolding", "eli\u0307f"));
-
- // ICUNormalizer2FilterFactory
-
- assertU(adoc("id", Integer.toString(idx++), "content_icunormalizer2", "BadMagicICUFolding"));
- assertU(adoc("id", Integer.toString(idx++), "content_icunormalizer2", "Ruß"));
- assertU(adoc("id", Integer.toString(idx++), "content_icunormalizer2", "ΜΆΪΟΣ"));
- assertU(adoc("id", Integer.toString(idx++), "content_icunormalizer2", "Μάϊος"));
- assertU(adoc("id", Integer.toString(idx++), "content_icunormalizer2", "résumé"));
- assertU(adoc("id", Integer.toString(idx++), "content_icunormalizer2", "re\u0301sume\u0301"));
- assertU(adoc("id", Integer.toString(idx++), "content_icunormalizer2", "ELİF"));
- assertU(adoc("id", Integer.toString(idx++), "content_icunormalizer2", "eli\u0307f"));
-
- // ICUTransformFilterFactory
- assertU(adoc("id", Integer.toString(idx++), "content_icutransform", "Российская"));
-
- assertU(commit());
- }
-
- @Test
- public void testICUFolding() {
- assertQ(req("q", "content_icufolding:BadMagicicuFold*"), "//result[@numFound='1']");
- assertQ(req("q", "content_icufolding:rU*"), "//result[@numFound='1']");
- assertQ(req("q", "content_icufolding:Re*Me"), "//result[@numFound='2']");
- assertQ(req("q", "content_icufolding:RE\u0301su*"), "//result[@numFound='2']");
- assertQ(req("q", "content_icufolding:El*"), "//result[@numFound='2']");
- }
- @Test
- public void testICUNormalizer2() {
- assertQ(req("q", "content_icunormalizer2:BadMagicicuFold*"), "//result[@numFound='1']");
- assertQ(req("q", "content_icunormalizer2:RU*"), "//result[@numFound='1']");
- assertQ(req("q", "content_icunormalizer2:Μάϊ*"), "//result[@numFound='2']");
- assertQ(req("q", "content_icunormalizer2:re\u0301Su*"), "//result[@numFound='2']");
- assertQ(req("q", "content_icunormalizer2:eL*"), "//result[@numFound='2']");
- }
-
- public void testICUTransform() {
- assertQ(req("q", "content_icutransform:Росс*"), "//result[@numFound='1']");
- }
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6c070b4a/solr/contrib/analysis-extras/src/test/org/apache/solr/schema/TestICUCollationField.java
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/test/org/apache/solr/schema/TestICUCollationField.java b/solr/contrib/analysis-extras/src/test/org/apache/solr/schema/TestICUCollationField.java
deleted file mode 100644
index f164080..0000000
--- a/solr/contrib/analysis-extras/src/test/org/apache/solr/schema/TestICUCollationField.java
+++ /dev/null
@@ -1,192 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.schema;
-
-import java.io.File;
-import java.io.FileOutputStream;
-
-import org.apache.commons.io.FileUtils;
-import org.apache.commons.io.IOUtils;
-import org.apache.lucene.analysis.util.FilesystemResourceLoader;
-import org.apache.lucene.analysis.util.ResourceLoader;
-import org.apache.lucene.analysis.util.StringMockResourceLoader;
-import org.apache.solr.SolrTestCaseJ4;
-import org.junit.BeforeClass;
-
-import com.ibm.icu.text.Collator;
-import com.ibm.icu.text.RuleBasedCollator;
-import com.ibm.icu.util.ULocale;
-
-/**
- * Tests {@link ICUCollationField} with TermQueries, RangeQueries, and sort order.
- */
-public class TestICUCollationField extends SolrTestCaseJ4 {
-
- @BeforeClass
- public static void beforeClass() throws Exception {
- String home = setupSolrHome();
- initCore("solrconfig.xml","schema.xml", home);
- // add some docs
- assertU(adoc("id", "1", "text", "\u0633\u0627\u0628"));
- assertU(adoc("id", "2", "text", "I WİLL USE TURKİSH CASING"));
- assertU(adoc("id", "3", "text", "ı will use turkish casıng"));
- assertU(adoc("id", "4", "text", "Töne"));
- assertU(adoc("id", "5", "text", "I W\u0049\u0307LL USE TURKİSH CASING"));
- assertU(adoc("id", "6", "text", "Testing"));
- assertU(adoc("id", "7", "text", "Tone"));
- assertU(adoc("id", "8", "text", "Testing"));
- assertU(adoc("id", "9", "text", "testing"));
- assertU(adoc("id", "10", "text", "toene"));
- assertU(adoc("id", "11", "text", "Tzne"));
- assertU(adoc("id", "12", "text", "\u0698\u0698"));
- assertU(commit());
- }
-
- /**
- * Ugly: but what to do? We want to test custom sort, which reads rules in as a resource.
- * These are largish files, and jvm-specific (as our documentation says, you should always
- * look out for jvm differences with collation).
- * So it's preferable to create this file on-the-fly.
- */
- public static String setupSolrHome() throws Exception {
- String tmpFile = createTempDir().toFile().getAbsolutePath();
- // make data and conf dirs
- new File(tmpFile + "/collection1", "data").mkdirs();
- File confDir = new File(tmpFile + "/collection1", "conf");
- confDir.mkdirs();
-
- // copy over configuration files
- FileUtils.copyFile(getFile("analysis-extras/solr/collection1/conf/solrconfig-icucollate.xml"), new File(confDir, "solrconfig.xml"));
- FileUtils.copyFile(getFile("analysis-extras/solr/collection1/conf/schema-icucollate.xml"), new File(confDir, "schema.xml"));
-
- // generate custom collation rules (DIN 5007-2), saving to customrules.dat
- RuleBasedCollator baseCollator = (RuleBasedCollator) Collator.getInstance(new ULocale("de", "DE"));
-
- String DIN5007_2_tailorings =
- "& ae , a\u0308 & AE , A\u0308"+
- "& oe , o\u0308 & OE , O\u0308"+
- "& ue , u\u0308 & UE , u\u0308";
-
- RuleBasedCollator tailoredCollator = new RuleBasedCollator(baseCollator.getRules() + DIN5007_2_tailorings);
- String tailoredRules = tailoredCollator.getRules();
- final String osFileName = "customrules.dat";
- final FileOutputStream os = new FileOutputStream(new File(confDir, osFileName));
- IOUtils.write(tailoredRules, os, "UTF-8");
- os.close();
-
- final ResourceLoader loader;
- if (random().nextBoolean()) {
- loader = new StringMockResourceLoader(tailoredRules);
- } else {
- loader = new FilesystemResourceLoader(confDir.toPath());
- }
- final Collator readCollator = ICUCollationField.createFromRules(osFileName, loader);
- assertEquals(tailoredCollator, readCollator);
-
- return tmpFile;
- }
-
- /**
- * Test termquery with german DIN 5007-1 primary strength.
- * In this case, ö is equivalent to o (but not oe)
- */
- public void testBasicTermQuery() {
- assertQ("Collated TQ: ",
- req("fl", "id", "q", "sort_de:tone", "sort", "id asc" ),
- "//*[@numFound='2']",
- "//result/doc[1]/str[@name='id'][.=4]",
- "//result/doc[2]/str[@name='id'][.=7]"
- );
- }
-
- /**
- * Test rangequery again with the DIN 5007-1 collator.
- * We do a range query of tone .. tp, in binary order this
- * would retrieve nothing due to case and accent differences.
- */
- public void testBasicRangeQuery() {
- assertQ("Collated RangeQ: ",
- req("fl", "id", "q", "sort_de:[tone TO tp]", "sort", "id asc" ),
- "//*[@numFound='2']",
- "//result/doc[1]/str[@name='id'][.=4]",
- "//result/doc[2]/str[@name='id'][.=7]"
- );
- }
-
- /**
- * Test sort with a danish collator. ö is ordered after z
- */
- public void testBasicSort() {
- assertQ("Collated Sort: ",
- req("fl", "id", "q", "sort_da:[tz TO töz]", "sort", "sort_da asc" ),
- "//*[@numFound='2']",
- "//result/doc[1]/str[@name='id'][.=11]",
- "//result/doc[2]/str[@name='id'][.=4]"
- );
- }
-
- /**
- * Test sort with an arabic collator. U+0633 is ordered after U+0698.
- * With a binary collator, the range would also return nothing.
- */
- public void testArabicSort() {
- assertQ("Collated Sort: ",
- req("fl", "id", "q", "sort_ar:[\u0698 TO \u0633\u0633]", "sort", "sort_ar asc" ),
- "//*[@numFound='2']",
- "//result/doc[1]/str[@name='id'][.=12]",
- "//result/doc[2]/str[@name='id'][.=1]"
- );
- }
-
- /**
- * Test rangequery again with an Arabic collator.
- * Binary order would normally order U+0633 in this range.
- */
- public void testNegativeRangeQuery() {
- assertQ("Collated RangeQ: ",
- req("fl", "id", "q", "sort_ar:[\u062F TO \u0698]", "sort", "id asc" ),
- "//*[@numFound='0']"
- );
- }
- /**
- * Test canonical decomposition with turkish primary strength.
- * With this sort order, İ is the uppercase form of i, and I is the uppercase form of ı.
- * We index a decomposed form of İ.
- */
- public void testCanonicalDecomposition() {
- assertQ("Collated TQ: ",
- req("fl", "id", "q", "sort_tr_canon:\"I Will Use Turkish Casıng\"", "sort", "id asc" ),
- "//*[@numFound='3']",
- "//result/doc[1]/str[@name='id'][.=2]",
- "//result/doc[2]/str[@name='id'][.=3]",
- "//result/doc[3]/str[@name='id'][.=5]"
- );
- }
-
- /**
- * Test termquery with custom collator (DIN 5007-2).
- * In this case, ö is equivalent to oe (but not o)
- */
- public void testCustomCollation() {
- assertQ("Collated TQ: ",
- req("fl", "id", "q", "sort_custom:toene"),
- "//*[@numFound='2']",
- "//result/doc/str[@name='id'][.=4]",
- "//result/doc/str[@name='id'][.=10]"
- );
- }
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6c070b4a/solr/contrib/analysis-extras/src/test/org/apache/solr/schema/TestICUCollationFieldDocValues.java
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/test/org/apache/solr/schema/TestICUCollationFieldDocValues.java b/solr/contrib/analysis-extras/src/test/org/apache/solr/schema/TestICUCollationFieldDocValues.java
deleted file mode 100644
index 57b403a..0000000
--- a/solr/contrib/analysis-extras/src/test/org/apache/solr/schema/TestICUCollationFieldDocValues.java
+++ /dev/null
@@ -1,180 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.schema;
-
-import java.io.File;
-import java.io.FileOutputStream;
-
-import org.apache.commons.io.FileUtils;
-import org.apache.commons.io.IOUtils;
-import org.apache.solr.SolrTestCaseJ4;
-import org.junit.BeforeClass;
-
-import com.ibm.icu.text.Collator;
-import com.ibm.icu.text.RuleBasedCollator;
-import com.ibm.icu.util.ULocale;
-
-/**
- * Tests {@link ICUCollationField} with docValues.
- */
-public class TestICUCollationFieldDocValues extends SolrTestCaseJ4 {
-
- @BeforeClass
- public static void beforeClass() throws Exception {
- String home = setupSolrHome();
- initCore("solrconfig.xml","schema.xml", home);
- // add some docs
- assertU(adoc("id", "1", "text", "\u0633\u0627\u0628"));
- assertU(adoc("id", "2", "text", "I WİLL USE TURKİSH CASING"));
- assertU(adoc("id", "3", "text", "ı will use turkish casıng"));
- assertU(adoc("id", "4", "text", "Töne"));
- assertU(adoc("id", "5", "text", "I W\u0049\u0307LL USE TURKİSH CASING"));
- assertU(adoc("id", "6", "text", "Testing"));
- assertU(adoc("id", "7", "text", "Tone"));
- assertU(adoc("id", "8", "text", "Testing"));
- assertU(adoc("id", "9", "text", "testing"));
- assertU(adoc("id", "10", "text", "toene"));
- assertU(adoc("id", "11", "text", "Tzne"));
- assertU(adoc("id", "12", "text", "\u0698\u0698"));
- assertU(commit());
- }
-
- /**
- * Ugly: but what to do? We want to test custom sort, which reads rules in as a resource.
- * These are largish files, and jvm-specific (as our documentation says, you should always
- * look out for jvm differences with collation).
- * So it's preferable to create this file on-the-fly.
- */
- public static String setupSolrHome() throws Exception {
- File tmpFile = createTempDir().toFile();
-
- // make data and conf dirs
- new File(tmpFile + "/collection1", "data").mkdirs();
- File confDir = new File(tmpFile + "/collection1", "conf");
- confDir.mkdirs();
-
- // copy over configuration files
- FileUtils.copyFile(getFile("analysis-extras/solr/collection1/conf/solrconfig-icucollate.xml"), new File(confDir, "solrconfig.xml"));
- FileUtils.copyFile(getFile("analysis-extras/solr/collection1/conf/schema-icucollate-dv.xml"), new File(confDir, "schema.xml"));
-
- // generate custom collation rules (DIN 5007-2), saving to customrules.dat
- RuleBasedCollator baseCollator = (RuleBasedCollator) Collator.getInstance(new ULocale("de", "DE"));
-
- String DIN5007_2_tailorings =
- "& ae , a\u0308 & AE , A\u0308"+
- "& oe , o\u0308 & OE , O\u0308"+
- "& ue , u\u0308 & UE , u\u0308";
-
- RuleBasedCollator tailoredCollator = new RuleBasedCollator(baseCollator.getRules() + DIN5007_2_tailorings);
- String tailoredRules = tailoredCollator.getRules();
- FileOutputStream os = new FileOutputStream(new File(confDir, "customrules.dat"));
- IOUtils.write(tailoredRules, os, "UTF-8");
- os.close();
-
- return tmpFile.getAbsolutePath();
- }
-
- /**
- * Test termquery with german DIN 5007-1 primary strength.
- * In this case, ö is equivalent to o (but not oe)
- */
- public void testBasicTermQuery() {
- assertQ("Collated TQ: ",
- req("fl", "id", "q", "sort_de:tone", "sort", "id asc" ),
- "//*[@numFound='2']",
- "//result/doc[1]/str[@name='id'][.=4]",
- "//result/doc[2]/str[@name='id'][.=7]"
- );
- }
-
- /**
- * Test rangequery again with the DIN 5007-1 collator.
- * We do a range query of tone .. tp, in binary order this
- * would retrieve nothing due to case and accent differences.
- */
- public void testBasicRangeQuery() {
- assertQ("Collated RangeQ: ",
- req("fl", "id", "q", "sort_de:[tone TO tp]", "sort", "id asc" ),
- "//*[@numFound='2']",
- "//result/doc[1]/str[@name='id'][.=4]",
- "//result/doc[2]/str[@name='id'][.=7]"
- );
- }
-
- /**
- * Test sort with a danish collator. ö is ordered after z
- */
- public void testBasicSort() {
- assertQ("Collated Sort: ",
- req("fl", "id", "q", "sort_da:[tz TO töz]", "sort", "sort_da asc" ),
- "//*[@numFound='2']",
- "//result/doc[1]/str[@name='id'][.=11]",
- "//result/doc[2]/str[@name='id'][.=4]"
- );
- }
-
- /**
- * Test sort with an arabic collator. U+0633 is ordered after U+0698.
- * With a binary collator, the range would also return nothing.
- */
- public void testArabicSort() {
- assertQ("Collated Sort: ",
- req("fl", "id", "q", "sort_ar:[\u0698 TO \u0633\u0633]", "sort", "sort_ar asc" ),
- "//*[@numFound='2']",
- "//result/doc[1]/str[@name='id'][.=12]",
- "//result/doc[2]/str[@name='id'][.=1]"
- );
- }
-
- /**
- * Test rangequery again with an Arabic collator.
- * Binary order would normally order U+0633 in this range.
- */
- public void testNegativeRangeQuery() {
- assertQ("Collated RangeQ: ",
- req("fl", "id", "q", "sort_ar:[\u062F TO \u0698]", "sort", "id asc" ),
- "//*[@numFound='0']"
- );
- }
- /**
- * Test canonical decomposition with turkish primary strength.
- * With this sort order, İ is the uppercase form of i, and I is the uppercase form of ı.
- * We index a decomposed form of İ.
- */
- public void testCanonicalDecomposition() {
- assertQ("Collated TQ: ",
- req("fl", "id", "q", "sort_tr_canon:\"I Will Use Turkish Casıng\"", "sort", "id asc" ),
- "//*[@numFound='3']",
- "//result/doc[1]/str[@name='id'][.=2]",
- "//result/doc[2]/str[@name='id'][.=3]",
- "//result/doc[3]/str[@name='id'][.=5]"
- );
- }
-
- /**
- * Test termquery with custom collator (DIN 5007-2).
- * In this case, ö is equivalent to oe (but not o)
- */
- public void testCustomCollation() {
- assertQ("Collated TQ: ",
- req("fl", "id", "q", "sort_custom:toene"),
- "//*[@numFound='2']",
- "//result/doc/str[@name='id'][.=4]",
- "//result/doc/str[@name='id'][.=10]"
- );
- }
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6c070b4a/solr/contrib/analysis-extras/src/test/org/apache/solr/schema/TestICUCollationFieldOptions.java
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/test/org/apache/solr/schema/TestICUCollationFieldOptions.java b/solr/contrib/analysis-extras/src/test/org/apache/solr/schema/TestICUCollationFieldOptions.java
deleted file mode 100644
index 0b198b7..0000000
--- a/solr/contrib/analysis-extras/src/test/org/apache/solr/schema/TestICUCollationFieldOptions.java
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.schema;
-
-import org.apache.commons.io.FileUtils;
-import org.apache.solr.SolrTestCaseJ4;
-import org.junit.BeforeClass;
-
-import java.io.File;
-
-/**
- * Tests expert options of {@link ICUCollationField}.
- */
-public class TestICUCollationFieldOptions extends SolrTestCaseJ4 {
- @BeforeClass
- public static void beforeClass() throws Exception {
- File testHome = createTempDir().toFile();
- FileUtils.copyDirectory(getFile("analysis-extras/solr"), testHome);
- initCore("solrconfig-icucollate.xml","schema-icucollateoptions.xml", testHome.getAbsolutePath());
- // add some docs
- assertU(adoc("id", "1", "text", "foo-bar"));
- assertU(adoc("id", "2", "text", "foo bar"));
- assertU(adoc("id", "3", "text", "foobar"));
- assertU(adoc("id", "4", "text", "foobar-10"));
- assertU(adoc("id", "5", "text", "foobar-9"));
- assertU(adoc("id", "6", "text", "resume"));
- assertU(adoc("id", "7", "text", "Résumé"));
- assertU(adoc("id", "8", "text", "Resume"));
- assertU(adoc("id", "9", "text", "résumé"));
- assertU(commit());
- }
-
- /*
- * Setting alternate=shifted to shift whitespace, punctuation and symbols
- * to quaternary level
- */
- public void testIgnorePunctuation() {
- assertQ("Collated TQ: ",
- req("fl", "id", "q", "sort_ignore_punctuation:foobar", "sort", "id asc" ),
- "//*[@numFound='3']",
- "//result/doc[1]/str[@name='id'][.=1]",
- "//result/doc[2]/str[@name='id'][.=2]",
- "//result/doc[3]/str[@name='id'][.=3]"
- );
- }
-
- /*
- * Setting alternate=shifted and variableTop to shift whitespace, but not
- * punctuation or symbols, to quaternary level
- */
- public void testIgnoreWhitespace() {
- assertQ("Collated TQ: ",
- req("fl", "id", "q", "sort_ignore_space:\"foo bar\"", "sort", "id asc" ),
- "//*[@numFound='2']",
- "//result/doc[1]/str[@name='id'][.=2]",
- "//result/doc[2]/str[@name='id'][.=3]"
- );
- }
-
- /*
- * Setting numeric to encode digits with numeric value, so that
- * foobar-9 sorts before foobar-10
- */
- public void testNumerics() {
- assertQ("Collated sort: ",
- req("fl", "id", "q", "id:[4 TO 5]", "sort", "sort_numerics asc" ),
- "//*[@numFound='2']",
- "//result/doc[1]/str[@name='id'][.=5]",
- "//result/doc[2]/str[@name='id'][.=4]"
- );
- }
-
- /*
- * Setting caseLevel=true to create an additional case level between
- * secondary and tertiary
- */
- public void testIgnoreAccentsButNotCase() {
- assertQ("Collated TQ: ",
- req("fl", "id", "q", "sort_ignore_accents:resume", "sort", "id asc" ),
- "//*[@numFound='2']",
- "//result/doc[1]/str[@name='id'][.=6]",
- "//result/doc[2]/str[@name='id'][.=9]"
- );
-
- assertQ("Collated TQ: ",
- req("fl", "id", "q", "sort_ignore_accents:Resume", "sort", "id asc" ),
- "//*[@numFound='2']",
- "//result/doc[1]/str[@name='id'][.=7]",
- "//result/doc[2]/str[@name='id'][.=8]"
- );
- }
-
- /*
- * Setting caseFirst=upper to cause uppercase strings to sort
- * before lowercase ones.
- */
- public void testUpperCaseFirst() {
- assertQ("Collated sort: ",
- req("fl", "id", "q", "id:6 OR id:8", "sort", "sort_uppercase_first asc" ),
- "//*[@numFound='2']",
- "//result/doc[1]/str[@name='id'][.=8]",
- "//result/doc[2]/str[@name='id'][.=6]"
- );
- }
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6c070b4a/solr/contrib/analysis-extras/src/test/org/apache/solr/update/processor/TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory.java
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/test/org/apache/solr/update/processor/TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory.java b/solr/contrib/analysis-extras/src/test/org/apache/solr/update/processor/TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory.java
deleted file mode 100644
index 851fea0..0000000
--- a/solr/contrib/analysis-extras/src/test/org/apache/solr/update/processor/TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory.java
+++ /dev/null
@@ -1,195 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.solr.update.processor;
-
-import java.io.File;
-import java.util.Arrays;
-
-import org.apache.commons.io.FileUtils;
-import org.apache.solr.common.SolrInputDocument;
-import org.junit.BeforeClass;
-import org.junit.Test;
-
-public class TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory extends UpdateProcessorTestBase {
-
- @BeforeClass
- public static void beforeClass() throws Exception {
- File testHome = createTempDir().toFile();
- FileUtils.copyDirectory(getFile("analysis-extras/solr"), testHome);
- initCore("solrconfig-opennlp-extract.xml", "schema-opennlp-extract.xml", testHome.getAbsolutePath());
- }
-
- @Test
- public void testSimpleExtract() throws Exception {
- SolrInputDocument doc = processAdd("extract-single",
- doc(f("id", "1"),
- f("source1_s", "Take this to Mr. Flashman.")));
- assertEquals("dest_s should have stringValue", "Flashman", doc.getFieldValue("dest_s"));
- }
-
- @Test
- public void testMultiExtract() throws Exception {
- SolrInputDocument doc = processAdd("extract-multi",
- doc(f("id", "1"),
- f("source1_s", "Hello Flashman."),
- f("source2_s", "Calling Flashman.")));
-
- assertEquals(Arrays.asList("Flashman", "Flashman"), doc.getFieldValues("dest_s"));
- }
-
- @Test
- public void testArrayExtract() throws Exception {
- SolrInputDocument doc = processAdd("extract-array",
- doc(f("id", "1"),
- f("source1_s", "Currently we have Flashman. Not much else."),
- f("source2_s", "Flashman. Is. Not. There.")));
-
- assertEquals(Arrays.asList("Flashman", "Flashman"), doc.getFieldValues("dest_s"));
- }
-
- @Test
- public void testSelectorExtract() throws Exception {
- SolrInputDocument doc = processAdd("extract-selector",
- doc(f("id", "1"),
- f("source0_s", "Flashman. Or not."),
- f("source1_s", "Serendipitously, he was. I mean, Flashman. And yet."),
- f("source2_s", "Correct, Flashman.")));
-
- assertEquals(Arrays.asList("Flashman", "Flashman"), doc.getFieldValues("dest_s"));
- }
-
- public void testMultipleExtracts() throws Exception {
- // test example from the javadocs
- SolrInputDocument doc = processAdd("multiple-extract",
- doc(f("id", "1"),
- f("text", "From Flashman. To Panman."),
- f("title", "It's Captain Flashman.", "Privately, Flashman."),
- f("subtitle", "Ineluctably, Flashman."),
- f("corrolary_txt", "Forsooth thou bringeth Flashman."),
- f("notes_txt", "Yes Flashman."),
- f("summary", "Many aspire to be Flashman in London."),
- f("descs", "Courage, Flashman.", "Ain't he Flashman."),
- f("descriptions", "Flashman. Flashman. Flashman.")));
-
- assertEquals(Arrays.asList("Flashman", "Flashman"), doc.getFieldValues("people_s"));
- assertEquals(Arrays.asList("Flashman", "Flashman", "Flashman"), doc.getFieldValues("titular_people"));
- assertEquals(Arrays.asList("Flashman", "Flashman"), doc.getFieldValues("key_desc_people"));
- assertEquals(Arrays.asList("Flashman", "Flashman", "Flashman"), doc.getFieldValues("key_description_people"));
- assertEquals("Flashman", doc.getFieldValue("summary_person_s")); // {EntityType} field name interpolation
- assertEquals("London", doc.getFieldValue("summary_location_s")); // {EntityType} field name interpolation
- }
-
- public void testEquivalentExtraction() throws Exception {
- SolrInputDocument d;
-
- // regardless of chain, all of these checks should be equivalent
- for (String chain : Arrays.asList("extract-single", "extract-single-regex",
- "extract-multi", "extract-multi-regex",
- "extract-array", "extract-array-regex",
- "extract-selector", "extract-selector-regex")) {
-
- // simple extract
- d = processAdd(chain,
- doc(f("id", "1111"),
- f("source0_s", "Totally Flashman."), // not extracted
- f("source1_s", "One nation under Flashman.", "Good Flashman.")));
- assertNotNull(chain, d);
- assertEquals(chain, Arrays.asList("Flashman", "Flashman"), d.getFieldValues("dest_s"));
-
- // append to existing values
- d = processAdd(chain,
- doc(f("id", "1111"),
- field("dest_s", "orig1", "orig2"),
- f("source0_s", "Flashman. In totality."), // not extracted
- f("source1_s", "Two nations under Flashman.", "Meh Flashman.")));
- assertNotNull(chain, d);
- assertEquals(chain, Arrays.asList("orig1", "orig2", "Flashman", "Flashman"), d.getFieldValues("dest_s"));
- }
-
- // should be equivalent for any chain matching source1_s and source2_s (but not source0_s)
- for (String chain : Arrays.asList("extract-multi", "extract-multi-regex",
- "extract-array", "extract-array-regex",
- "extract-selector", "extract-selector-regex")) {
-
- // simple extract
- d = processAdd(chain,
- doc(f("id", "1111"),
- f("source0_s", "Not Flashman."), // not extracted
- f("source1_s", "Could have had a Flashman.", "Bad Flashman."),
- f("source2_s", "Indubitably Flashman.")));
- assertNotNull(chain, d);
- assertEquals(chain, Arrays.asList("Flashman", "Flashman", "Flashman"), d.getFieldValues("dest_s"));
-
- // append to existing values
- d = processAdd(chain,
- doc(f("id", "1111"),
- field("dest_s", "orig1", "orig2"),
- f("source0_s", "Never Flashman."), // not extracted
- f("source1_s", "Seeking Flashman.", "Evil incarnate Flashman."),
- f("source2_s", "Perfunctorily Flashman.")));
- assertNotNull(chain, d);
- assertEquals(chain, Arrays.asList("orig1", "orig2", "Flashman", "Flashman", "Flashman"), d.getFieldValues("dest_s"));
- }
-
- // any chain that copies source1_s to dest_s should be equivalent for these assertions
- for (String chain : Arrays.asList("extract-single", "extract-single-regex",
- "extract-multi", "extract-multi-regex",
- "extract-array", "extract-array-regex",
- "extract-selector", "extract-selector-regex")) {
-
- // simple extract
- d = processAdd(chain,
- doc(f("id", "1111"),
- f("source1_s", "Always Flashman.", "Flashman. Noone else.")));
- assertNotNull(chain, d);
- assertEquals(chain, Arrays.asList("Flashman", "Flashman"), d.getFieldValues("dest_s"));
-
- // append to existing values
- d = processAdd(chain,
- doc(f("id", "1111"),
- field("dest_s", "orig1", "orig2"),
- f("source1_s", "Flashman. And, scene.", "Contemporary Flashman. Yeesh.")));
- assertNotNull(chain, d);
- assertEquals(chain, Arrays.asList("orig1", "orig2", "Flashman", "Flashman"), d.getFieldValues("dest_s"));
- }
- }
-
- public void testExtractFieldRegexReplaceAll() throws Exception {
- SolrInputDocument d = processAdd("extract-regex-replaceall",
- doc(f("id", "1111"),
- f("foo_x2_s", "Infrequently Flashman.", "In the words of Flashman."),
- f("foo_x3_x7_s", "Flashman. Whoa.")));
-
- assertNotNull(d);
- assertEquals(Arrays.asList("Flashman", "Flashman"), d.getFieldValues("foo_y2_s"));
- assertEquals("Flashman", d.getFieldValue("foo_y3_y7_s"));
- }
-
- public void testExtractFieldRegexReplaceAllWithEntityType() throws Exception {
- SolrInputDocument d = processAdd("extract-regex-replaceall-with-entity-type",
- doc(f("id", "1111"),
- f("foo_x2_s", "Infrequently Flashman in London.", "In the words of Flashman in London."),
- f("foo_x3_x7_s", "Flashman in London. Whoa.")));
-
- assertNotNull(d);
- assertEquals(d.getFieldNames().toString(), Arrays.asList("Flashman", "Flashman"), d.getFieldValues("foo_person_y2_s"));
- assertEquals(d.getFieldNames().toString(), Arrays.asList("London", "London"), d.getFieldValues("foo_location_y2_s"));
- assertEquals(d.getFieldNames().toString(),"Flashman", d.getFieldValue("foo_person_y3_person_y7_s"));
- assertEquals(d.getFieldNames().toString(),"London", d.getFieldValue("foo_location_y3_location_y7_s"));
- }
-}
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6c070b4a/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/en-test-ner.bin
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/en-test-ner.bin b/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/en-test-ner.bin
new file mode 100644
index 0000000..b4d8cdc
Binary files /dev/null and b/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/en-test-ner.bin differ
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6c070b4a/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/en-test-sent.bin
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/en-test-sent.bin b/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/en-test-sent.bin
new file mode 100644
index 0000000..6e19e6b
Binary files /dev/null and b/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/en-test-sent.bin differ
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6c070b4a/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/en-test-tokenizer.bin
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/en-test-tokenizer.bin b/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/en-test-tokenizer.bin
new file mode 100644
index 0000000..796a744
Binary files /dev/null and b/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/en-test-tokenizer.bin differ
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6c070b4a/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/schema-folding-extra.xml
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/schema-folding-extra.xml b/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/schema-folding-extra.xml
new file mode 100644
index 0000000..573ca53
--- /dev/null
+++ b/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/schema-folding-extra.xml
@@ -0,0 +1,52 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+
+<schema name="test" version="1.0">
+ <fieldType name="string" class="solr.StrField" sortMissingLast="true" multiValued="false"/>
+
+
+ <fieldType name="text_icufolding" class="solr.TextField">
+ <analyzer>
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <filter class="solr.ICUFoldingFilterFactory"/>
+ </analyzer>
+ </fieldType>
+
+ <fieldType name="text_icunormalizer2" class="solr.TextField">
+ <analyzer>
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <filter class="solr.ICUNormalizer2FilterFactory" name="nfkc_cf" mode="compose"/>
+ </analyzer>
+ </fieldType>
+
+ <fieldType name="text_icutransform" class="solr.TextField">
+ <analyzer>
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <filter class="solr.ICUTransformFilterFactory" id="Cyrillic-Latin"/>
+ </analyzer>
+ </fieldType>
+
+
+ <field name="id" type="string" indexed="true" stored="true" required="true"/>
+ <field name="content_icufolding" type="text_icufolding" indexed="true" stored="true"/>
+ <field name="content_icunormalizer2" type="text_icunormalizer2" indexed="true" stored="true"/>
+ <field name="content_icutransform" type="text_icutransform" indexed="true" stored="true"/>
+
+
+ <uniqueKey>id</uniqueKey>
+
+</schema>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6c070b4a/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/schema-icucollate-dv.xml
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/schema-icucollate-dv.xml b/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/schema-icucollate-dv.xml
new file mode 100644
index 0000000..63f7330
--- /dev/null
+++ b/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/schema-icucollate-dv.xml
@@ -0,0 +1,57 @@
+<?xml version="1.0" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- Test schema file for CollationField (docvalues) -->
+
+<schema name="test" version="1.0">
+
+ <fieldType name="string" class="solr.StrField" omitNorms="true" positionIncrementGap="0"/>
+
+ <!-- basic text field -->
+ <fieldType name="text" class="solr.TextField">
+ <analyzer>
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ </analyzer>
+ </fieldType>
+
+ <fieldType name="sort_ar_t" class="solr.ICUCollationField" locale="ar"/>
+ <fieldType name="sort_de_t" class="solr.ICUCollationField" locale="de" strength="primary"/>
+ <fieldType name="sort_tr_canon_t" class="solr.ICUCollationField" locale="tr" strength="primary"
+ decomposition="canonical"/>
+ <fieldType name="sort_da_t" class="solr.ICUCollationField" locale="da" strength="primary"/>
+ <fieldType name="sort_custom_t" class="solr.ICUCollationField" custom="customrules.dat" strength="primary"/>
+
+ <field name="id" type="string" indexed="true" stored="true" multiValued="false" required="false"/>
+ <field name="text" type="text" indexed="true" stored="false"/>
+ <field name="sort_ar" type="sort_ar_t" indexed="false" stored="false" multiValued="false" docValues="true"/>
+ <field name="sort_de" type="sort_de_t" indexed="false" stored="false" multiValued="false" docValues="true"/>
+ <field name="sort_tr_canon" type="sort_tr_canon_t" indexed="false" stored="false" multiValued="true"
+ docValues="true"/>
+ <field name="sort_da" type="sort_da_t" indexed="false" stored="false" multiValued="false" docValues="true"/>
+ <field name="sort_custom" type="sort_custom_t" indexed="false" stored="false" multiValued="true" docValues="true"/>
+
+ <uniqueKey>id</uniqueKey>
+
+ <!-- copy our text to some sort fields with different orders -->
+ <copyField source="text" dest="sort_ar"/>
+ <copyField source="text" dest="sort_de"/>
+ <copyField source="text" dest="sort_tr_canon"/>
+ <copyField source="text" dest="sort_da"/>
+ <copyField source="text" dest="sort_custom"/>
+</schema>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6c070b4a/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/schema-icucollate.xml
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/schema-icucollate.xml b/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/schema-icucollate.xml
new file mode 100644
index 0000000..9698013
--- /dev/null
+++ b/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/schema-icucollate.xml
@@ -0,0 +1,57 @@
+<?xml version="1.0" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- Test schema file for CollationField -->
+
+<schema name="test" version="1.0">
+
+ <fieldType name="string" class="solr.StrField" omitNorms="true" positionIncrementGap="0"/>
+
+ <!-- basic text field -->
+ <fieldType name="text" class="solr.TextField">
+ <analyzer>
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ </analyzer>
+ </fieldType>
+
+ <fieldType name="sort_ar_t" class="solr.ICUCollationField" locale="ar"/>
+ <fieldType name="sort_de_t" class="solr.ICUCollationField" locale="de" strength="primary"/>
+ <fieldType name="sort_tr_canon_t" class="solr.ICUCollationField" locale="tr" strength="primary"
+ decomposition="canonical"/>
+ <fieldType name="sort_da_t" class="solr.ICUCollationField" locale="da" strength="primary"/>
+ <fieldType name="sort_custom_t" class="solr.ICUCollationField" custom="customrules.dat" strength="primary"/>
+
+ <field name="id" type="string" indexed="true" stored="true" multiValued="false" required="false"/>
+ <field name="text" type="text" indexed="true" stored="false"/>
+ <field name="sort_ar" type="sort_ar_t" indexed="true" stored="false" multiValued="false"/>
+ <field name="sort_de" type="sort_de_t" indexed="true" stored="false" multiValued="false"/>
+ <field name="sort_tr_canon" type="sort_tr_canon_t" indexed="true" stored="false" multiValued="false"/>
+ <field name="sort_da" type="sort_da_t" indexed="true" stored="false" multiValued="false"/>
+ <field name="sort_custom" type="sort_custom_t" indexed="true" stored="false" multiValued="false"/>
+
+
+ <uniqueKey>id</uniqueKey>
+
+ <!-- copy our text to some sort fields with different orders -->
+ <copyField source="text" dest="sort_ar"/>
+ <copyField source="text" dest="sort_de"/>
+ <copyField source="text" dest="sort_tr_canon"/>
+ <copyField source="text" dest="sort_da"/>
+ <copyField source="text" dest="sort_custom"/>
+</schema>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6c070b4a/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/schema-icucollateoptions.xml
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/schema-icucollateoptions.xml b/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/schema-icucollateoptions.xml
new file mode 100644
index 0000000..59b8d25
--- /dev/null
+++ b/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/schema-icucollateoptions.xml
@@ -0,0 +1,68 @@
+<?xml version="1.0" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- Test schema file for CollationField options -->
+
+<schema name="test" version="1.0">
+
+ <fieldType name="string" class="solr.StrField" omitNorms="true" positionIncrementGap="0"/>
+
+ <!-- basic text field -->
+ <fieldType name="text" class="solr.TextField">
+ <analyzer>
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ </analyzer>
+ </fieldType>
+
+ <!-- ignores punctuation and whitespace -->
+ <fieldType name="sort_ignore_punctuation_t" class="solr.ICUCollationField"
+ locale="en" strength="primary" alternate="shifted"/>
+ <!-- ignores only whitespace -->
+ <fieldType name="sort_ignore_space_t" class="solr.ICUCollationField"
+ locale="en" strength="primary" alternate="shifted" variableTop=" "/>
+ <!-- ignores only accents, but not case -->
+ <fieldType name="sort_ignore_accents_t" class="solr.ICUCollationField"
+ locale="en" strength="primary" caseLevel="true"/>
+ <!-- sorts numerics in numeric order -->
+ <fieldType name="sort_numerics_t" class="solr.ICUCollationField"
+ locale="en" numeric="true"/>
+ <!-- sorts uppercase before lowercase -->
+ <fieldType name="sort_uppercase_first_t" class="solr.ICUCollationField"
+ locale="en" strength="tertiary" caseFirst="upper"/>
+
+
+ <field name="id" type="string" indexed="true" stored="true" multiValued="false" required="false"/>
+ <field name="text" type="text" indexed="true" stored="false"/>
+ <field name="sort_ignore_punctuation" type="sort_ignore_punctuation_t" indexed="true" stored="false"
+ multiValued="false"/>
+ <field name="sort_ignore_space" type="sort_ignore_space_t" indexed="true" stored="false" multiValued="false"/>
+ <field name="sort_ignore_accents" type="sort_ignore_accents_t" indexed="true" stored="false" multiValued="false"/>
+ <field name="sort_numerics" type="sort_numerics_t" indexed="true" stored="false" multiValued="false"/>
+ <field name="sort_uppercase_first" type="sort_uppercase_first_t" indexed="true" stored="false" multiValued="false"/>
+
+
+ <uniqueKey>id</uniqueKey>
+
+ <!-- copy our text to some sort fields with different orders -->
+ <copyField source="text" dest="sort_ignore_punctuation"/>
+ <copyField source="text" dest="sort_ignore_space"/>
+ <copyField source="text" dest="sort_ignore_accents"/>
+ <copyField source="text" dest="sort_numerics"/>
+ <copyField source="text" dest="sort_uppercase_first"/>
+</schema>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6c070b4a/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/schema-opennlp-extract.xml
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/schema-opennlp-extract.xml b/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/schema-opennlp-extract.xml
new file mode 100644
index 0000000..fc13431
--- /dev/null
+++ b/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/schema-opennlp-extract.xml
@@ -0,0 +1,49 @@
+<?xml version="1.0" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<schema name="test-opennlp-extract" version="1.6">
+ <fieldType name="opennlp-en-tokenization" class="solr.TextField">
+ <analyzer>
+ <tokenizer class="solr.OpenNLPTokenizerFactory"
+ sentenceModel="en-test-sent.bin"
+ tokenizerModel="en-test-tokenizer.bin"/>
+ </analyzer>
+ </fieldType>
+
+ <fieldType name="string" class="solr.StrField" sortMissingLast="true"/>
+
+ <fieldType name="text" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
+ <analyzer>
+ <tokenizer class="solr.MockTokenizerFactory"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.PorterStemFilterFactory"/>
+ </analyzer>
+ </fieldType>
+
+ <field name="id" type="string" indexed="true" stored="true" multiValued="false" required="true"/>
+ <field name="text" type="text" indexed="true" stored="false"/>
+ <field name="subject" type="text" indexed="true" stored="true"/>
+ <field name="title" type="text" indexed="true" stored="true"/>
+ <field name="subtitle" type="text" indexed="true" stored="true"/>
+ <field name="descs" type="text" indexed="true" stored="true"/>
+ <field name="descriptions" type="text" indexed="true" stored="true"/>
+
+ <dynamicField name="*_txt" type="text" indexed="true" stored="true"/>
+ <dynamicField name="*_s" type="string" indexed="true" stored="true" multiValued="true"/>
+ <dynamicField name="*_people" type="string" indexed="true" stored="true" multiValued="true"/>
+</schema>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6c070b4a/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/solrconfig-icucollate.xml
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/solrconfig-icucollate.xml b/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/solrconfig-icucollate.xml
new file mode 100644
index 0000000..90c52d7
--- /dev/null
+++ b/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/solrconfig-icucollate.xml
@@ -0,0 +1,27 @@
+<?xml version="1.0" ?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<config>
+ <luceneMatchVersion>${tests.luceneMatchVersion:LATEST}</luceneMatchVersion>
+ <indexConfig>
+ <useCompoundFile>${useCompoundFile:false}</useCompoundFile>
+ </indexConfig>
+ <requestHandler name="/select" class="solr.SearchHandler"></requestHandler>
+ <directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.RAMDirectoryFactory}"/>
+</config>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6c070b4a/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/solrconfig-opennlp-extract.xml
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/solrconfig-opennlp-extract.xml b/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/solrconfig-opennlp-extract.xml
new file mode 100644
index 0000000..7fd793e
--- /dev/null
+++ b/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/solrconfig-opennlp-extract.xml
@@ -0,0 +1,206 @@
+<?xml version="1.0" ?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<config>
+ <luceneMatchVersion>${tests.luceneMatchVersion:LATEST}</luceneMatchVersion>
+ <xi:include href="solrconfig.snippet.randomindexconfig.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
+ <requestHandler name="/select" class="solr.SearchHandler"></requestHandler>
+ <requestHandler name="/update" class="solr.UpdateRequestHandler" />
+ <directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.RAMDirectoryFactory}"/>
+ <schemaFactory class="ClassicIndexSchemaFactory"/>
+
+ <updateRequestProcessorChain name="extract-single">
+ <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+ <str name="modelFile">en-test-ner.bin</str>
+ <str name="analyzerFieldType">opennlp-en-tokenization</str>
+ <str name="source">source1_s</str>
+ <str name="dest">dest_s</str>
+ </processor>
+ </updateRequestProcessorChain>
+
+ <updateRequestProcessorChain name="extract-single-regex">
+ <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+ <str name="modelFile">en-test-ner.bin</str>
+ <str name="analyzerFieldType">opennlp-en-tokenization</str>
+ <str name="source">source1_s</str>
+ <lst name="dest">
+ <str name="pattern">source\d(_s)</str>
+ <str name="replacement">dest$1</str>
+ </lst>
+ </processor>
+ </updateRequestProcessorChain>
+
+ <updateRequestProcessorChain name="extract-multi">
+ <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+ <str name="modelFile">en-test-ner.bin</str>
+ <str name="analyzerFieldType">opennlp-en-tokenization</str>
+ <str name="source">source1_s</str>
+ <str name="source">source2_s</str>
+ <str name="dest">dest_s</str>
+ </processor>
+ </updateRequestProcessorChain>
+
+ <updateRequestProcessorChain name="extract-multi-regex">
+ <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+ <str name="modelFile">en-test-ner.bin</str>
+ <str name="analyzerFieldType">opennlp-en-tokenization</str>
+ <str name="source">source1_s</str>
+ <str name="source">source2_s</str>
+ <lst name="dest">
+ <str name="pattern">source\d(_s)</str>
+ <str name="replacement">dest$1</str>
+ </lst>
+ </processor>
+ </updateRequestProcessorChain>
+
+ <updateRequestProcessorChain name="extract-array">
+ <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+ <str name="modelFile">en-test-ner.bin</str>
+ <str name="analyzerFieldType">opennlp-en-tokenization</str>
+ <arr name="source">
+ <str>source1_s</str>
+ <str>source2_s</str>
+ </arr>
+ <str name="dest">dest_s</str>
+ </processor>
+ </updateRequestProcessorChain>
+
+ <updateRequestProcessorChain name="extract-array-regex">
+ <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+ <str name="modelFile">en-test-ner.bin</str>
+ <str name="analyzerFieldType">opennlp-en-tokenization</str>
+ <arr name="source">
+ <str>source1_s</str>
+ <str>source2_s</str>
+ </arr>
+ <lst name="dest">
+ <str name="pattern">source\d(_s)</str>
+ <str name="replacement">dest$1</str>
+ </lst>
+ </processor>
+ </updateRequestProcessorChain>
+
+ <updateRequestProcessorChain name="extract-selector">
+ <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+ <str name="modelFile">en-test-ner.bin</str>
+ <str name="analyzerFieldType">opennlp-en-tokenization</str>
+ <lst name="source">
+ <str name="fieldRegex">source\d_.*</str>
+ <lst name="exclude">
+ <str name="fieldRegex">source0_.*</str>
+ </lst>
+ </lst>
+ <str name="dest">dest_s</str>
+ </processor>
+ </updateRequestProcessorChain>
+
+ <updateRequestProcessorChain name="extract-selector-regex">
+ <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+ <str name="modelFile">en-test-ner.bin</str>
+ <str name="analyzerFieldType">opennlp-en-tokenization</str>
+ <lst name="source">
+ <str name="fieldRegex">source\d_.*</str>
+ <lst name="exclude">
+ <str name="fieldRegex">source0_.*</str>
+ </lst>
+ </lst>
+ <lst name="dest">
+ <str name="pattern">source\d(_s)</str>
+ <str name="replacement">dest$1</str>
+ </lst>
+ </processor>
+ </updateRequestProcessorChain>
+
+ <updateRequestProcessorChain name="extract-regex-replaceall">
+ <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+ <str name="modelFile">en-test-ner.bin</str>
+ <str name="analyzerFieldType">opennlp-en-tokenization</str>
+ <lst name="source">
+ <str name="fieldRegex">foo.*</str>
+ </lst>
+ <lst name="dest">
+ <!-- unanchored pattern that can match, and be replaced, multiple times in a field name -->
+ <str name="pattern">x(\d)</str>
+ <str name="replacement">y$1</str>
+ </lst>
+ </processor>
+ </updateRequestProcessorChain>
+
+ <updateRequestProcessorChain name="extract-regex-replaceall-with-entity-type">
+ <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+ <str name="modelFile">en-test-ner.bin</str>
+ <str name="analyzerFieldType">opennlp-en-tokenization</str>
+ <lst name="source">
+ <str name="fieldRegex">foo.*</str>
+ </lst>
+ <lst name="dest">
+ <!-- unanchored pattern that can match, and be replaced, multiple times in a field name -->
+ <str name="pattern">x(\d)</str>
+ <str name="replacement">{EntityType}_y$1</str>
+ </lst>
+ </processor>
+ </updateRequestProcessorChain>
+
+ <!-- example used in OpenNLPExtractNamedEntitiesUpdateProcessorFactory javadocs -->
+ <updateRequestProcessorChain name="multiple-extract">
+ <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+ <str name="modelFile">en-test-ner.bin</str>
+ <str name="analyzerFieldType">opennlp-en-tokenization</str>
+ <str name="source">text</str>
+ <str name="dest">people_s</str>
+ </processor>
+ <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+ <str name="modelFile">en-test-ner.bin</str>
+ <str name="analyzerFieldType">opennlp-en-tokenization</str>
+ <arr name="source">
+ <str>title</str>
+ <str>subtitle</str>
+ </arr>
+ <str name="dest">titular_people</str>
+ </processor>
+ <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+ <str name="modelFile">en-test-ner.bin</str>
+ <str name="analyzerFieldType">opennlp-en-tokenization</str>
+ <lst name="source">
+ <str name="fieldRegex">.*_txt$</str>
+ <lst name="exclude">
+ <str name="fieldName">notes_txt</str>
+ </lst>
+ </lst>
+ <str name="dest">people_s</str>
+ </processor>
+ <processor class="solr.processor.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+ <str name="modelFile">en-test-ner.bin</str>
+ <str name="analyzerFieldType">opennlp-en-tokenization</str>
+ <lst name="source">
+ <str name="fieldRegex">^desc(.*)s$</str>
+ </lst>
+ <lst name="dest">
+ <str name="pattern">^desc(.*)s$</str>
+ <str name="replacement">key_desc$1_people</str>
+ </lst>
+ </processor>
+ <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+ <str name="modelFile">en-test-ner.bin</str>
+ <str name="analyzerFieldType">opennlp-en-tokenization</str>
+ <str name="source">summary</str>
+ <str name="dest">summary_{EntityType}_s</str>
+ </processor>
+ </updateRequestProcessorChain>
+</config>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6c070b4a/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/solrconfig.snippet.randomindexconfig.xml
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/solrconfig.snippet.randomindexconfig.xml b/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/solrconfig.snippet.randomindexconfig.xml
new file mode 100644
index 0000000..23516b0
--- /dev/null
+++ b/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/solrconfig.snippet.randomindexconfig.xml
@@ -0,0 +1,48 @@
+<?xml version="1.0" ?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!--
+A solrconfig.xml snippet containing indexConfig settings for randomized testing.
+-->
+<indexConfig>
+ <!-- this sys property is not set by SolrTestCaseJ4 because we ideally want to use
+ the RandomMergePolicy in all tests - but some tests expect very specific
+ Merge behavior, so those tests can set it as needed.
+ -->
+ <mergePolicyFactory class="${solr.tests.mergePolicyFactory:org.apache.solr.util.RandomMergePolicyFactory}" />
+
+ <useCompoundFile>${useCompoundFile:false}</useCompoundFile>
+
+ <maxBufferedDocs>${solr.tests.maxBufferedDocs}</maxBufferedDocs>
+ <ramBufferSizeMB>${solr.tests.ramBufferSizeMB}</ramBufferSizeMB>
+
+ <mergeScheduler class="${solr.tests.mergeScheduler}" />
+
+ <writeLockTimeout>1000</writeLockTimeout>
+ <commitLockTimeout>10000</commitLockTimeout>
+
+ <!-- this sys property is not set by SolrTestCaseJ4 because almost all tests should
+ use the single process lockType for speed - but tests that explicitly need
+ to vary the lockType can set it as needed.
+ -->
+ <lockType>${solr.tests.lockType:single}</lockType>
+
+ <infoStream>${solr.tests.infostream:false}</infoStream>
+
+</indexConfig>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6c070b4a/solr/core/build.gradle
----------------------------------------------------------------------
diff --git a/solr/core/build.gradle b/solr/core/build.gradle
index e8cc58e..afe4834 100644
--- a/solr/core/build.gradle
+++ b/solr/core/build.gradle
@@ -133,4 +133,5 @@ dependencies {
testCompile project(':solr:test-framework')
testCompile project(path: ':lucene:backward-codecs', configuration: 'testOutput')
testCompile project(path: ':lucene:queryparser', configuration: 'testOutput')
+ testCompile project(':solr:contrib:analysis-extras')
}
\ No newline at end of file