You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by da...@apache.org on 2018/11/02 10:30:03 UTC
[5/7] lucene-solr:jira/gradle: Adding solr:analysis-extras module

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6c070b4a/solr/contrib/analysis-extras/src/test/java/org/apache/solr/update/processor/TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory.java
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/test/java/org/apache/solr/update/processor/TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory.java b/solr/contrib/analysis-extras/src/test/java/org/apache/solr/update/processor/TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory.java
new file mode 100644
index 0000000..851fea0
--- /dev/null
+++ b/solr/contrib/analysis-extras/src/test/java/org/apache/solr/update/processor/TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory.java
@@ -0,0 +1,195 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.update.processor;
+
+import java.io.File;
+import java.util.Arrays;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.solr.common.SolrInputDocument;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory extends UpdateProcessorTestBase {
+
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    File testHome = createTempDir().toFile();
+    FileUtils.copyDirectory(getFile("analysis-extras/solr"), testHome);
+    initCore("solrconfig-opennlp-extract.xml", "schema-opennlp-extract.xml", testHome.getAbsolutePath());
+  }
+
+  @Test
+  public void testSimpleExtract() throws Exception {
+    SolrInputDocument doc = processAdd("extract-single",
+        doc(f("id", "1"),
+            f("source1_s", "Take this to Mr. Flashman.")));
+    assertEquals("dest_s should have stringValue", "Flashman", doc.getFieldValue("dest_s"));
+  }
+
+  @Test
+  public void testMultiExtract() throws Exception {
+    SolrInputDocument doc = processAdd("extract-multi",
+        doc(f("id", "1"),
+            f("source1_s", "Hello Flashman."),
+            f("source2_s", "Calling Flashman.")));
+
+    assertEquals(Arrays.asList("Flashman", "Flashman"), doc.getFieldValues("dest_s"));
+  }
+
+  @Test
+  public void testArrayExtract() throws Exception {
+    SolrInputDocument doc = processAdd("extract-array",
+        doc(f("id", "1"),
+            f("source1_s", "Currently we have Flashman. Not much else."),
+            f("source2_s", "Flashman. Is. Not. There.")));
+
+    assertEquals(Arrays.asList("Flashman", "Flashman"), doc.getFieldValues("dest_s"));
+  }
+
+  @Test
+  public void testSelectorExtract() throws Exception {
+    SolrInputDocument doc = processAdd("extract-selector",
+        doc(f("id", "1"),
+            f("source0_s", "Flashman. Or not."),
+            f("source1_s", "Serendipitously, he was. I mean, Flashman. And yet."),
+            f("source2_s", "Correct, Flashman.")));
+
+    assertEquals(Arrays.asList("Flashman", "Flashman"), doc.getFieldValues("dest_s"));
+  }
+
+  public void testMultipleExtracts() throws Exception {
+    // test example from the javadocs
+    SolrInputDocument doc = processAdd("multiple-extract",
+        doc(f("id", "1"),
+            f("text", "From Flashman. To Panman."),
+            f("title", "It's Captain Flashman.", "Privately, Flashman."),
+            f("subtitle", "Ineluctably, Flashman."),
+            f("corrolary_txt", "Forsooth thou bringeth Flashman."),
+            f("notes_txt", "Yes Flashman."),
+            f("summary", "Many aspire to be Flashman in London."),
+            f("descs", "Courage, Flashman.", "Ain't he Flashman."),
+            f("descriptions", "Flashman. Flashman. Flashman.")));
+
+    assertEquals(Arrays.asList("Flashman", "Flashman"), doc.getFieldValues("people_s"));
+    assertEquals(Arrays.asList("Flashman", "Flashman", "Flashman"), doc.getFieldValues("titular_people"));
+    assertEquals(Arrays.asList("Flashman", "Flashman"), doc.getFieldValues("key_desc_people"));
+    assertEquals(Arrays.asList("Flashman", "Flashman", "Flashman"), doc.getFieldValues("key_description_people"));
+    assertEquals("Flashman", doc.getFieldValue("summary_person_s")); // {EntityType} field name interpolation
+    assertEquals("London", doc.getFieldValue("summary_location_s")); // {EntityType} field name interpolation
+  }
+
+  public void testEquivalentExtraction() throws Exception {
+    SolrInputDocument d;
+
+    // regardless of chain, all of these checks should be equivalent
+    for (String chain : Arrays.asList("extract-single", "extract-single-regex",
+        "extract-multi", "extract-multi-regex",
+        "extract-array", "extract-array-regex",
+        "extract-selector", "extract-selector-regex")) {
+
+      // simple extract
+      d = processAdd(chain,
+          doc(f("id", "1111"),
+              f("source0_s", "Totally Flashman."), // not extracted
+              f("source1_s", "One nation under Flashman.", "Good Flashman.")));
+      assertNotNull(chain, d);
+      assertEquals(chain, Arrays.asList("Flashman", "Flashman"), d.getFieldValues("dest_s"));
+
+      // append to existing values
+      d = processAdd(chain,
+          doc(f("id", "1111"),
+              field("dest_s", "orig1", "orig2"),
+              f("source0_s", "Flashman. In totality."), // not extracted
+              f("source1_s", "Two nations under Flashman.", "Meh Flashman.")));
+      assertNotNull(chain, d);
+      assertEquals(chain, Arrays.asList("orig1", "orig2", "Flashman", "Flashman"), d.getFieldValues("dest_s"));
+    }
+
+    // should be equivalent for any chain matching source1_s and source2_s (but not source0_s)
+    for (String chain : Arrays.asList("extract-multi", "extract-multi-regex",
+        "extract-array", "extract-array-regex",
+        "extract-selector", "extract-selector-regex")) {
+
+      // simple extract
+      d = processAdd(chain,
+          doc(f("id", "1111"),
+              f("source0_s", "Not Flashman."), // not extracted
+              f("source1_s", "Could have had a Flashman.", "Bad Flashman."),
+              f("source2_s", "Indubitably Flashman.")));
+      assertNotNull(chain, d);
+      assertEquals(chain, Arrays.asList("Flashman", "Flashman", "Flashman"), d.getFieldValues("dest_s"));
+
+      // append to existing values
+      d = processAdd(chain,
+          doc(f("id", "1111"),
+              field("dest_s", "orig1", "orig2"),
+              f("source0_s", "Never Flashman."), // not extracted
+              f("source1_s", "Seeking Flashman.", "Evil incarnate Flashman."),
+              f("source2_s", "Perfunctorily Flashman.")));
+      assertNotNull(chain, d);
+      assertEquals(chain, Arrays.asList("orig1", "orig2", "Flashman", "Flashman", "Flashman"), d.getFieldValues("dest_s"));
+    }
+
+    // any chain that extracts named entities from source1_s into dest_s should be equivalent for these assertions
+    for (String chain : Arrays.asList("extract-single", "extract-single-regex",
+        "extract-multi", "extract-multi-regex",
+        "extract-array", "extract-array-regex",
+        "extract-selector", "extract-selector-regex")) {
+
+      // simple extract
+      d = processAdd(chain,
+          doc(f("id", "1111"),
+              f("source1_s", "Always Flashman.", "Flashman. Noone else.")));
+      assertNotNull(chain, d);
+      assertEquals(chain, Arrays.asList("Flashman", "Flashman"), d.getFieldValues("dest_s"));
+
+      // append to existing values
+      d = processAdd(chain,
+          doc(f("id", "1111"),
+              field("dest_s", "orig1", "orig2"),
+              f("source1_s", "Flashman.  And, scene.", "Contemporary Flashman. Yeesh.")));
+      assertNotNull(chain, d);
+      assertEquals(chain, Arrays.asList("orig1", "orig2", "Flashman", "Flashman"), d.getFieldValues("dest_s"));
+    }
+  }
+
+  public void testExtractFieldRegexReplaceAll() throws Exception {
+    SolrInputDocument d = processAdd("extract-regex-replaceall",
+        doc(f("id", "1111"),
+            f("foo_x2_s", "Infrequently Flashman.", "In the words of Flashman."),
+            f("foo_x3_x7_s", "Flashman. Whoa.")));
+
+    assertNotNull(d);
+    assertEquals(Arrays.asList("Flashman", "Flashman"), d.getFieldValues("foo_y2_s"));
+    assertEquals("Flashman", d.getFieldValue("foo_y3_y7_s"));
+  }
+
+  public void testExtractFieldRegexReplaceAllWithEntityType() throws Exception {
+    SolrInputDocument d = processAdd("extract-regex-replaceall-with-entity-type",
+        doc(f("id", "1111"),
+            f("foo_x2_s", "Infrequently Flashman in London.", "In the words of Flashman in London."),
+            f("foo_x3_x7_s", "Flashman in London. Whoa.")));
+
+    assertNotNull(d);
+    assertEquals(d.getFieldNames().toString(), Arrays.asList("Flashman", "Flashman"), d.getFieldValues("foo_person_y2_s"));
+    assertEquals(d.getFieldNames().toString(), Arrays.asList("London", "London"), d.getFieldValues("foo_location_y2_s"));
+    assertEquals(d.getFieldNames().toString(),"Flashman", d.getFieldValue("foo_person_y3_person_y7_s"));
+    assertEquals(d.getFieldNames().toString(),"London", d.getFieldValue("foo_location_y3_location_y7_s"));
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6c070b4a/solr/contrib/analysis-extras/src/test/org/apache/solr/analysis/TestFoldingMultitermExtrasQuery.java
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/test/org/apache/solr/analysis/TestFoldingMultitermExtrasQuery.java b/solr/contrib/analysis-extras/src/test/org/apache/solr/analysis/TestFoldingMultitermExtrasQuery.java
deleted file mode 100644
index b2cdbc2..0000000
--- a/solr/contrib/analysis-extras/src/test/org/apache/solr/analysis/TestFoldingMultitermExtrasQuery.java
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.analysis;
-
-import java.io.File;
-
-import org.apache.commons.io.FileUtils;
-import org.apache.solr.SolrTestCaseJ4;
-import org.junit.BeforeClass;
-import org.junit.Test;
-
-// See: https://issues.apache.org/jira/browse/SOLR-12028 Tests cannot remove files on Windows machines occasionally
-public class TestFoldingMultitermExtrasQuery extends SolrTestCaseJ4 {
-
-  public String getCoreName() {
-    return "basic";
-  }
-
-  @BeforeClass
-  public static void beforeTests() throws Exception {
-    File testHome = createTempDir().toFile();
-    FileUtils.copyDirectory(getFile("analysis-extras/solr"), testHome);
-    initCore("solrconfig-icucollate.xml","schema-folding-extra.xml", testHome.getAbsolutePath());
-
-    int idx = 1;
-    // ICUFoldingFilterFactory
-    assertU(adoc("id", Integer.toString(idx++), "content_icufolding", "BadMagicICUFolding"));
-    assertU(adoc("id", Integer.toString(idx++), "content_icufolding", "Ruß"));
-    assertU(adoc("id", Integer.toString(idx++), "content_icufolding", "ΜΆΪΟΣ"));
-    assertU(adoc("id", Integer.toString(idx++), "content_icufolding", "Μάϊος"));
-    assertU(adoc("id", Integer.toString(idx++), "content_icufolding", "résumé"));
-    assertU(adoc("id", Integer.toString(idx++), "content_icufolding", "re\u0301sume\u0301"));
-    assertU(adoc("id", Integer.toString(idx++), "content_icufolding", "ELİF"));
-    assertU(adoc("id", Integer.toString(idx++), "content_icufolding", "eli\u0307f"));
-
-    // ICUNormalizer2FilterFactory
-
-    assertU(adoc("id", Integer.toString(idx++), "content_icunormalizer2", "BadMagicICUFolding"));
-    assertU(adoc("id", Integer.toString(idx++), "content_icunormalizer2", "Ruß"));
-    assertU(adoc("id", Integer.toString(idx++), "content_icunormalizer2", "ΜΆΪΟΣ"));
-    assertU(adoc("id", Integer.toString(idx++), "content_icunormalizer2", "Μάϊος"));
-    assertU(adoc("id", Integer.toString(idx++), "content_icunormalizer2", "résumé"));
-    assertU(adoc("id", Integer.toString(idx++), "content_icunormalizer2", "re\u0301sume\u0301"));
-    assertU(adoc("id", Integer.toString(idx++), "content_icunormalizer2", "ELİF"));
-    assertU(adoc("id", Integer.toString(idx++), "content_icunormalizer2", "eli\u0307f"));
-
-    // ICUTransformFilterFactory
-    assertU(adoc("id", Integer.toString(idx++), "content_icutransform", "Российская"));
-
-    assertU(commit());
-  }
-
-  @Test
-  public void testICUFolding() {
-    assertQ(req("q", "content_icufolding:BadMagicicuFold*"), "//result[@numFound='1']");
-    assertQ(req("q", "content_icufolding:rU*"), "//result[@numFound='1']");
-    assertQ(req("q", "content_icufolding:Re*Me"), "//result[@numFound='2']");
-    assertQ(req("q", "content_icufolding:RE\u0301su*"), "//result[@numFound='2']");
-    assertQ(req("q", "content_icufolding:El*"), "//result[@numFound='2']");
-  }
-  @Test
-  public void testICUNormalizer2() {
-    assertQ(req("q", "content_icunormalizer2:BadMagicicuFold*"), "//result[@numFound='1']");
-    assertQ(req("q", "content_icunormalizer2:RU*"), "//result[@numFound='1']");
-    assertQ(req("q", "content_icunormalizer2:Μάϊ*"), "//result[@numFound='2']");
-    assertQ(req("q", "content_icunormalizer2:re\u0301Su*"), "//result[@numFound='2']");
-    assertQ(req("q", "content_icunormalizer2:eL*"), "//result[@numFound='2']");
-  }
-  
-  public void testICUTransform() {
-    assertQ(req("q", "content_icutransform:Росс*"), "//result[@numFound='1']");
-  }
-}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6c070b4a/solr/contrib/analysis-extras/src/test/org/apache/solr/schema/TestICUCollationField.java
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/test/org/apache/solr/schema/TestICUCollationField.java b/solr/contrib/analysis-extras/src/test/org/apache/solr/schema/TestICUCollationField.java
deleted file mode 100644
index f164080..0000000
--- a/solr/contrib/analysis-extras/src/test/org/apache/solr/schema/TestICUCollationField.java
+++ /dev/null
@@ -1,192 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.schema;
-
-import java.io.File;
-import java.io.FileOutputStream;
-
-import org.apache.commons.io.FileUtils;
-import org.apache.commons.io.IOUtils;
-import org.apache.lucene.analysis.util.FilesystemResourceLoader;
-import org.apache.lucene.analysis.util.ResourceLoader;
-import org.apache.lucene.analysis.util.StringMockResourceLoader;
-import org.apache.solr.SolrTestCaseJ4;
-import org.junit.BeforeClass;
-
-import com.ibm.icu.text.Collator;
-import com.ibm.icu.text.RuleBasedCollator;
-import com.ibm.icu.util.ULocale;
-
-/**
- * Tests {@link ICUCollationField} with TermQueries, RangeQueries, and sort order.
- */
-public class TestICUCollationField extends SolrTestCaseJ4 {
-  
-  @BeforeClass
-  public static void beforeClass() throws Exception {
-    String home = setupSolrHome();
-    initCore("solrconfig.xml","schema.xml", home);
-    // add some docs
-    assertU(adoc("id", "1", "text", "\u0633\u0627\u0628"));
-    assertU(adoc("id", "2", "text", "I WİLL USE TURKİSH CASING"));
-    assertU(adoc("id", "3", "text", "ı will use turkish casıng"));
-    assertU(adoc("id", "4", "text", "Töne"));
-    assertU(adoc("id", "5", "text", "I W\u0049\u0307LL USE TURKİSH CASING"));
-    assertU(adoc("id", "6", "text", "Ｔｅｓｔｉｎｇ"));
-    assertU(adoc("id", "7", "text", "Tone"));
-    assertU(adoc("id", "8", "text", "Testing"));
-    assertU(adoc("id", "9", "text", "testing"));
-    assertU(adoc("id", "10", "text", "toene"));
-    assertU(adoc("id", "11", "text", "Tzne"));
-    assertU(adoc("id", "12", "text", "\u0698\u0698"));
-    assertU(commit());
-  }
-  
-  /**
-   * Ugly: but what to do? We want to test custom sort, which reads rules in as a resource.
-   * These are largish files, and jvm-specific (as our documentation says, you should always
-   * look out for jvm differences with collation).
-   * So it's preferable to create this file on-the-fly.
-   */
-  public static String setupSolrHome() throws Exception {
-    String tmpFile = createTempDir().toFile().getAbsolutePath();
-    // make data and conf dirs
-    new File(tmpFile  + "/collection1", "data").mkdirs();
-    File confDir = new File(tmpFile + "/collection1", "conf");
-    confDir.mkdirs();
-    
-    // copy over configuration files
-    FileUtils.copyFile(getFile("analysis-extras/solr/collection1/conf/solrconfig-icucollate.xml"), new File(confDir, "solrconfig.xml"));
-    FileUtils.copyFile(getFile("analysis-extras/solr/collection1/conf/schema-icucollate.xml"), new File(confDir, "schema.xml"));
-    
-    // generate custom collation rules (DIN 5007-2), saving to customrules.dat
-    RuleBasedCollator baseCollator = (RuleBasedCollator) Collator.getInstance(new ULocale("de", "DE"));
-
-    String DIN5007_2_tailorings =
-      "& ae , a\u0308 & AE , A\u0308"+
-      "& oe , o\u0308 & OE , O\u0308"+
-      "& ue , u\u0308 & UE , u\u0308";
-
-    RuleBasedCollator tailoredCollator = new RuleBasedCollator(baseCollator.getRules() + DIN5007_2_tailorings);
-    String tailoredRules = tailoredCollator.getRules();
-    final String osFileName = "customrules.dat";
-    final FileOutputStream os = new FileOutputStream(new File(confDir, osFileName));
-    IOUtils.write(tailoredRules, os, "UTF-8");
-    os.close();
-
-    final ResourceLoader loader;
-    if (random().nextBoolean()) {
-      loader = new StringMockResourceLoader(tailoredRules);
-    } else {
-      loader = new FilesystemResourceLoader(confDir.toPath());
-    }
-    final Collator readCollator = ICUCollationField.createFromRules(osFileName, loader);
-    assertEquals(tailoredCollator, readCollator);
-
-    return tmpFile;
-  }
-
-  /** 
-   * Test termquery with german DIN 5007-1 primary strength.
-   * In this case, ö is equivalent to o (but not oe) 
-   */
-  public void testBasicTermQuery() {
-    assertQ("Collated TQ: ",
-       req("fl", "id", "q", "sort_de:tone", "sort", "id asc" ),
-              "//*[@numFound='2']",
-              "//result/doc[1]/str[@name='id'][.=4]",
-              "//result/doc[2]/str[@name='id'][.=7]"
-    );
-  }
-  
-  /** 
-   * Test rangequery again with the DIN 5007-1 collator.
-   * We do a range query of tone .. tp, in binary order this
-   * would retrieve nothing due to case and accent differences.
-   */
-  public void testBasicRangeQuery() {
-    assertQ("Collated RangeQ: ",
-        req("fl", "id", "q", "sort_de:[tone TO tp]", "sort", "id asc" ),
-               "//*[@numFound='2']",
-               "//result/doc[1]/str[@name='id'][.=4]",
-               "//result/doc[2]/str[@name='id'][.=7]"
-     );
-  }
-  
-  /** 
-   * Test sort with a danish collator. ö is ordered after z
-   */
-  public void testBasicSort() {
-    assertQ("Collated Sort: ",
-        req("fl", "id", "q", "sort_da:[tz TO töz]", "sort", "sort_da asc" ),
-               "//*[@numFound='2']",
-               "//result/doc[1]/str[@name='id'][.=11]",
-               "//result/doc[2]/str[@name='id'][.=4]"
-     );
-  }
-  
-  /** 
-   * Test sort with an arabic collator. U+0633 is ordered after U+0698.
-   * With a binary collator, the range would also return nothing.
-   */
-  public void testArabicSort() {
-    assertQ("Collated Sort: ",
-        req("fl", "id", "q", "sort_ar:[\u0698 TO \u0633\u0633]", "sort", "sort_ar asc" ),
-               "//*[@numFound='2']",
-               "//result/doc[1]/str[@name='id'][.=12]",
-               "//result/doc[2]/str[@name='id'][.=1]"
-     );
-  }
-
-  /** 
-   * Test rangequery again with an Arabic collator.
-   * Binary order would normally order U+0633 in this range.
-   */
-  public void testNegativeRangeQuery() {
-    assertQ("Collated RangeQ: ",
-        req("fl", "id", "q", "sort_ar:[\u062F TO \u0698]", "sort", "id asc" ),
-               "//*[@numFound='0']"
-     );
-  }
-  /**
-   * Test canonical decomposition with turkish primary strength. 
-   * With this sort order, İ is the uppercase form of i, and I is the uppercase form of ı.
-   * We index a decomposed form of İ.
-   */
-  public void testCanonicalDecomposition() {
-    assertQ("Collated TQ: ",
-        req("fl", "id", "q", "sort_tr_canon:\"I Will Use Turkish Casıng\"", "sort", "id asc" ),
-               "//*[@numFound='3']",
-               "//result/doc[1]/str[@name='id'][.=2]",
-               "//result/doc[2]/str[@name='id'][.=3]",
-               "//result/doc[3]/str[@name='id'][.=5]"
-     );
-  }
-  
-  /** 
-   * Test termquery with custom collator (DIN 5007-2).
-   * In this case, ö is equivalent to oe (but not o) 
-   */
-  public void testCustomCollation() {
-    assertQ("Collated TQ: ",
-        req("fl", "id", "q", "sort_custom:toene"),
-               "//*[@numFound='2']",
-               "//result/doc/str[@name='id'][.=4]",
-               "//result/doc/str[@name='id'][.=10]"
-     );
-  }
-}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6c070b4a/solr/contrib/analysis-extras/src/test/org/apache/solr/schema/TestICUCollationFieldDocValues.java
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/test/org/apache/solr/schema/TestICUCollationFieldDocValues.java b/solr/contrib/analysis-extras/src/test/org/apache/solr/schema/TestICUCollationFieldDocValues.java
deleted file mode 100644
index 57b403a..0000000
--- a/solr/contrib/analysis-extras/src/test/org/apache/solr/schema/TestICUCollationFieldDocValues.java
+++ /dev/null
@@ -1,180 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.schema;
-
-import java.io.File;
-import java.io.FileOutputStream;
-
-import org.apache.commons.io.FileUtils;
-import org.apache.commons.io.IOUtils;
-import org.apache.solr.SolrTestCaseJ4;
-import org.junit.BeforeClass;
-
-import com.ibm.icu.text.Collator;
-import com.ibm.icu.text.RuleBasedCollator;
-import com.ibm.icu.util.ULocale;
-
-/**
- * Tests {@link ICUCollationField} with docValues.
- */
-public class TestICUCollationFieldDocValues extends SolrTestCaseJ4 {
-  
-  @BeforeClass
-  public static void beforeClass() throws Exception {
-    String home = setupSolrHome();
-    initCore("solrconfig.xml","schema.xml", home);
-    // add some docs
-    assertU(adoc("id", "1", "text", "\u0633\u0627\u0628"));
-    assertU(adoc("id", "2", "text", "I WİLL USE TURKİSH CASING"));
-    assertU(adoc("id", "3", "text", "ı will use turkish casıng"));
-    assertU(adoc("id", "4", "text", "Töne"));
-    assertU(adoc("id", "5", "text", "I W\u0049\u0307LL USE TURKİSH CASING"));
-    assertU(adoc("id", "6", "text", "Ｔｅｓｔｉｎｇ"));
-    assertU(adoc("id", "7", "text", "Tone"));
-    assertU(adoc("id", "8", "text", "Testing"));
-    assertU(adoc("id", "9", "text", "testing"));
-    assertU(adoc("id", "10", "text", "toene"));
-    assertU(adoc("id", "11", "text", "Tzne"));
-    assertU(adoc("id", "12", "text", "\u0698\u0698"));
-    assertU(commit());
-  }
-  
-  /**
-   * Ugly: but what to do? We want to test custom sort, which reads rules in as a resource.
-   * These are largish files, and jvm-specific (as our documentation says, you should always
-   * look out for jvm differences with collation).
-   * So it's preferable to create this file on-the-fly.
-   */
-  public static String setupSolrHome() throws Exception {
-    File tmpFile = createTempDir().toFile();
-    
-    // make data and conf dirs
-    new File(tmpFile + "/collection1", "data").mkdirs();
-    File confDir = new File(tmpFile + "/collection1", "conf");
-    confDir.mkdirs();
-    
-    // copy over configuration files
-    FileUtils.copyFile(getFile("analysis-extras/solr/collection1/conf/solrconfig-icucollate.xml"), new File(confDir, "solrconfig.xml"));
-    FileUtils.copyFile(getFile("analysis-extras/solr/collection1/conf/schema-icucollate-dv.xml"), new File(confDir, "schema.xml"));
-    
-    // generate custom collation rules (DIN 5007-2), saving to customrules.dat
-    RuleBasedCollator baseCollator = (RuleBasedCollator) Collator.getInstance(new ULocale("de", "DE"));
-
-    String DIN5007_2_tailorings =
-      "& ae , a\u0308 & AE , A\u0308"+
-      "& oe , o\u0308 & OE , O\u0308"+
-      "& ue , u\u0308 & UE , u\u0308";
-
-    RuleBasedCollator tailoredCollator = new RuleBasedCollator(baseCollator.getRules() + DIN5007_2_tailorings);
-    String tailoredRules = tailoredCollator.getRules();
-    FileOutputStream os = new FileOutputStream(new File(confDir, "customrules.dat"));
-    IOUtils.write(tailoredRules, os, "UTF-8");
-    os.close();
-
-    return tmpFile.getAbsolutePath();
-  }
-
-  /** 
-   * Test termquery with german DIN 5007-1 primary strength.
-   * In this case, ö is equivalent to o (but not oe) 
-   */
-  public void testBasicTermQuery() {
-    assertQ("Collated TQ: ",
-       req("fl", "id", "q", "sort_de:tone", "sort", "id asc" ),
-              "//*[@numFound='2']",
-              "//result/doc[1]/str[@name='id'][.=4]",
-              "//result/doc[2]/str[@name='id'][.=7]"
-    );
-  }
-  
-  /** 
-   * Test rangequery again with the DIN 5007-1 collator.
-   * We do a range query of tone .. tp, in binary order this
-   * would retrieve nothing due to case and accent differences.
-   */
-  public void testBasicRangeQuery() {
-    assertQ("Collated RangeQ: ",
-        req("fl", "id", "q", "sort_de:[tone TO tp]", "sort", "id asc" ),
-               "//*[@numFound='2']",
-               "//result/doc[1]/str[@name='id'][.=4]",
-               "//result/doc[2]/str[@name='id'][.=7]"
-     );
-  }
-  
-  /** 
-   * Test sort with a danish collator. ö is ordered after z
-   */
-  public void testBasicSort() {
-    assertQ("Collated Sort: ",
-        req("fl", "id", "q", "sort_da:[tz TO töz]", "sort", "sort_da asc" ),
-               "//*[@numFound='2']",
-               "//result/doc[1]/str[@name='id'][.=11]",
-               "//result/doc[2]/str[@name='id'][.=4]"
-     );
-  }
-  
-  /** 
-   * Test sort with an arabic collator. U+0633 is ordered after U+0698.
-   * With a binary collator, the range would also return nothing.
-   */
-  public void testArabicSort() {
-    assertQ("Collated Sort: ",
-        req("fl", "id", "q", "sort_ar:[\u0698 TO \u0633\u0633]", "sort", "sort_ar asc" ),
-               "//*[@numFound='2']",
-               "//result/doc[1]/str[@name='id'][.=12]",
-               "//result/doc[2]/str[@name='id'][.=1]"
-     );
-  }
-
-  /** 
-   * Test rangequery again with an Arabic collator.
-   * Binary order would normally order U+0633 in this range.
-   */
-  public void testNegativeRangeQuery() {
-    assertQ("Collated RangeQ: ",
-        req("fl", "id", "q", "sort_ar:[\u062F TO \u0698]", "sort", "id asc" ),
-               "//*[@numFound='0']"
-     );
-  }
-  /**
-   * Test canonical decomposition with turkish primary strength. 
-   * With this sort order, İ is the uppercase form of i, and I is the uppercase form of ı.
-   * We index a decomposed form of İ.
-   */
-  public void testCanonicalDecomposition() {
-    assertQ("Collated TQ: ",
-        req("fl", "id", "q", "sort_tr_canon:\"I Will Use Turkish Casıng\"", "sort", "id asc" ),
-               "//*[@numFound='3']",
-               "//result/doc[1]/str[@name='id'][.=2]",
-               "//result/doc[2]/str[@name='id'][.=3]",
-               "//result/doc[3]/str[@name='id'][.=5]"
-     );
-  }
-  
-  /** 
-   * Test termquery with custom collator (DIN 5007-2).
-   * In this case, ö is equivalent to oe (but not o) 
-   */
-  public void testCustomCollation() {
-    assertQ("Collated TQ: ",
-        req("fl", "id", "q", "sort_custom:toene"),
-               "//*[@numFound='2']",
-               "//result/doc/str[@name='id'][.=4]",
-               "//result/doc/str[@name='id'][.=10]"
-     );
-  }
-}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6c070b4a/solr/contrib/analysis-extras/src/test/org/apache/solr/schema/TestICUCollationFieldOptions.java
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/test/org/apache/solr/schema/TestICUCollationFieldOptions.java b/solr/contrib/analysis-extras/src/test/org/apache/solr/schema/TestICUCollationFieldOptions.java
deleted file mode 100644
index 0b198b7..0000000
--- a/solr/contrib/analysis-extras/src/test/org/apache/solr/schema/TestICUCollationFieldOptions.java
+++ /dev/null
@@ -1,119 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.schema;
-
-import org.apache.commons.io.FileUtils;
-import org.apache.solr.SolrTestCaseJ4;
-import org.junit.BeforeClass;
-
-import java.io.File;
-
-/**
- * Tests expert options of {@link ICUCollationField}.
- */
-public class TestICUCollationFieldOptions extends SolrTestCaseJ4 {
-  @BeforeClass
-  public static void beforeClass() throws Exception {
-    File testHome = createTempDir().toFile();
-    FileUtils.copyDirectory(getFile("analysis-extras/solr"), testHome);
-    initCore("solrconfig-icucollate.xml","schema-icucollateoptions.xml", testHome.getAbsolutePath());
-    // add some docs
-    assertU(adoc("id", "1", "text", "foo-bar"));
-    assertU(adoc("id", "2", "text", "foo bar"));
-    assertU(adoc("id", "3", "text", "foobar"));
-    assertU(adoc("id", "4", "text", "foobar-10"));
-    assertU(adoc("id", "5", "text", "foobar-9"));
-    assertU(adoc("id", "6", "text", "resume"));
-    assertU(adoc("id", "7", "text", "Résumé"));
-    assertU(adoc("id", "8", "text", "Resume"));
-    assertU(adoc("id", "9", "text", "résumé"));
-    assertU(commit());
-  }
-  
-  /*
-   * Setting alternate=shifted to shift whitespace, punctuation and symbols
-   * to quaternary level 
-   */
-  public void testIgnorePunctuation() { 
-    assertQ("Collated TQ: ",
-        req("fl", "id", "q", "sort_ignore_punctuation:foobar", "sort", "id asc" ),
-               "//*[@numFound='3']",
-               "//result/doc[1]/str[@name='id'][.=1]",
-               "//result/doc[2]/str[@name='id'][.=2]",
-               "//result/doc[3]/str[@name='id'][.=3]"
-     );
-  }
-  
-  /*
-   * Setting alternate=shifted and variableTop to shift whitespace, but not 
-   * punctuation or symbols, to quaternary level 
-   */
-  public void testIgnoreWhitespace() {
-    assertQ("Collated TQ: ",
-        req("fl", "id", "q", "sort_ignore_space:\"foo bar\"", "sort", "id asc" ),
-               "//*[@numFound='2']",
-               "//result/doc[1]/str[@name='id'][.=2]",
-               "//result/doc[2]/str[@name='id'][.=3]"
-     );
-  }
-  
-  /*
-   * Setting numeric to encode digits with numeric value, so that
-   * foobar-9 sorts before foobar-10
-   */
-  public void testNumerics() {
-    assertQ("Collated sort: ",
-        req("fl", "id", "q", "id:[4 TO 5]", "sort", "sort_numerics asc" ),
-               "//*[@numFound='2']",
-               "//result/doc[1]/str[@name='id'][.=5]",
-               "//result/doc[2]/str[@name='id'][.=4]"
-     );
-  }
-  
-  /*
-   * Setting caseLevel=true to create an additional case level between
-   * secondary and tertiary
-   */
-  public void testIgnoreAccentsButNotCase() {
-    assertQ("Collated TQ: ",
-        req("fl", "id", "q", "sort_ignore_accents:resume", "sort", "id asc" ),
-               "//*[@numFound='2']",
-               "//result/doc[1]/str[@name='id'][.=6]",
-               "//result/doc[2]/str[@name='id'][.=9]"
-     );
-    
-    assertQ("Collated TQ: ",
-        req("fl", "id", "q", "sort_ignore_accents:Resume", "sort", "id asc" ),
-               "//*[@numFound='2']",
-               "//result/doc[1]/str[@name='id'][.=7]",
-               "//result/doc[2]/str[@name='id'][.=8]"
-     );
-  }
-  
-  /*
-   * Setting caseFirst=upper to cause uppercase strings to sort
-   * before lowercase ones.
-   */
-  public void testUpperCaseFirst() {
-    assertQ("Collated sort: ",
-        req("fl", "id", "q", "id:6 OR id:8", "sort", "sort_uppercase_first asc" ),
-               "//*[@numFound='2']",
-               "//result/doc[1]/str[@name='id'][.=8]",
-               "//result/doc[2]/str[@name='id'][.=6]"
-     );
-  }
-}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6c070b4a/solr/contrib/analysis-extras/src/test/org/apache/solr/update/processor/TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory.java
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/test/org/apache/solr/update/processor/TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory.java b/solr/contrib/analysis-extras/src/test/org/apache/solr/update/processor/TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory.java
deleted file mode 100644
index 851fea0..0000000
--- a/solr/contrib/analysis-extras/src/test/org/apache/solr/update/processor/TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory.java
+++ /dev/null
@@ -1,195 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.solr.update.processor;
-
-import java.io.File;
-import java.util.Arrays;
-
-import org.apache.commons.io.FileUtils;
-import org.apache.solr.common.SolrInputDocument;
-import org.junit.BeforeClass;
-import org.junit.Test;
-
-public class TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory extends UpdateProcessorTestBase {
-
-  @BeforeClass
-  public static void beforeClass() throws Exception {
-    File testHome = createTempDir().toFile();
-    FileUtils.copyDirectory(getFile("analysis-extras/solr"), testHome);
-    initCore("solrconfig-opennlp-extract.xml", "schema-opennlp-extract.xml", testHome.getAbsolutePath());
-  }
-
-  @Test
-  public void testSimpleExtract() throws Exception {
-    SolrInputDocument doc = processAdd("extract-single",
-        doc(f("id", "1"),
-            f("source1_s", "Take this to Mr. Flashman.")));
-    assertEquals("dest_s should have stringValue", "Flashman", doc.getFieldValue("dest_s"));
-  }
-
-  @Test
-  public void testMultiExtract() throws Exception {
-    SolrInputDocument doc = processAdd("extract-multi",
-        doc(f("id", "1"),
-            f("source1_s", "Hello Flashman."),
-            f("source2_s", "Calling Flashman.")));
-
-    assertEquals(Arrays.asList("Flashman", "Flashman"), doc.getFieldValues("dest_s"));
-  }
-
-  @Test
-  public void testArrayExtract() throws Exception {
-    SolrInputDocument doc = processAdd("extract-array",
-        doc(f("id", "1"),
-            f("source1_s", "Currently we have Flashman. Not much else."),
-            f("source2_s", "Flashman. Is. Not. There.")));
-
-    assertEquals(Arrays.asList("Flashman", "Flashman"), doc.getFieldValues("dest_s"));
-  }
-
-  @Test
-  public void testSelectorExtract() throws Exception {
-    SolrInputDocument doc = processAdd("extract-selector",
-        doc(f("id", "1"),
-            f("source0_s", "Flashman. Or not."),
-            f("source1_s", "Serendipitously, he was. I mean, Flashman. And yet."),
-            f("source2_s", "Correct, Flashman.")));
-
-    assertEquals(Arrays.asList("Flashman", "Flashman"), doc.getFieldValues("dest_s"));
-  }
-
-  public void testMultipleExtracts() throws Exception {
-    // test example from the javadocs
-    SolrInputDocument doc = processAdd("multiple-extract",
-        doc(f("id", "1"),
-            f("text", "From Flashman. To Panman."),
-            f("title", "It's Captain Flashman.", "Privately, Flashman."),
-            f("subtitle", "Ineluctably, Flashman."),
-            f("corrolary_txt", "Forsooth thou bringeth Flashman."),
-            f("notes_txt", "Yes Flashman."),
-            f("summary", "Many aspire to be Flashman in London."),
-            f("descs", "Courage, Flashman.", "Ain't he Flashman."),
-            f("descriptions", "Flashman. Flashman. Flashman.")));
-
-    assertEquals(Arrays.asList("Flashman", "Flashman"), doc.getFieldValues("people_s"));
-    assertEquals(Arrays.asList("Flashman", "Flashman", "Flashman"), doc.getFieldValues("titular_people"));
-    assertEquals(Arrays.asList("Flashman", "Flashman"), doc.getFieldValues("key_desc_people"));
-    assertEquals(Arrays.asList("Flashman", "Flashman", "Flashman"), doc.getFieldValues("key_description_people"));
-    assertEquals("Flashman", doc.getFieldValue("summary_person_s")); // {EntityType} field name interpolation
-    assertEquals("London", doc.getFieldValue("summary_location_s")); // {EntityType} field name interpolation
-  }
-
-  public void testEquivalentExtraction() throws Exception {
-    SolrInputDocument d;
-
-    // regardless of chain, all of these checks should be equivalent
-    for (String chain : Arrays.asList("extract-single", "extract-single-regex",
-        "extract-multi", "extract-multi-regex",
-        "extract-array", "extract-array-regex",
-        "extract-selector", "extract-selector-regex")) {
-
-      // simple extract
-      d = processAdd(chain,
-          doc(f("id", "1111"),
-              f("source0_s", "Totally Flashman."), // not extracted
-              f("source1_s", "One nation under Flashman.", "Good Flashman.")));
-      assertNotNull(chain, d);
-      assertEquals(chain, Arrays.asList("Flashman", "Flashman"), d.getFieldValues("dest_s"));
-
-      // append to existing values
-      d = processAdd(chain,
-          doc(f("id", "1111"),
-              field("dest_s", "orig1", "orig2"),
-              f("source0_s", "Flashman. In totality."), // not extracted
-              f("source1_s", "Two nations under Flashman.", "Meh Flashman.")));
-      assertNotNull(chain, d);
-      assertEquals(chain, Arrays.asList("orig1", "orig2", "Flashman", "Flashman"), d.getFieldValues("dest_s"));
-    }
-
-    // should be equivalent for any chain matching source1_s and source2_s (but not source0_s)
-    for (String chain : Arrays.asList("extract-multi", "extract-multi-regex",
-        "extract-array", "extract-array-regex",
-        "extract-selector", "extract-selector-regex")) {
-
-      // simple extract
-      d = processAdd(chain,
-          doc(f("id", "1111"),
-              f("source0_s", "Not Flashman."), // not extracted
-              f("source1_s", "Could have had a Flashman.", "Bad Flashman."),
-              f("source2_s", "Indubitably Flashman.")));
-      assertNotNull(chain, d);
-      assertEquals(chain, Arrays.asList("Flashman", "Flashman", "Flashman"), d.getFieldValues("dest_s"));
-
-      // append to existing values
-      d = processAdd(chain,
-          doc(f("id", "1111"),
-              field("dest_s", "orig1", "orig2"),
-              f("source0_s", "Never Flashman."), // not extracted
-              f("source1_s", "Seeking Flashman.", "Evil incarnate Flashman."),
-              f("source2_s", "Perfunctorily Flashman.")));
-      assertNotNull(chain, d);
-      assertEquals(chain, Arrays.asList("orig1", "orig2", "Flashman", "Flashman", "Flashman"), d.getFieldValues("dest_s"));
-    }
-
-    // any chain that copies source1_s to dest_s should be equivalent for these assertions
-    for (String chain : Arrays.asList("extract-single", "extract-single-regex",
-        "extract-multi", "extract-multi-regex",
-        "extract-array", "extract-array-regex",
-        "extract-selector", "extract-selector-regex")) {
-
-      // simple extract
-      d = processAdd(chain,
-          doc(f("id", "1111"),
-              f("source1_s", "Always Flashman.", "Flashman. Noone else.")));
-      assertNotNull(chain, d);
-      assertEquals(chain, Arrays.asList("Flashman", "Flashman"), d.getFieldValues("dest_s"));
-
-      // append to existing values
-      d = processAdd(chain,
-          doc(f("id", "1111"),
-              field("dest_s", "orig1", "orig2"),
-              f("source1_s", "Flashman.  And, scene.", "Contemporary Flashman. Yeesh.")));
-      assertNotNull(chain, d);
-      assertEquals(chain, Arrays.asList("orig1", "orig2", "Flashman", "Flashman"), d.getFieldValues("dest_s"));
-    }
-  }
-
-  public void testExtractFieldRegexReplaceAll() throws Exception {
-    SolrInputDocument d = processAdd("extract-regex-replaceall",
-        doc(f("id", "1111"),
-            f("foo_x2_s", "Infrequently Flashman.", "In the words of Flashman."),
-            f("foo_x3_x7_s", "Flashman. Whoa.")));
-
-    assertNotNull(d);
-    assertEquals(Arrays.asList("Flashman", "Flashman"), d.getFieldValues("foo_y2_s"));
-    assertEquals("Flashman", d.getFieldValue("foo_y3_y7_s"));
-  }
-
-  public void testExtractFieldRegexReplaceAllWithEntityType() throws Exception {
-    SolrInputDocument d = processAdd("extract-regex-replaceall-with-entity-type",
-        doc(f("id", "1111"),
-            f("foo_x2_s", "Infrequently Flashman in London.", "In the words of Flashman in London."),
-            f("foo_x3_x7_s", "Flashman in London. Whoa.")));
-
-    assertNotNull(d);
-    assertEquals(d.getFieldNames().toString(), Arrays.asList("Flashman", "Flashman"), d.getFieldValues("foo_person_y2_s"));
-    assertEquals(d.getFieldNames().toString(), Arrays.asList("London", "London"), d.getFieldValues("foo_location_y2_s"));
-    assertEquals(d.getFieldNames().toString(),"Flashman", d.getFieldValue("foo_person_y3_person_y7_s"));
-    assertEquals(d.getFieldNames().toString(),"London", d.getFieldValue("foo_location_y3_location_y7_s"));
-  }
-}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6c070b4a/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/en-test-ner.bin
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/en-test-ner.bin b/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/en-test-ner.bin
new file mode 100644
index 0000000..b4d8cdc
Binary files /dev/null and b/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/en-test-ner.bin differ

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6c070b4a/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/en-test-sent.bin
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/en-test-sent.bin b/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/en-test-sent.bin
new file mode 100644
index 0000000..6e19e6b
Binary files /dev/null and b/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/en-test-sent.bin differ

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6c070b4a/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/en-test-tokenizer.bin
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/en-test-tokenizer.bin b/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/en-test-tokenizer.bin
new file mode 100644
index 0000000..796a744
Binary files /dev/null and b/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/en-test-tokenizer.bin differ

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6c070b4a/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/schema-folding-extra.xml
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/schema-folding-extra.xml b/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/schema-folding-extra.xml
new file mode 100644
index 0000000..573ca53
--- /dev/null
+++ b/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/schema-folding-extra.xml
@@ -0,0 +1,52 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one or more
+  contributor license agreements.  See the NOTICE file distributed with
+  this work for additional information regarding copyright ownership.
+  The ASF licenses this file to You under the Apache License, Version 2.0
+  (the "License"); you may not use this file except in compliance with
+  the License.  You may obtain a copy of the License at
+
+      http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing, software
+  distributed under the License is distributed on an "AS IS" BASIS,
+  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+  See the License for the specific language governing permissions and
+  limitations under the License.
+  -->
+
+<schema name="test" version="1.0">
+  <fieldType name="string" class="solr.StrField" sortMissingLast="true" multiValued="false"/>
+
+
+  <fieldType name="text_icufolding" class="solr.TextField">
+    <analyzer>
+      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+      <filter class="solr.ICUFoldingFilterFactory"/>
+    </analyzer>
+  </fieldType>
+
+  <fieldType name="text_icunormalizer2" class="solr.TextField">
+    <analyzer>
+      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+      <filter class="solr.ICUNormalizer2FilterFactory" name="nfkc_cf" mode="compose"/>
+    </analyzer>
+  </fieldType>
+
+  <fieldType name="text_icutransform" class="solr.TextField">
+    <analyzer>
+      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+      <filter class="solr.ICUTransformFilterFactory" id="Cyrillic-Latin"/>
+    </analyzer>
+  </fieldType>
+
+
+  <field name="id" type="string" indexed="true" stored="true" required="true"/>
+  <field name="content_icufolding" type="text_icufolding" indexed="true" stored="true"/>
+  <field name="content_icunormalizer2" type="text_icunormalizer2" indexed="true" stored="true"/>
+  <field name="content_icutransform" type="text_icutransform" indexed="true" stored="true"/>
+
+
+  <uniqueKey>id</uniqueKey>
+
+</schema>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6c070b4a/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/schema-icucollate-dv.xml
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/schema-icucollate-dv.xml b/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/schema-icucollate-dv.xml
new file mode 100644
index 0000000..63f7330
--- /dev/null
+++ b/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/schema-icucollate-dv.xml
@@ -0,0 +1,57 @@
+<?xml version="1.0" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- Test schema file for CollationField (docvalues) -->
+
+<schema name="test" version="1.0">
+
+  <fieldType name="string" class="solr.StrField" omitNorms="true" positionIncrementGap="0"/>
+
+  <!-- basic text field -->
+  <fieldType name="text" class="solr.TextField">
+    <analyzer>
+      <tokenizer class="solr.StandardTokenizerFactory"/>
+      <filter class="solr.LowerCaseFilterFactory"/>
+    </analyzer>
+  </fieldType>
+
+  <fieldType name="sort_ar_t" class="solr.ICUCollationField" locale="ar"/>
+  <fieldType name="sort_de_t" class="solr.ICUCollationField" locale="de" strength="primary"/>
+  <fieldType name="sort_tr_canon_t" class="solr.ICUCollationField" locale="tr" strength="primary"
+             decomposition="canonical"/>
+  <fieldType name="sort_da_t" class="solr.ICUCollationField" locale="da" strength="primary"/>
+  <fieldType name="sort_custom_t" class="solr.ICUCollationField" custom="customrules.dat" strength="primary"/>
+
+  <field name="id" type="string" indexed="true" stored="true" multiValued="false" required="false"/>
+  <field name="text" type="text" indexed="true" stored="false"/>
+  <field name="sort_ar" type="sort_ar_t" indexed="false" stored="false" multiValued="false" docValues="true"/>
+  <field name="sort_de" type="sort_de_t" indexed="false" stored="false" multiValued="false" docValues="true"/>
+  <field name="sort_tr_canon" type="sort_tr_canon_t" indexed="false" stored="false" multiValued="true"
+         docValues="true"/>
+  <field name="sort_da" type="sort_da_t" indexed="false" stored="false" multiValued="false" docValues="true"/>
+  <field name="sort_custom" type="sort_custom_t" indexed="false" stored="false" multiValued="true" docValues="true"/>
+
+  <uniqueKey>id</uniqueKey>
+
+  <!-- copy our text to some sort fields with different orders -->
+  <copyField source="text" dest="sort_ar"/>
+  <copyField source="text" dest="sort_de"/>
+  <copyField source="text" dest="sort_tr_canon"/>
+  <copyField source="text" dest="sort_da"/>
+  <copyField source="text" dest="sort_custom"/>
+</schema>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6c070b4a/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/schema-icucollate.xml
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/schema-icucollate.xml b/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/schema-icucollate.xml
new file mode 100644
index 0000000..9698013
--- /dev/null
+++ b/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/schema-icucollate.xml
@@ -0,0 +1,57 @@
+<?xml version="1.0" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- Test schema file for CollationField -->
+
+<schema name="test" version="1.0">
+
+  <fieldType name="string" class="solr.StrField" omitNorms="true" positionIncrementGap="0"/>
+
+  <!-- basic text field -->
+  <fieldType name="text" class="solr.TextField">
+    <analyzer>
+      <tokenizer class="solr.StandardTokenizerFactory"/>
+      <filter class="solr.LowerCaseFilterFactory"/>
+    </analyzer>
+  </fieldType>
+
+  <fieldType name="sort_ar_t" class="solr.ICUCollationField" locale="ar"/>
+  <fieldType name="sort_de_t" class="solr.ICUCollationField" locale="de" strength="primary"/>
+  <fieldType name="sort_tr_canon_t" class="solr.ICUCollationField" locale="tr" strength="primary"
+             decomposition="canonical"/>
+  <fieldType name="sort_da_t" class="solr.ICUCollationField" locale="da" strength="primary"/>
+  <fieldType name="sort_custom_t" class="solr.ICUCollationField" custom="customrules.dat" strength="primary"/>
+
+  <field name="id" type="string" indexed="true" stored="true" multiValued="false" required="false"/>
+  <field name="text" type="text" indexed="true" stored="false"/>
+  <field name="sort_ar" type="sort_ar_t" indexed="true" stored="false" multiValued="false"/>
+  <field name="sort_de" type="sort_de_t" indexed="true" stored="false" multiValued="false"/>
+  <field name="sort_tr_canon" type="sort_tr_canon_t" indexed="true" stored="false" multiValued="false"/>
+  <field name="sort_da" type="sort_da_t" indexed="true" stored="false" multiValued="false"/>
+  <field name="sort_custom" type="sort_custom_t" indexed="true" stored="false" multiValued="false"/>
+
+
+  <uniqueKey>id</uniqueKey>
+
+  <!-- copy our text to some sort fields with different orders -->
+  <copyField source="text" dest="sort_ar"/>
+  <copyField source="text" dest="sort_de"/>
+  <copyField source="text" dest="sort_tr_canon"/>
+  <copyField source="text" dest="sort_da"/>
+  <copyField source="text" dest="sort_custom"/>
+</schema>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6c070b4a/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/schema-icucollateoptions.xml
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/schema-icucollateoptions.xml b/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/schema-icucollateoptions.xml
new file mode 100644
index 0000000..59b8d25
--- /dev/null
+++ b/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/schema-icucollateoptions.xml
@@ -0,0 +1,68 @@
+<?xml version="1.0" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- Test schema file for CollationField options -->
+
+<schema name="test" version="1.0">
+
+  <fieldType name="string" class="solr.StrField" omitNorms="true" positionIncrementGap="0"/>
+
+  <!-- basic text field -->
+  <fieldType name="text" class="solr.TextField">
+    <analyzer>
+      <tokenizer class="solr.StandardTokenizerFactory"/>
+      <filter class="solr.LowerCaseFilterFactory"/>
+    </analyzer>
+  </fieldType>
+
+  <!-- ignores punctuation and whitespace -->
+  <fieldType name="sort_ignore_punctuation_t" class="solr.ICUCollationField"
+             locale="en" strength="primary" alternate="shifted"/>
+  <!-- ignores only whitespace -->
+  <fieldType name="sort_ignore_space_t" class="solr.ICUCollationField"
+             locale="en" strength="primary" alternate="shifted" variableTop=" "/>
+  <!-- ignores only accents, but not case -->
+  <fieldType name="sort_ignore_accents_t" class="solr.ICUCollationField"
+             locale="en" strength="primary" caseLevel="true"/>
+  <!-- sorts numerics in numeric order -->
+  <fieldType name="sort_numerics_t" class="solr.ICUCollationField"
+             locale="en" numeric="true"/>
+  <!-- sorts uppercase before lowercase -->
+  <fieldType name="sort_uppercase_first_t" class="solr.ICUCollationField"
+             locale="en" strength="tertiary" caseFirst="upper"/>
+
+
+  <field name="id" type="string" indexed="true" stored="true" multiValued="false" required="false"/>
+  <field name="text" type="text" indexed="true" stored="false"/>
+  <field name="sort_ignore_punctuation" type="sort_ignore_punctuation_t" indexed="true" stored="false"
+         multiValued="false"/>
+  <field name="sort_ignore_space" type="sort_ignore_space_t" indexed="true" stored="false" multiValued="false"/>
+  <field name="sort_ignore_accents" type="sort_ignore_accents_t" indexed="true" stored="false" multiValued="false"/>
+  <field name="sort_numerics" type="sort_numerics_t" indexed="true" stored="false" multiValued="false"/>
+  <field name="sort_uppercase_first" type="sort_uppercase_first_t" indexed="true" stored="false" multiValued="false"/>
+
+
+  <uniqueKey>id</uniqueKey>
+
+  <!-- copy our text to some sort fields with different orders -->
+  <copyField source="text" dest="sort_ignore_punctuation"/>
+  <copyField source="text" dest="sort_ignore_space"/>
+  <copyField source="text" dest="sort_ignore_accents"/>
+  <copyField source="text" dest="sort_numerics"/>
+  <copyField source="text" dest="sort_uppercase_first"/>
+</schema>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6c070b4a/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/schema-opennlp-extract.xml
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/schema-opennlp-extract.xml b/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/schema-opennlp-extract.xml
new file mode 100644
index 0000000..fc13431
--- /dev/null
+++ b/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/schema-opennlp-extract.xml
@@ -0,0 +1,49 @@
+<?xml version="1.0" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<schema name="test-opennlp-extract" version="1.6">
+  <fieldType name="opennlp-en-tokenization" class="solr.TextField">
+    <analyzer>
+      <tokenizer class="solr.OpenNLPTokenizerFactory"
+                 sentenceModel="en-test-sent.bin"
+                 tokenizerModel="en-test-tokenizer.bin"/>
+    </analyzer>
+  </fieldType>
+
+  <fieldType name="string" class="solr.StrField" sortMissingLast="true"/>
+
+  <fieldType name="text" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
+    <analyzer>
+      <tokenizer class="solr.MockTokenizerFactory"/>
+      <filter class="solr.LowerCaseFilterFactory"/>
+      <filter class="solr.PorterStemFilterFactory"/>
+    </analyzer>
+  </fieldType>
+
+  <field name="id" type="string" indexed="true" stored="true" multiValued="false" required="true"/>
+  <field name="text" type="text" indexed="true" stored="false"/>
+  <field name="subject" type="text" indexed="true" stored="true"/>
+  <field name="title" type="text" indexed="true" stored="true"/>
+  <field name="subtitle" type="text" indexed="true" stored="true"/>
+  <field name="descs" type="text" indexed="true" stored="true"/>
+  <field name="descriptions" type="text" indexed="true" stored="true"/>
+
+  <dynamicField name="*_txt" type="text" indexed="true" stored="true"/>
+  <dynamicField name="*_s" type="string" indexed="true" stored="true" multiValued="true"/>
+  <dynamicField name="*_people" type="string" indexed="true" stored="true" multiValued="true"/>
+</schema>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6c070b4a/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/solrconfig-icucollate.xml
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/solrconfig-icucollate.xml b/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/solrconfig-icucollate.xml
new file mode 100644
index 0000000..90c52d7
--- /dev/null
+++ b/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/solrconfig-icucollate.xml
@@ -0,0 +1,27 @@
+<?xml version="1.0" ?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<config>
+  <luceneMatchVersion>${tests.luceneMatchVersion:LATEST}</luceneMatchVersion>
+  <indexConfig>
+    <useCompoundFile>${useCompoundFile:false}</useCompoundFile>
+  </indexConfig>
+  <requestHandler name="/select" class="solr.SearchHandler"></requestHandler>
+  <directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.RAMDirectoryFactory}"/>
+</config>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6c070b4a/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/solrconfig-opennlp-extract.xml
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/solrconfig-opennlp-extract.xml b/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/solrconfig-opennlp-extract.xml
new file mode 100644
index 0000000..7fd793e
--- /dev/null
+++ b/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/solrconfig-opennlp-extract.xml
@@ -0,0 +1,206 @@
+<?xml version="1.0" ?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<config>
+  <luceneMatchVersion>${tests.luceneMatchVersion:LATEST}</luceneMatchVersion>
+  <xi:include href="solrconfig.snippet.randomindexconfig.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
+  <requestHandler name="/select" class="solr.SearchHandler"></requestHandler>
+  <requestHandler name="/update" class="solr.UpdateRequestHandler"  />
+  <directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.RAMDirectoryFactory}"/>
+  <schemaFactory class="ClassicIndexSchemaFactory"/>
+
+  <updateRequestProcessorChain name="extract-single">
+    <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+      <str name="modelFile">en-test-ner.bin</str>
+      <str name="analyzerFieldType">opennlp-en-tokenization</str>
+      <str name="source">source1_s</str>
+      <str name="dest">dest_s</str>
+    </processor>
+  </updateRequestProcessorChain>
+
+  <updateRequestProcessorChain name="extract-single-regex">
+    <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+      <str name="modelFile">en-test-ner.bin</str>
+      <str name="analyzerFieldType">opennlp-en-tokenization</str>
+      <str name="source">source1_s</str>
+      <lst name="dest">
+        <str name="pattern">source\d(_s)</str>
+        <str name="replacement">dest$1</str>
+      </lst>
+    </processor>
+  </updateRequestProcessorChain>
+
+  <updateRequestProcessorChain name="extract-multi">
+    <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+      <str name="modelFile">en-test-ner.bin</str>
+      <str name="analyzerFieldType">opennlp-en-tokenization</str>
+      <str name="source">source1_s</str>
+      <str name="source">source2_s</str>
+      <str name="dest">dest_s</str>
+    </processor>
+  </updateRequestProcessorChain>
+
+  <updateRequestProcessorChain name="extract-multi-regex">
+    <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+      <str name="modelFile">en-test-ner.bin</str>
+      <str name="analyzerFieldType">opennlp-en-tokenization</str>
+      <str name="source">source1_s</str>
+      <str name="source">source2_s</str>
+      <lst name="dest">
+        <str name="pattern">source\d(_s)</str>
+        <str name="replacement">dest$1</str>
+      </lst>
+    </processor>
+  </updateRequestProcessorChain>
+
+  <updateRequestProcessorChain name="extract-array">
+    <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+      <str name="modelFile">en-test-ner.bin</str>
+      <str name="analyzerFieldType">opennlp-en-tokenization</str>
+      <arr name="source">
+        <str>source1_s</str>
+        <str>source2_s</str>
+      </arr>
+      <str name="dest">dest_s</str>
+    </processor>
+  </updateRequestProcessorChain>
+
+  <updateRequestProcessorChain name="extract-array-regex">
+    <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+      <str name="modelFile">en-test-ner.bin</str>
+      <str name="analyzerFieldType">opennlp-en-tokenization</str>
+      <arr name="source">
+        <str>source1_s</str>
+        <str>source2_s</str>
+      </arr>
+      <lst name="dest">
+        <str name="pattern">source\d(_s)</str>
+        <str name="replacement">dest$1</str>
+      </lst>
+    </processor>
+  </updateRequestProcessorChain>
+
+  <updateRequestProcessorChain name="extract-selector">
+    <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+      <str name="modelFile">en-test-ner.bin</str>
+      <str name="analyzerFieldType">opennlp-en-tokenization</str>
+      <lst name="source">
+        <str name="fieldRegex">source\d_.*</str>
+        <lst name="exclude">
+          <str name="fieldRegex">source0_.*</str>
+        </lst>
+      </lst>
+      <str name="dest">dest_s</str>
+    </processor>
+  </updateRequestProcessorChain>
+
+  <updateRequestProcessorChain name="extract-selector-regex">
+    <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+      <str name="modelFile">en-test-ner.bin</str>
+      <str name="analyzerFieldType">opennlp-en-tokenization</str>
+      <lst name="source">
+        <str name="fieldRegex">source\d_.*</str>
+        <lst name="exclude">
+          <str name="fieldRegex">source0_.*</str>
+        </lst>
+      </lst>
+      <lst name="dest">
+        <str name="pattern">source\d(_s)</str>
+        <str name="replacement">dest$1</str>
+      </lst>
+    </processor>
+  </updateRequestProcessorChain>
+
+  <updateRequestProcessorChain name="extract-regex-replaceall">
+    <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+      <str name="modelFile">en-test-ner.bin</str>
+      <str name="analyzerFieldType">opennlp-en-tokenization</str>
+      <lst name="source">
+        <str name="fieldRegex">foo.*</str>
+      </lst>
+      <lst name="dest">
+        <!-- unbounded pattern that can be replaced multiple times in field name -->
+        <str name="pattern">x(\d)</str>
+        <str name="replacement">y$1</str>
+      </lst>
+    </processor>
+  </updateRequestProcessorChain>
+
+  <updateRequestProcessorChain name="extract-regex-replaceall-with-entity-type">
+    <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+      <str name="modelFile">en-test-ner.bin</str>
+      <str name="analyzerFieldType">opennlp-en-tokenization</str>
+      <lst name="source">
+        <str name="fieldRegex">foo.*</str>
+      </lst>
+      <lst name="dest">
+        <!-- unbounded pattern that can be replaced multiple times in field name -->
+        <str name="pattern">x(\d)</str>
+        <str name="replacement">{EntityType}_y$1</str>
+      </lst>
+    </processor>
+  </updateRequestProcessorChain>
+
+  <!-- example used in OpenNLPExtractNamedEntitiesUpdateProcessorFactory javadocs -->
+  <updateRequestProcessorChain name="multiple-extract">
+    <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+      <str name="modelFile">en-test-ner.bin</str>
+      <str name="analyzerFieldType">opennlp-en-tokenization</str>
+      <str name="source">text</str>
+      <str name="dest">people_s</str>
+    </processor>
+    <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+      <str name="modelFile">en-test-ner.bin</str>
+      <str name="analyzerFieldType">opennlp-en-tokenization</str>
+      <arr name="source">
+        <str>title</str>
+        <str>subtitle</str>
+      </arr>
+      <str name="dest">titular_people</str>
+    </processor>
+    <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+      <str name="modelFile">en-test-ner.bin</str>
+      <str name="analyzerFieldType">opennlp-en-tokenization</str>
+      <lst name="source">
+        <str name="fieldRegex">.*_txt$</str>
+        <lst name="exclude">
+          <str name="fieldName">notes_txt</str>
+        </lst>
+      </lst>
+      <str name="dest">people_s</str>
+    </processor>
+    <processor class="solr.processor.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+      <str name="modelFile">en-test-ner.bin</str>
+      <str name="analyzerFieldType">opennlp-en-tokenization</str>
+      <lst name="source">
+        <str name="fieldRegex">^desc(.*)s$</str>
+      </lst>
+      <lst name="dest">
+        <str name="pattern">^desc(.*)s$</str>
+        <str name="replacement">key_desc$1_people</str>
+      </lst>
+    </processor>
+    <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+      <str name="modelFile">en-test-ner.bin</str>
+      <str name="analyzerFieldType">opennlp-en-tokenization</str>
+      <str name="source">summary</str>
+      <str name="dest">summary_{EntityType}_s</str>
+    </processor>
+  </updateRequestProcessorChain>
+</config>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6c070b4a/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/solrconfig.snippet.randomindexconfig.xml
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/solrconfig.snippet.randomindexconfig.xml b/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/solrconfig.snippet.randomindexconfig.xml
new file mode 100644
index 0000000..23516b0
--- /dev/null
+++ b/solr/contrib/analysis-extras/src/test/resources/analysis-extras/solr/collection1/conf/solrconfig.snippet.randomindexconfig.xml
@@ -0,0 +1,48 @@
+<?xml version="1.0" ?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!--
+A solrconfig.xml snippet containing indexConfig settings for randomized testing.
+-->
+<indexConfig>
+  <!-- this sys property is not set by SolrTestCaseJ4 because we ideally want to use
+       the RandomMergePolicy in all tests - but some tests expect very specific
+       Merge behavior, so those tests can set it as needed.
+  -->
+  <mergePolicyFactory class="${solr.tests.mergePolicyFactory:org.apache.solr.util.RandomMergePolicyFactory}" />
+
+  <useCompoundFile>${useCompoundFile:false}</useCompoundFile>
+
+  <maxBufferedDocs>${solr.tests.maxBufferedDocs}</maxBufferedDocs>
+  <ramBufferSizeMB>${solr.tests.ramBufferSizeMB}</ramBufferSizeMB>
+
+  <mergeScheduler class="${solr.tests.mergeScheduler}" />
+
+  <writeLockTimeout>1000</writeLockTimeout>
+  <commitLockTimeout>10000</commitLockTimeout>
+
+  <!-- this sys property is not set by SolrTestCaseJ4 because almost all tests should
+       use the single process lockType for speed - but tests that explicitly need
+       to vary the lockType can set it as needed.
+  -->
+  <lockType>${solr.tests.lockType:single}</lockType>
+
+  <infoStream>${solr.tests.infostream:false}</infoStream>
+
+</indexConfig>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/6c070b4a/solr/core/build.gradle
----------------------------------------------------------------------
diff --git a/solr/core/build.gradle b/solr/core/build.gradle
index e8cc58e..afe4834 100644
--- a/solr/core/build.gradle
+++ b/solr/core/build.gradle
@@ -133,4 +133,5 @@ dependencies {
 	testCompile project(':solr:test-framework')
 	testCompile project(path: ':lucene:backward-codecs', configuration: 'testOutput')
 	testCompile project(path: ':lucene:queryparser', configuration: 'testOutput')
+	testCompile project(':solr:contrib:analysis-extras')
 }
\ No newline at end of file