You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sa...@apache.org on 2017/12/15 16:25:54 UTC

[01/12] lucene-solr:master: LUCENE-2899: Add OpenNLP Analysis capabilities as a module

Repository: lucene-solr
Updated Branches:
  refs/heads/branch_7x 7f3064623 -> b720e1ee3
  refs/heads/master d02d1f1ca -> 3e2f9e62d


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/solr/licenses/opennlp-maxent-LICENSE-ASL.txt
----------------------------------------------------------------------
diff --git a/solr/licenses/opennlp-maxent-LICENSE-ASL.txt b/solr/licenses/opennlp-maxent-LICENSE-ASL.txt
new file mode 100644
index 0000000..d645695
--- /dev/null
+++ b/solr/licenses/opennlp-maxent-LICENSE-ASL.txt
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/solr/licenses/opennlp-maxent-NOTICE.txt
----------------------------------------------------------------------
diff --git a/solr/licenses/opennlp-maxent-NOTICE.txt b/solr/licenses/opennlp-maxent-NOTICE.txt
new file mode 100644
index 0000000..9b97287
--- /dev/null
+++ b/solr/licenses/opennlp-maxent-NOTICE.txt
@@ -0,0 +1,6 @@
+
+Apache OpenNLP Maxent
+Copyright 2013 The Apache Software Foundation
+
+This product includes software developed at
+The Apache Software Foundation (http://www.apache.org/).

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/solr/licenses/opennlp-tools-1.8.3.jar.sha1
----------------------------------------------------------------------
diff --git a/solr/licenses/opennlp-tools-1.8.3.jar.sha1 b/solr/licenses/opennlp-tools-1.8.3.jar.sha1
new file mode 100644
index 0000000..c6a7549
--- /dev/null
+++ b/solr/licenses/opennlp-tools-1.8.3.jar.sha1
@@ -0,0 +1 @@
+3ce7c9056048f55478d983248cf18c7e02b1d072

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/solr/licenses/opennlp-tools-LICENSE-ASL.txt
----------------------------------------------------------------------
diff --git a/solr/licenses/opennlp-tools-LICENSE-ASL.txt b/solr/licenses/opennlp-tools-LICENSE-ASL.txt
new file mode 100644
index 0000000..d645695
--- /dev/null
+++ b/solr/licenses/opennlp-tools-LICENSE-ASL.txt
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/solr/licenses/opennlp-tools-NOTICE.txt
----------------------------------------------------------------------
diff --git a/solr/licenses/opennlp-tools-NOTICE.txt b/solr/licenses/opennlp-tools-NOTICE.txt
new file mode 100644
index 0000000..68a08dc
--- /dev/null
+++ b/solr/licenses/opennlp-tools-NOTICE.txt
@@ -0,0 +1,6 @@
+
+Apache OpenNLP Tools
+Copyright 2015 The Apache Software Foundation
+
+This product includes software developed at
+The Apache Software Foundation (http://www.apache.org/).

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/solr/solr-ref-guide/src/filter-descriptions.adoc
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/src/filter-descriptions.adoc b/solr/solr-ref-guide/src/filter-descriptions.adoc
index b8ab6de..09dd0f6 100644
--- a/solr/solr-ref-guide/src/filter-descriptions.adoc
+++ b/solr/solr-ref-guide/src/filter-descriptions.adoc
@@ -1576,6 +1576,38 @@ This filter adds the token's type, as an encoded byte sequence, as its payload.
 
 *Out:* "Pay"[<ALPHANUM>], "Bob's"[<APOSTROPHE>], "I.O.U."[<ACRONYM>]
 
+== Type As Synonym Filter
+
+This filter adds the token's type as a token at the same position as the original token, optionally with a configurable prefix prepended.
+
+*Factory class:* `solr.TypeAsSynonymFilterFactory`
+
+*Arguments:*
+
+`prefix`:: (optional) The prefix to prepend to the token's type.
+
+*Examples:*
+
+With the example below, each token's type will be emitted verbatim at the same position:
+
+[source,xml]
+----
+<analyzer>
+  <tokenizer class="solr.StandardTokenizerFactory"/>
+  <filter class="solr.TypeAsSynonymFilterFactory"/>
+</analyzer>
+----
+
+With the example below, for a token "example.com" with type `<URL>`, the token emitted at the same position will be "\_type_<URL>":
+
+[source,xml]
+----
+<analyzer>
+  <tokenizer class="solr.UAX29URLEmailTokenizerFactory"/>
+  <filter class="solr.TypeAsSynonymFilterFactory" prefix="_type_"/>
+</analyzer>
+----
+
 == Type Token Filter
 
 This filter blacklists or whitelists a specified list of token types, assuming the tokens have type metadata associated with them. For example, the <<tokenizers.adoc#uax29-url-email-tokenizer,UAX29 URL Email Tokenizer>> emits "<URL>" and "<EMAIL>" typed tokens, as well as other types. This filter would allow you to pull out only e-mail addresses from text as tokens, if you wish.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/solr/solr-ref-guide/src/language-analysis.adoc
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/src/language-analysis.adoc b/solr/solr-ref-guide/src/language-analysis.adoc
index 8e6a214..9d00325 100644
--- a/solr/solr-ref-guide/src/language-analysis.adoc
+++ b/solr/solr-ref-guide/src/language-analysis.adoc
@@ -355,6 +355,214 @@ This can increase recall by causing more matches. On the other hand, it can redu
 </analyzer>
 ----
 
+== OpenNLP Integration
+
+The `lucene/analysis/opennlp` module provides OpenNLP integration via several analysis components: a tokenizer, a part-of-speech tagging filter, a phrase chunking filter, and a lemmatization filter.  In addition to these analysis components, Solr also provides an update request processor to extract named entities - see <<update-request-processors.adoc#update-processor-factories-that-can-be-loaded-as-plugins,Update Processor Factories That Can Be Loaded as Plugins>>.
+
+NOTE: The <<OpenNLP Tokenizer>> must be used with all other OpenNLP analysis components, for two reasons: first, the OpenNLP Tokenizer detects and marks the sentence boundaries required by all the OpenNLP filters; and second, since the pre-trained OpenNLP models used by these filters were trained using the corresponding language-specific sentence-detection/tokenization models, the same tokenization, using the same models, must be used at runtime for optimal performance.
+
+See `solr/contrib/analysis-extras/README.txt` for information on which jars you need to add to your `SOLR_HOME/lib`.
+
+=== OpenNLP Tokenizer
+
+The OpenNLP Tokenizer takes two language-specific binary model files as parameters: a sentence detector model and a tokenizer model.  The last token in each sentence is flagged, so that following OpenNLP-based filters can use this information to apply operations to tokens one sentence at a time. See the http://opennlp.apache.org/models.html[OpenNLP website] for information on downloading pre-trained models.
+
+*Factory class:* `solr.OpenNLPTokenizerFactory`
+
+*Arguments:*
+
+`sentenceModel`:: (required) The path of a language-specific OpenNLP sentence detection model file. This path may be an absolute path, or path relative to the Solr config directory.
+
+`tokenizerModel`:: (required) The path of a language-specific OpenNLP tokenization model file. This path may be an absolute path, or path relative to the Solr config directory.
+
+*Example:*
+
+[source,xml]
+----
+<analyzer>
+  <tokenizer class="solr.OpenNLPTokenizerFactory"
+             sentenceModel="en-sent.bin"
+             tokenizerModel="en-tokenizer.bin"/>
+</analyzer>
+----
+
+=== OpenNLP Part-Of-Speech Filter
+
+This filter sets each token's type attribute to the part of speech (POS) assigned by the configured model. See the http://opennlp.apache.org/models.html[OpenNLP website] for information on downloading pre-trained models.
+
+NOTE: Lucene currently does not index token types, so if you want to keep this information, you have to preserve it either in a payload or as a synonym; see the examples below.
+
+*Factory class:* `solr.OpenNLPPOSFilterFactory`
+
+*Arguments:*
+
+`posTaggerModel`:: (required) The path of a language-specific OpenNLP POS tagger model file. This path may be an absolute path, or path relative to the Solr config directory.
+
+*Examples:*
+
+The OpenNLP tokenizer will tokenize punctuation, which is useful for following token filters, but ordinarily you don't want to include punctuation in your index, so the `TypeTokenFilter` (<<filter-descriptions.adoc#type-token-filter,described here>>) is included in the examples below, with `stop.pos.txt` containing the following:
+
+.stop.pos.txt
+[source,text]
+----
+#
+$
+''
+``
+,
+-LRB-
+-RRB-
+:
+.
+----
+
+Index the POS for each token as a payload:
+
+[source,xml]
+----
+<analyzer>
+  <tokenizer class="solr.OpenNLPTokenizerFactory"
+             sentenceModel="en-sent.bin"
+             tokenizerModel="en-tokenizer.bin"/>
+  <filter class="solr.OpenNLPPOSFilterFactory" posTaggerModel="en-pos-maxent.bin"/>
+  <filter class="solr.TypeAsPayloadFilterFactory"/>
+  <filter class="solr.TypeTokenFilterFactory" types="stop.pos.txt"/>
+</analyzer>
+----
+
+Index the POS for each token as a synonym, after prefixing the POS with "@" (see the <<filter-descriptions.adoc#type-as-synonym-filter,TypeAsSynonymFilter description>>):
+
+[source,xml]
+----
+<analyzer>
+  <tokenizer class="solr.OpenNLPTokenizerFactory"
+             sentenceModel="en-sent.bin"
+             tokenizerModel="en-tokenizer.bin"/>
+  <filter class="solr.OpenNLPPOSFilterFactory" posTaggerModel="en-pos-maxent.bin"/>
+  <filter class="solr.TypeAsSynonymFilterFactory" prefix="@"/>
+  <filter class="solr.TypeTokenFilterFactory" types="stop.pos.txt"/>
+</analyzer>
+----
+
+Only index nouns - the `keep.pos.txt` file contains lines `NN`, `NNS`, `NNP` and `NNPS`:
+
+[source,xml]
+----
+<analyzer>
+  <tokenizer class="solr.OpenNLPTokenizerFactory"
+             sentenceModel="en-sent.bin"
+             tokenizerModel="en-tokenizer.bin"/>
+  <filter class="solr.OpenNLPPOSFilterFactory" posTaggerModel="en-pos-maxent.bin"/>
+  <filter class="solr.TypeTokenFilterFactory" types="keep.pos.txt" useWhitelist="true"/>
+</analyzer>
+----
+
+=== OpenNLP Phrase Chunking Filter
+
+This filter sets each token's type attribute based on the output of an OpenNLP phrase chunking model.  The chunk labels replace the POS tags that previously were in each token's type attribute. See the http://opennlp.apache.org/models.html[OpenNLP website] for information on downloading pre-trained models.
+
+Prerequisite: the <<OpenNLP Tokenizer>> and the <<OpenNLP Part-Of-Speech Filter>> must precede this filter.
+
+NOTE: Lucene currently does not index token types, so if you want to keep this information, you have to preserve it either in a payload or as a synonym; see the examples below.
+
+*Factory class:* `solr.OpenNLPChunkerFilterFactory`
+
+*Arguments:*
+
+`chunkerModel`:: (required) The path of a language-specific OpenNLP phrase chunker model file. This path may be an absolute path, or path relative to the Solr config directory.
+
+*Examples:*
+
+Index the phrase chunk label for each token as a payload:
+
+[source,xml]
+----
+<analyzer>
+  <tokenizer class="solr.OpenNLPTokenizerFactory"
+             sentenceModel="en-sent.bin"
+             tokenizerModel="en-tokenizer.bin"/>
+  <filter class="solr.OpenNLPPOSFilterFactory" posTaggerModel="en-pos-maxent.bin"/>
+  <filter class="solr.OpenNLPChunkerFilterFactory" chunkerModel="en-chunker.bin"/>
+  <filter class="solr.TypeAsPayloadFilterFactory"/>
+</analyzer>
+----
+
+Index the phrase chunk label for each token as a synonym, after prefixing it with "#" (see the <<filter-descriptions.adoc#type-as-synonym-filter,TypeAsSynonymFilter description>>):
+
+[source,xml]
+----
+<analyzer>
+  <tokenizer class="solr.OpenNLPTokenizerFactory"
+             sentenceModel="en-sent.bin"
+             tokenizerModel="en-tokenizer.bin"/>
+  <filter class="solr.OpenNLPPOSFilterFactory" posTaggerModel="en-pos-maxent.bin"/>
+  <filter class="solr.OpenNLPChunkerFilterFactory" chunkerModel="en-chunker.bin"/>
+  <filter class="solr.TypeAsSynonymFilterFactory" prefix="#"/>
+</analyzer>
+----
+
+=== OpenNLP Lemmatizer Filter
+
+This filter replaces the text of each token with its lemma. Both a dictionary-based lemmatizer and a model-based lemmatizer are supported. If both are configured, the dictionary-based lemmatizer is tried first, and then the model-based lemmatizer is consulted for out-of-vocabulary tokens. See the http://opennlp.apache.org/models.html[OpenNLP website] for information on downloading pre-trained models.
+
+*Factory class:* `solr.OpenNLPLemmatizerFilterFactory`
+
+*Arguments:*
+
+Either `dictionary` or `lemmatizerModel` must be provided, and both may be provided - see the examples below:
+
+`dictionary`:: (optional) The path of a lemmatization dictionary file. This path may be an absolute path, or path relative to the Solr config directory. The dictionary file must be encoded as UTF-8, with one entry per line, in the form `word[tab]lemma[tab]part-of-speech`, e.g. `wrote[tab]write[tab]VBD`.
+
+`lemmatizerModel`:: (optional) The path of a language-specific OpenNLP lemmatizer model file. This path may be an absolute path, or path relative to the Solr config directory.
+
+*Examples:*
+
+Perform dictionary-based lemmatization, and fall back to model-based lemmatization for out-of-vocabulary tokens (see the <<OpenNLP Part-Of-Speech Filter>> section above for information about using `TypeTokenFilter` to avoid indexing punctuation):
+
+[source,xml]
+----
+<analyzer>
+  <tokenizer class="solr.OpenNLPTokenizerFactory"
+             sentenceModel="en-sent.bin"
+             tokenizerModel="en-tokenizer.bin"/>
+  <filter class="solr.OpenNLPPOSFilterFactory" posTaggerModel="en-pos-maxent.bin"/>
+  <filter class="solr.OpenNLPLemmatizerFilterFactory"
+          dictionary="lemmas.txt"
+          lemmatizerModel="en-lemmatizer.bin"/>
+  <filter class="solr.TypeTokenFilterFactory" types="stop.pos.txt"/>
+</analyzer>
+----
+
+Perform dictionary-based lemmatization only:
+
+[source,xml]
+----
+<analyzer>
+  <tokenizer class="solr.OpenNLPTokenizerFactory"
+             sentenceModel="en-sent.bin"
+             tokenizerModel="en-tokenizer.bin"/>
+  <filter class="solr.OpenNLPPOSFilterFactory" posTaggerModel="en-pos-maxent.bin"/>
+  <filter class="solr.OpenNLPLemmatizerFilterFactory" dictionary="lemmas.txt"/>
+  <filter class="solr.TypeTokenFilterFactory" types="stop.pos.txt"/>
+</analyzer>
+----
+
+Perform model-based lemmatization only, preserving the original token and emitting the lemma as a synonym (see the <<KeywordRepeatFilterFactory,KeywordRepeatFilterFactory description>>):
+
+[source,xml]
+----
+<analyzer>
+  <tokenizer class="solr.OpenNLPTokenizerFactory"
+             sentenceModel="en-sent.bin"
+             tokenizerModel="en-tokenizer.bin"/>
+  <filter class="solr.OpenNLPPOSFilterFactory" posTaggerModel="en-pos-maxent.bin"/>
+  <filter class="solr.KeywordRepeatFilterFactory"/>
+  <filter class="solr.OpenNLPLemmatizerFilterFactory" lemmatizerModel="en-lemmatizer.bin"/>
+  <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
+  <filter class="solr.TypeTokenFilterFactory" types="stop.pos.txt"/>
+</analyzer>
+----
+
 == Language-Specific Factories
 
 These factories are each designed to work with specific languages. The languages covered here are:

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/solr/solr-ref-guide/src/tokenizers.adoc
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/src/tokenizers.adoc b/solr/solr-ref-guide/src/tokenizers.adoc
index 89dad34..c2fcf60 100644
--- a/solr/solr-ref-guide/src/tokenizers.adoc
+++ b/solr/solr-ref-guide/src/tokenizers.adoc
@@ -502,3 +502,7 @@ Specifies how to define whitespace for the purpose of tokenization. Valid values
 *In:* "To be, or what?"
 
 *Out:* "To", "be,", "or", "what?"
+
+== OpenNLP Tokenizer and OpenNLP Filters
+
+See <<language-analysis.adoc#opennlp-integration,OpenNLP Integration>> for information about using the OpenNLP Tokenizer, along with information about available OpenNLP token filters.
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/solr/solr-ref-guide/src/update-request-processors.adoc
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/src/update-request-processors.adoc b/solr/solr-ref-guide/src/update-request-processors.adoc
index 09c49d1..921677a 100644
--- a/solr/solr-ref-guide/src/update-request-processors.adoc
+++ b/solr/solr-ref-guide/src/update-request-processors.adoc
@@ -275,6 +275,8 @@ What follows are brief descriptions of the currently available update request pr
 
 {solr-javadocs}/solr-core/org/apache/solr/update/processor/IgnoreCommitOptimizeUpdateProcessorFactory.html[IgnoreCommitOptimizeUpdateProcessorFactory]:: Allows you to ignore commit and/or optimize requests from client applications when running in SolrCloud mode, for more information, see: Shards and Indexing Data in SolrCloud
 
+{solr-javadocs}/solr-core/org/apache/solr/update/processor/CloneFieldUpdateProcessorFactory.html[CloneFieldUpdateProcessorFactory]:: Clones the values found in any matching _source_ field into the configured _dest_ field.
+
 {solr-javadocs}/solr-core/org/apache/solr/update/processor/RegexpBoostProcessorFactory.html[RegexpBoostProcessorFactory]:: A processor which will match content of "inputField" against regular expressions found in "boostFilename", and if it matches will return the corresponding boost value from the file and output this to "boostField" as a double value.
 
 {solr-javadocs}/solr-core/org/apache/solr/update/processor/SignatureUpdateProcessorFactory.html[SignatureUpdateProcessorFactory]:: Uses a defined set of fields to generate a hash "signature" for the document. Useful for only indexing one copy of "similar" documents.
@@ -351,6 +353,10 @@ The {solr-javadocs}/solr-uima/index.html[`uima`] contrib provides::
 
 {solr-javadocs}/solr-uima/org/apache/solr/uima/processor/UIMAUpdateRequestProcessorFactory.html[UIMAUpdateRequestProcessorFactory]::: Update document(s) to be indexed with UIMA extracted information.
 
+The {solr-javadocs}/solr-analysis-extras/index.html[`analysis-extras`] contrib provides::
+
+{solr-javadocs}/solr-analysis-extras/org/apache/solr/update/processor/OpenNLPExtractNamedEntitiesProcessorFactory.html[OpenNLPExtractNamedEntitiesProcessorFactory]::: Update document(s) to be indexed with named entities extracted using an OpenNLP NER model.
+
 === Update Processor Factories You Should _Not_ Modify or Remove
 
 These are listed for completeness, but are part of the Solr infrastructure, particularly SolrCloud. Other than insuring you do _not_ remove them when modifying the update request handlers (or any copies you make), you will rarely, if ever, need to change these.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/solr/test-framework/src/java/org/apache/solr/update/processor/UpdateProcessorTestBase.java
----------------------------------------------------------------------
diff --git a/solr/test-framework/src/java/org/apache/solr/update/processor/UpdateProcessorTestBase.java b/solr/test-framework/src/java/org/apache/solr/update/processor/UpdateProcessorTestBase.java
new file mode 100644
index 0000000..d3aa979
--- /dev/null
+++ b/solr/test-framework/src/java/org/apache/solr/update/processor/UpdateProcessorTestBase.java
@@ -0,0 +1,168 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.update.processor;
+
+import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.util.IOUtils;
+import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.SolrInputField;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.request.SolrRequestInfo;
+import org.apache.solr.request.LocalSolrQueryRequest;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.response.SolrQueryResponse;
+import org.apache.solr.update.AddUpdateCommand;
+import org.apache.solr.update.CommitUpdateCommand;
+import org.apache.solr.update.DeleteUpdateCommand;
+
+import java.io.IOException;
+
+public class UpdateProcessorTestBase extends SolrTestCaseJ4 { // test base class: helpers for driving named update processor chains
+
+  /**
+   * Runs a document through the specified chain with freshly constructed
+   * request params, and returns the final document used when the chain is
+   * completed (NOTE: some chains may modify the document in place).
+   */
+  protected SolrInputDocument processAdd(final String chain,
+                                         final SolrInputDocument docIn)
+    throws IOException {
+
+    return processAdd(chain, new ModifiableSolrParams(), docIn);
+  }
+
+  /**
+   * Runs a document through the specified chain using the given request
+   * params, and returns the final document used when the chain is completed
+   * (NOTE: some chains may modify the document in place).
+   */
+  protected SolrInputDocument processAdd(final String chain,
+                                         final SolrParams requestParams,
+                                         final SolrInputDocument docIn)
+    throws IOException {
+
+    SolrCore core = h.getCore();
+    UpdateRequestProcessorChain pc = core.getUpdateProcessingChain(chain);
+    assertNotNull("No Chain named: " + chain, pc);
+
+    SolrQueryResponse rsp = new SolrQueryResponse();
+
+    SolrQueryRequest req = new LocalSolrQueryRequest(core, requestParams);
+    try {
+      SolrRequestInfo.setRequestInfo(new SolrRequestInfo(req, rsp)); // cleared again in the finally block below
+      AddUpdateCommand cmd = new AddUpdateCommand(req);
+      cmd.solrDoc = docIn;
+
+      UpdateRequestProcessor processor = pc.createProcessor(req, rsp);
+      if (null != processor) {
+        // test chain might be empty or short circuited.
+        processor.processAdd(cmd);
+      }
+
+      return cmd.solrDoc;
+    } finally {
+      SolrRequestInfo.clearRequestInfo();
+      req.close();
+    }
+  }
+
+  protected void processCommit(final String chain) throws IOException { // sends a commit command through the named chain
+    SolrCore core = h.getCore();
+    UpdateRequestProcessorChain pc = core.getUpdateProcessingChain(chain);
+    assertNotNull("No Chain named: " + chain, pc);
+
+    SolrQueryResponse rsp = new SolrQueryResponse();
+
+    SolrQueryRequest req = new LocalSolrQueryRequest(core, new ModifiableSolrParams());
+
+    CommitUpdateCommand cmd = new CommitUpdateCommand(req,false); // NOTE(review): the boolean is presumably the "optimize" flag - confirm
+    UpdateRequestProcessor processor = pc.createProcessor(req, rsp);
+    try {
+      processor.processCommit(cmd); // NOTE(review): no null check on processor here, unlike processAdd - confirm this is intended
+    } finally {
+      req.close();
+    }
+  }
+
+  protected void processDeleteById(final String chain, String id) throws IOException { // sends a delete command for the given id through the named chain
+    SolrCore core = h.getCore();
+    UpdateRequestProcessorChain pc = core.getUpdateProcessingChain(chain);
+    assertNotNull("No Chain named: " + chain, pc);
+
+    SolrQueryResponse rsp = new SolrQueryResponse();
+
+    SolrQueryRequest req = new LocalSolrQueryRequest(core, new ModifiableSolrParams());
+
+    DeleteUpdateCommand cmd = new DeleteUpdateCommand(req);
+    cmd.setId(id);
+    UpdateRequestProcessor processor = pc.createProcessor(req, rsp);
+    try {
+      processor.processDelete(cmd); // NOTE(review): no null check on processor here, unlike processAdd - confirm this is intended
+    } finally {
+      req.close();
+    }
+  }
+
+  protected void finish(final String chain) throws IOException { // calls finish() on a newly created processor for the named chain
+    SolrCore core = h.getCore();
+    UpdateRequestProcessorChain pc = core.getUpdateProcessingChain(chain);
+    assertNotNull("No Chain named: " + chain, pc);
+
+    SolrQueryResponse rsp = new SolrQueryResponse();
+    SolrQueryRequest req = new LocalSolrQueryRequest(core, new ModifiableSolrParams());
+
+    UpdateRequestProcessor processor = pc.createProcessor(req, rsp);
+    try {
+      processor.finish(); // NOTE(review): no null check on processor here, unlike processAdd - confirm this is intended
+    } finally {
+      IOUtils.closeQuietly(processor); // this is the only helper in the class that closes the processor
+      req.close();
+    }
+  }
+
+
+  /**
+   * Convenience method for building up SolrInputDocuments from the given fields, each stored under its own name
+   */
+  final SolrInputDocument doc(SolrInputField... fields) {
+    SolrInputDocument d = new SolrInputDocument();
+    for (SolrInputField f : fields) {
+      d.put(f.getName(), f);
+    }
+    return d;
+  }
+
+  /**
+   * Convenience method for building up a SolrInputField with the given name, adding each value in order
+   */
+  final SolrInputField field(String name, Object... values) {
+    SolrInputField f = new SolrInputField(name);
+    for (Object v : values) {
+      f.addValue(v);
+    }
+    return f;
+  }
+
+  /**
+   * Convenience method for building up SolrInputFields; simply delegates to {@link #field(String, Object...)}
+   */
+  final SolrInputField f(String name, Object... values) {
+    return field(name, values);
+  }
+}


[07/12] lucene-solr:branch_7x: LUCENE-2899: Add OpenNLP Analysis capabilities as a module

Posted by sa...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/solr/licenses/opennlp-maxent-LICENSE-ASL.txt
----------------------------------------------------------------------
diff --git a/solr/licenses/opennlp-maxent-LICENSE-ASL.txt b/solr/licenses/opennlp-maxent-LICENSE-ASL.txt
new file mode 100644
index 0000000..d645695
--- /dev/null
+++ b/solr/licenses/opennlp-maxent-LICENSE-ASL.txt
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/solr/licenses/opennlp-maxent-NOTICE.txt
----------------------------------------------------------------------
diff --git a/solr/licenses/opennlp-maxent-NOTICE.txt b/solr/licenses/opennlp-maxent-NOTICE.txt
new file mode 100644
index 0000000..9b97287
--- /dev/null
+++ b/solr/licenses/opennlp-maxent-NOTICE.txt
@@ -0,0 +1,6 @@
+
+Apache OpenNLP Maxent
+Copyright 2013 The Apache Software Foundation
+
+This product includes software developed at
+The Apache Software Foundation (http://www.apache.org/).

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/solr/licenses/opennlp-tools-1.8.3.jar.sha1
----------------------------------------------------------------------
diff --git a/solr/licenses/opennlp-tools-1.8.3.jar.sha1 b/solr/licenses/opennlp-tools-1.8.3.jar.sha1
new file mode 100644
index 0000000..c6a7549
--- /dev/null
+++ b/solr/licenses/opennlp-tools-1.8.3.jar.sha1
@@ -0,0 +1 @@
+3ce7c9056048f55478d983248cf18c7e02b1d072

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/solr/licenses/opennlp-tools-LICENSE-ASL.txt
----------------------------------------------------------------------
diff --git a/solr/licenses/opennlp-tools-LICENSE-ASL.txt b/solr/licenses/opennlp-tools-LICENSE-ASL.txt
new file mode 100644
index 0000000..d645695
--- /dev/null
+++ b/solr/licenses/opennlp-tools-LICENSE-ASL.txt
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/solr/licenses/opennlp-tools-NOTICE.txt
----------------------------------------------------------------------
diff --git a/solr/licenses/opennlp-tools-NOTICE.txt b/solr/licenses/opennlp-tools-NOTICE.txt
new file mode 100644
index 0000000..68a08dc
--- /dev/null
+++ b/solr/licenses/opennlp-tools-NOTICE.txt
@@ -0,0 +1,6 @@
+
+Apache OpenNLP Tools
+Copyright 2015 The Apache Software Foundation
+
+This product includes software developed at
+The Apache Software Foundation (http://www.apache.org/).

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/solr/solr-ref-guide/src/filter-descriptions.adoc
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/src/filter-descriptions.adoc b/solr/solr-ref-guide/src/filter-descriptions.adoc
index b8ab6de..09dd0f6 100644
--- a/solr/solr-ref-guide/src/filter-descriptions.adoc
+++ b/solr/solr-ref-guide/src/filter-descriptions.adoc
@@ -1576,6 +1576,38 @@ This filter adds the token's type, as an encoded byte sequence, as its payload.
 
 *Out:* "Pay"[<ALPHANUM>], "Bob's"[<APOSTROPHE>], "I.O.U."[<ACRONYM>]
 
+== Type As Synonym Filter
+
+This filter adds the token's type, as a token at the same position as the token, optionally with a configurable prefix prepended.
+
+*Factory class:* `solr.TypeAsSynonymFilterFactory`
+
+*Arguments:*
+
+`prefix`:: (optional) The prefix to prepend to the token's type.
+
+*Examples:*
+
+With the example below, each token's type will be emitted verbatim at the same position:
+
+[source,xml]
+----
+<analyzer>
+  <tokenizer class="solr.StandardTokenizerFactory"/>
+  <filter class="solr.TypeAsSynonymFilterFactory"/>
+</analyzer>
+----
+
+With the example below, for a token "example.com" with type `<URL>`, the token emitted at the same position will be "\_type_<URL>":
+
+[source,xml]
+----
+<analyzer>
+  <tokenizer class="solr.UAX29URLEmailTokenizerFactory"/>
+  <filter class="solr.TypeAsSynonymFilterFactory" prefix="_type_"/>
+</analyzer>
+----
+
 == Type Token Filter
 
 This filter blacklists or whitelists a specified list of token types, assuming the tokens have type metadata associated with them. For example, the <<tokenizers.adoc#uax29-url-email-tokenizer,UAX29 URL Email Tokenizer>> emits "<URL>" and "<EMAIL>" typed tokens, as well as other types. This filter would allow you to pull out only e-mail addresses from text as tokens, if you wish.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/solr/solr-ref-guide/src/language-analysis.adoc
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/src/language-analysis.adoc b/solr/solr-ref-guide/src/language-analysis.adoc
index 8e6a214..9d00325 100644
--- a/solr/solr-ref-guide/src/language-analysis.adoc
+++ b/solr/solr-ref-guide/src/language-analysis.adoc
@@ -355,6 +355,214 @@ This can increase recall by causing more matches. On the other hand, it can redu
 </analyzer>
 ----
 
+== OpenNLP Integration
+
+The `lucene/analysis/opennlp` module provides OpenNLP integration via several analysis components: a tokenizer, a part-of-speech tagging filter, a phrase chunking filter, and a lemmatization filter.  In addition to these analysis components, Solr also provides an update request processor to extract named entities - see <<update-request-processors.adoc#update-processor-factories-that-can-be-loaded-as-plugins,Update Processor Factories That Can Be Loaded as Plugins>>.
+
+NOTE: The <<OpenNLP Tokenizer>> must be used with all other OpenNLP analysis components, for two reasons: first, the OpenNLP Tokenizer detects and marks the sentence boundaries required by all the OpenNLP filters; and second, since the pre-trained OpenNLP models used by these filters were trained using the corresponding language-specific sentence-detection/tokenization models, the same tokenization, using the same models, must be used at runtime for optimal performance.
+
+See `solr/contrib/analysis-extras/README.txt` for information on which jars you need to add to your `SOLR_HOME/lib`.
+
+=== OpenNLP Tokenizer
+
+The OpenNLP Tokenizer takes two language-specific binary model files as parameters: a sentence detector model and a tokenizer model.  The last token in each sentence is flagged, so that following OpenNLP-based filters can use this information to apply operations to tokens one sentence at a time. See the http://opennlp.apache.org/models.html[OpenNLP website] for information on downloading pre-trained models.
+
+*Factory class:* `solr.OpenNLPTokenizerFactory`
+
+*Arguments:*
+
+`sentenceModel`:: (required) The path of a language-specific OpenNLP sentence detection model file. This path may be an absolute path, or path relative to the Solr config directory.
+
+`tokenizerModel`:: (required) The path of a language-specific OpenNLP tokenization model file. This path may be an absolute path, or path relative to the Solr config directory.
+
+*Example:*
+
+[source,xml]
+----
+<analyzer>
+  <tokenizer class="solr.OpenNLPTokenizerFactory"
+             sentenceModel="en-sent.bin"
+             tokenizerModel="en-tokenizer.bin"/>
+</analyzer>
+----
+
+=== OpenNLP Part-Of-Speech Filter
+
+This filter sets each token's type attribute to the part of speech (POS) assigned by the configured model. See the http://opennlp.apache.org/models.html[OpenNLP website] for information on downloading pre-trained models.
+
+NOTE: Lucene currently does not index token types, so if you want to keep this information, you have to preserve it either in a payload or as a synonym; see the examples below.
+
+*Factory class:* `solr.OpenNLPPOSFilterFactory`
+
+*Arguments:*
+
+`posTaggerModel`:: (required) The path of a language-specific OpenNLP POS tagger model file. This path may be an absolute path, or path relative to the Solr config directory.
+
+*Examples:*
+
+The OpenNLP tokenizer will tokenize punctuation, which is useful for following token filters, but ordinarily you don't want to include punctuation in your index, so the `TypeTokenFilter` (<<filter-descriptions.adoc#type-token-filter,described here>>) is included in the examples below, with `stop.pos.txt` containing the following:
+
+.stop.pos.txt
+[source,text]
+----
+#
+$
+''
+``
+,
+-LRB-
+-RRB-
+:
+.
+----
+
+Index the POS for each token as a payload:
+
+[source,xml]
+----
+<analyzer>
+  <tokenizer class="solr.OpenNLPTokenizerFactory"
+             sentenceModel="en-sent.bin"
+             tokenizerModel="en-tokenizer.bin"/>
+  <filter class="solr.OpenNLPPOSFilterFactory" posTaggerModel="en-pos-maxent.bin"/>
+  <filter class="solr.TypeAsPayloadFilterFactory"/>
+  <filter class="solr.TypeTokenFilterFactory" types="stop.pos.txt"/>
+</analyzer>
+----
+
+Index the POS for each token as a synonym, after prefixing the POS with "@" (see the <<filter-descriptions.adoc#type-as-synonym-filter,TypeAsSynonymFilter description>>):
+
+[source,xml]
+----
+<analyzer>
+  <tokenizer class="solr.OpenNLPTokenizerFactory"
+             sentenceModel="en-sent.bin"
+             tokenizerModel="en-tokenizer.bin"/>
+  <filter class="solr.OpenNLPPOSFilterFactory" posTaggerModel="en-pos-maxent.bin"/>
+  <filter class="solr.TypeAsSynonymFilterFactory" prefix="@"/>
+  <filter class="solr.TypeTokenFilterFactory" types="stop.pos.txt"/>
+</analyzer>
+----
+
+Only index nouns - the `keep.pos.txt` file contains lines `NN`, `NNS`, `NNP` and `NNPS`:
+
+[source,xml]
+----
+<analyzer>
+  <tokenizer class="solr.OpenNLPTokenizerFactory"
+             sentenceModel="en-sent.bin"
+             tokenizerModel="en-tokenizer.bin"/>
+  <filter class="solr.OpenNLPPOSFilterFactory" posTaggerModel="en-pos-maxent.bin"/>
+  <filter class="solr.TypeTokenFilterFactory" types="keep.pos.txt" useWhitelist="true"/>
+</analyzer>
+----
+
+=== OpenNLP Phrase Chunking Filter
+
+This filter sets each token's type attribute based on the output of an OpenNLP phrase chunking model.  The chunk labels replace the POS tags that previously were in each token's type attribute. See the http://opennlp.apache.org/models.html[OpenNLP website] for information on downloading pre-trained models.
+
+Prerequisite: the <<OpenNLP Tokenizer>> and the <<OpenNLP Part-Of-Speech Filter>> must precede this filter.
+
+NOTE: Lucene currently does not index token types, so if you want to keep this information, you have to preserve it either in a payload or as a synonym; see the examples below.
+
+*Factory class:* `solr.OpenNLPChunkerFilterFactory`
+
+*Arguments:*
+
+`chunkerModel`:: (required) The path of a language-specific OpenNLP phrase chunker model file. This path may be an absolute path, or path relative to the Solr config directory.
+
+*Examples:*
+
+Index the phrase chunk label for each token as a payload:
+
+[source,xml]
+----
+<analyzer>
+  <tokenizer class="solr.OpenNLPTokenizerFactory"
+             sentenceModel="en-sent.bin"
+             tokenizerModel="en-tokenizer.bin"/>
+  <filter class="solr.OpenNLPPOSFilterFactory" posTaggerModel="en-pos-maxent.bin"/>
+  <filter class="solr.OpenNLPChunkerFilterFactory" chunkerModel="en-chunker.bin"/>
+  <filter class="solr.TypeAsPayloadFilterFactory"/>
+</analyzer>
+----
+
+Index the phrase chunk label for each token as a synonym, after prefixing it with "#" (see the <<filter-descriptions.adoc#type-as-synonym-filter,TypeAsSynonymFilter description>>):
+
+[source,xml]
+----
+<analyzer>
+  <tokenizer class="solr.OpenNLPTokenizerFactory"
+             sentenceModel="en-sent.bin"
+             tokenizerModel="en-tokenizer.bin"/>
+  <filter class="solr.OpenNLPPOSFilterFactory" posTaggerModel="en-pos-maxent.bin"/>
+  <filter class="solr.OpenNLPChunkerFilterFactory" chunkerModel="en-chunker.bin"/>
+  <filter class="solr.TypeAsSynonymFilterFactory" prefix="#"/>
+</analyzer>
+----
+
+=== OpenNLP Lemmatizer Filter
+
+This filter replaces the text of each token with its lemma. Both a dictionary-based lemmatizer and a model-based lemmatizer are supported. If both are configured, the dictionary-based lemmatizer is tried first, and then the model-based lemmatizer is consulted for out-of-vocabulary tokens. See the http://opennlp.apache.org/models.html[OpenNLP website] for information on downloading pre-trained models.
+
+*Factory class:* `solr.OpenNLPLemmatizerFilterFactory`
+
+*Arguments:*
+
+Either `dictionary` or `lemmatizerModel` must be provided, and both may be provided - see the examples below:
+
+`dictionary`:: (optional) The path of a lemmatization dictionary file. This path may be an absolute path, or path relative to the Solr config directory. The dictionary file must be encoded as UTF-8, with one entry per line, in the form `word[tab]lemma[tab]part-of-speech`, e.g. `wrote[tab]write[tab]VBD`.
+
+`lemmatizerModel`:: (optional) The path of a language-specific OpenNLP lemmatizer model file. This path may be an absolute path, or path relative to the Solr config directory.
+
+*Examples:*
+
+Perform dictionary-based lemmatization, and fall back to model-based lemmatization for out-of-vocabulary tokens (see the <<OpenNLP Part-Of-Speech Filter>> section above for information about using `TypeTokenFilter` to avoid indexing punctuation):
+
+[source,xml]
+----
+<analyzer>
+  <tokenizer class="solr.OpenNLPTokenizerFactory"
+             sentenceModel="en-sent.bin"
+             tokenizerModel="en-tokenizer.bin"/>
+  <filter class="solr.OpenNLPPOSFilterFactory" posTaggerModel="en-pos-maxent.bin"/>
+  <filter class="solr.OpenNLPLemmatizerFilterFactory"
+          dictionary="lemmas.txt"
+          lemmatizerModel="en-lemmatizer.bin"/>
+  <filter class="solr.TypeTokenFilterFactory" types="stop.pos.txt"/>
+</analyzer>
+----
+
+Perform dictionary-based lemmatization only:
+
+[source,xml]
+----
+<analyzer>
+  <tokenizer class="solr.OpenNLPTokenizerFactory"
+             sentenceModel="en-sent.bin"
+             tokenizerModel="en-tokenizer.bin"/>
+  <filter class="solr.OpenNLPPOSFilterFactory" posTaggerModel="en-pos-maxent.bin"/>
+  <filter class="solr.OpenNLPLemmatizerFilterFactory" dictionary="lemmas.txt"/>
+  <filter class="solr.TypeTokenFilterFactory" types="stop.pos.txt"/>
+</analyzer>
+----
+
+Perform model-based lemmatization only, preserving the original token and emitting the lemma as a synonym (see the <<KeywordRepeatFilterFactory,KeywordRepeatFilterFactory description>>):
+
+[source,xml]
+----
+<analyzer>
+  <tokenizer class="solr.OpenNLPTokenizerFactory"
+             sentenceModel="en-sent.bin"
+             tokenizerModel="en-tokenizer.bin"/>
+  <filter class="solr.OpenNLPPOSFilterFactory" posTaggerModel="en-pos-maxent.bin"/>
+  <filter class="solr.KeywordRepeatFilterFactory"/>
+  <filter class="solr.OpenNLPLemmatizerFilterFactory" lemmatizerModel="en-lemmatizer.bin"/>
+  <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
+  <filter class="solr.TypeTokenFilterFactory" types="stop.pos.txt"/>
+</analyzer>
+----
+
 == Language-Specific Factories
 
 These factories are each designed to work with specific languages. The languages covered here are:

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/solr/solr-ref-guide/src/tokenizers.adoc
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/src/tokenizers.adoc b/solr/solr-ref-guide/src/tokenizers.adoc
index 89dad34..c2fcf60 100644
--- a/solr/solr-ref-guide/src/tokenizers.adoc
+++ b/solr/solr-ref-guide/src/tokenizers.adoc
@@ -502,3 +502,7 @@ Specifies how to define whitespace for the purpose of tokenization. Valid values
 *In:* "To be, or what?"
 
 *Out:* "To", "be,", "or", "what?"
+
+== OpenNLP Tokenizer and OpenNLP Filters
+
+See <<language-analysis.adoc#opennlp-integration,OpenNLP Integration>> for information about using the OpenNLP Tokenizer, along with information about available OpenNLP token filters.
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/solr/solr-ref-guide/src/update-request-processors.adoc
----------------------------------------------------------------------
diff --git a/solr/solr-ref-guide/src/update-request-processors.adoc b/solr/solr-ref-guide/src/update-request-processors.adoc
index 09c49d1..921677a 100644
--- a/solr/solr-ref-guide/src/update-request-processors.adoc
+++ b/solr/solr-ref-guide/src/update-request-processors.adoc
@@ -275,6 +275,8 @@ What follows are brief descriptions of the currently available update request pr
 
 {solr-javadocs}/solr-core/org/apache/solr/update/processor/IgnoreCommitOptimizeUpdateProcessorFactory.html[IgnoreCommitOptimizeUpdateProcessorFactory]:: Allows you to ignore commit and/or optimize requests from client applications when running in SolrCloud mode, for more information, see: Shards and Indexing Data in SolrCloud
 
+{solr-javadocs}/solr-core/org/apache/solr/update/processor/CloneFieldUpdateProcessorFactory.html[CloneFieldUpdateProcessorFactory]:: Clones the values found in any matching _source_ field into the configured _dest_ field.
+
 {solr-javadocs}/solr-core/org/apache/solr/update/processor/RegexpBoostProcessorFactory.html[RegexpBoostProcessorFactory]:: A processor which will match content of "inputField" against regular expressions found in "boostFilename", and if it matches will return the corresponding boost value from the file and output this to "boostField" as a double value.
 
 {solr-javadocs}/solr-core/org/apache/solr/update/processor/SignatureUpdateProcessorFactory.html[SignatureUpdateProcessorFactory]:: Uses a defined set of fields to generate a hash "signature" for the document. Useful for only indexing one copy of "similar" documents.
@@ -351,6 +353,10 @@ The {solr-javadocs}/solr-uima/index.html[`uima`] contrib provides::
 
 {solr-javadocs}/solr-uima/org/apache/solr/uima/processor/UIMAUpdateRequestProcessorFactory.html[UIMAUpdateRequestProcessorFactory]::: Update document(s) to be indexed with UIMA extracted information.
 
+The {solr-javadocs}/solr-analysis-extras/index.html[`analysis-extras`] contrib provides::
+
+{solr-javadocs}/solr-analysis-extras/org/apache/solr/update/processor/OpenNLPExtractNamedEntitiesProcessorFactory.html[OpenNLPExtractNamedEntitiesProcessorFactory]::: Update document(s) to be indexed with named entities extracted using an OpenNLP NER model.
+
 === Update Processor Factories You Should _Not_ Modify or Remove
 
 These are listed for completeness, but are part of the Solr infrastructure, particularly SolrCloud. Other than insuring you do _not_ remove them when modifying the update request handlers (or any copies you make), you will rarely, if ever, need to change these.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/solr/test-framework/src/java/org/apache/solr/update/processor/UpdateProcessorTestBase.java
----------------------------------------------------------------------
diff --git a/solr/test-framework/src/java/org/apache/solr/update/processor/UpdateProcessorTestBase.java b/solr/test-framework/src/java/org/apache/solr/update/processor/UpdateProcessorTestBase.java
new file mode 100644
index 0000000..d3aa979
--- /dev/null
+++ b/solr/test-framework/src/java/org/apache/solr/update/processor/UpdateProcessorTestBase.java
@@ -0,0 +1,168 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.solr.update.processor;
+
+import org.apache.solr.SolrTestCaseJ4;
+import org.apache.solr.common.params.SolrParams;
+import org.apache.solr.common.util.IOUtils;
+import org.apache.solr.common.params.ModifiableSolrParams;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.SolrInputField;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.request.SolrRequestInfo;
+import org.apache.solr.request.LocalSolrQueryRequest;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.response.SolrQueryResponse;
+import org.apache.solr.update.AddUpdateCommand;
+import org.apache.solr.update.CommitUpdateCommand;
+import org.apache.solr.update.DeleteUpdateCommand;
+
+import java.io.IOException;
+
+public class UpdateProcessorTestBase extends SolrTestCaseJ4 {
+
+  /**
+   * Runs a document through the specified chain, and returns the final
+   * document used when the chain is completed (NOTE: some chains may
+   * modify the document in place).
+   */
+  protected SolrInputDocument processAdd(final String chain,
+                                         final SolrInputDocument docIn)
+    throws IOException {
+
+    return processAdd(chain, new ModifiableSolrParams(), docIn);
+  }
+
+  /**
+   * Runs a document through the specified chain, and returns the final
+   * document used when the chain is completed (NOTE: some chains may
+   * modify the document in place).
+   */
+  protected SolrInputDocument processAdd(final String chain,
+                                         final SolrParams requestParams,
+                                         final SolrInputDocument docIn)
+    throws IOException {
+
+    SolrCore core = h.getCore();
+    UpdateRequestProcessorChain pc = core.getUpdateProcessingChain(chain);
+    assertNotNull("No Chain named: " + chain, pc);
+
+    SolrQueryResponse rsp = new SolrQueryResponse();
+
+    SolrQueryRequest req = new LocalSolrQueryRequest(core, requestParams);
+    try {
+      SolrRequestInfo.setRequestInfo(new SolrRequestInfo(req, rsp));
+      AddUpdateCommand cmd = new AddUpdateCommand(req);
+      cmd.solrDoc = docIn;
+
+      UpdateRequestProcessor processor = pc.createProcessor(req, rsp);
+      if (null != processor) {
+        // test chain might be empty or short circuited.
+        processor.processAdd(cmd);
+      }
+
+      return cmd.solrDoc;
+    } finally {
+      SolrRequestInfo.clearRequestInfo();
+      req.close();
+    }
+  }
+
+  protected void processCommit(final String chain) throws IOException {
+    SolrCore core = h.getCore();
+    UpdateRequestProcessorChain pc = core.getUpdateProcessingChain(chain);
+    assertNotNull("No Chain named: " + chain, pc);
+
+    SolrQueryResponse rsp = new SolrQueryResponse();
+
+    SolrQueryRequest req = new LocalSolrQueryRequest(core, new ModifiableSolrParams());
+
+    CommitUpdateCommand cmd = new CommitUpdateCommand(req,false);
+    UpdateRequestProcessor processor = pc.createProcessor(req, rsp);
+    try {
+      processor.processCommit(cmd);
+    } finally {
+      req.close();
+    }
+  }
+
+  protected void processDeleteById(final String chain, String id) throws IOException {
+    SolrCore core = h.getCore();
+    UpdateRequestProcessorChain pc = core.getUpdateProcessingChain(chain);
+    assertNotNull("No Chain named: " + chain, pc);
+
+    SolrQueryResponse rsp = new SolrQueryResponse();
+
+    SolrQueryRequest req = new LocalSolrQueryRequest(core, new ModifiableSolrParams());
+
+    DeleteUpdateCommand cmd = new DeleteUpdateCommand(req);
+    cmd.setId(id);
+    UpdateRequestProcessor processor = pc.createProcessor(req, rsp);
+    try {
+      processor.processDelete(cmd);
+    } finally {
+      req.close();
+    }
+  }
+
+  protected void finish(final String chain) throws IOException {
+    SolrCore core = h.getCore();
+    UpdateRequestProcessorChain pc = core.getUpdateProcessingChain(chain);
+    assertNotNull("No Chain named: " + chain, pc);
+
+    SolrQueryResponse rsp = new SolrQueryResponse();
+    SolrQueryRequest req = new LocalSolrQueryRequest(core, new ModifiableSolrParams());
+
+    UpdateRequestProcessor processor = pc.createProcessor(req, rsp);
+    try {
+      processor.finish();
+    } finally {
+      IOUtils.closeQuietly(processor);
+      req.close();
+    }
+  }
+
+
+  /**
+   * Convenience method for building up SolrInputDocuments
+   */
+  final SolrInputDocument doc(SolrInputField... fields) {
+    SolrInputDocument d = new SolrInputDocument();
+    for (SolrInputField f : fields) {
+      d.put(f.getName(), f);
+    }
+    return d;
+  }
+
+  /**
+   * Convenience method for building up SolrInputFields
+   */
+  final SolrInputField field(String name, Object... values) {
+    SolrInputField f = new SolrInputField(name);
+    for (Object v : values) {
+      f.addValue(v);
+    }
+    return f;
+  }
+
+  /**
+   * Convenience method for building up SolrInputFields with default boost
+   */
+  final SolrInputField f(String name, Object... values) {
+    return field(name, values);
+  }
+}


[02/12] lucene-solr:master: LUCENE-2899: Add OpenNLP Analysis capabilities as a module

Posted by sa...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/licenses/opennlp-tools-LICENSE-ASL.txt
----------------------------------------------------------------------
diff --git a/lucene/licenses/opennlp-tools-LICENSE-ASL.txt b/lucene/licenses/opennlp-tools-LICENSE-ASL.txt
new file mode 100644
index 0000000..d645695
--- /dev/null
+++ b/lucene/licenses/opennlp-tools-LICENSE-ASL.txt
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/licenses/opennlp-tools-NOTICE.txt
----------------------------------------------------------------------
diff --git a/lucene/licenses/opennlp-tools-NOTICE.txt b/lucene/licenses/opennlp-tools-NOTICE.txt
new file mode 100644
index 0000000..68a08dc
--- /dev/null
+++ b/lucene/licenses/opennlp-tools-NOTICE.txt
@@ -0,0 +1,6 @@
+
+Apache OpenNLP Tools
+Copyright 2015 The Apache Software Foundation
+
+This product includes software developed at
+The Apache Software Foundation (http://www.apache.org/).

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/module-build.xml
----------------------------------------------------------------------
diff --git a/lucene/module-build.xml b/lucene/module-build.xml
index d48ae37..c2159b6 100644
--- a/lucene/module-build.xml
+++ b/lucene/module-build.xml
@@ -285,6 +285,28 @@
     <property name="analyzers-icu-javadocs.uptodate" value="true"/>
   </target>
 
+  <property name="analyzers-opennlp.jar" value="${common.dir}/build/analysis/opennlp/lucene-analyzers-opennlp-${version}.jar"/>
+  <target name="check-analyzers-opennlp-uptodate" unless="analyzers-opennlp.uptodate">
+    <module-uptodate name="analysis/opennlp" jarfile="${analyzers-opennlp.jar}" property="analyzers-opennlp.uptodate"/>
+  </target>
+  <target name="jar-analyzers-opennlp" unless="analyzers-opennlp.uptodate" depends="check-analyzers-opennlp-uptodate">
+    <ant dir="${common.dir}/analysis/opennlp" target="jar-core" inheritAll="false">
+      <propertyset refid="uptodate.and.compiled.properties"/>
+    </ant>
+    <property name="analyzers-opennlp.uptodate" value="true"/>
+  </target>
+
+  <property name="analyzers-opennlp-javadoc.jar" value="${common.dir}/build/analysis/opennlp/lucene-analyzers-opennlp-${version}-javadoc.jar"/>
+  <target name="check-analyzers-opennlp-javadocs-uptodate" unless="analyzers-opennlp-javadocs.uptodate">
+    <module-uptodate name="analysis/opennlp" jarfile="${analyzers-opennlp-javadoc.jar}" property="analyzers-opennlp-javadocs.uptodate"/>
+  </target>
+  <target name="javadocs-analyzers-opennlp" unless="analyzers-opennlp-javadocs.uptodate" depends="check-analyzers-opennlp-javadocs-uptodate">
+    <ant dir="${common.dir}/analysis/opennlp" target="javadocs" inheritAll="false">
+      <propertyset refid="uptodate.and.compiled.properties"/>
+    </ant>
+    <property name="analyzers-opennlp-javadocs.uptodate" value="true"/>
+  </target>
+
   <property name="analyzers-phonetic.jar" value="${common.dir}/build/analysis/phonetic/lucene-analyzers-phonetic-${version}.jar"/>
   <target name="check-analyzers-phonetic-uptodate" unless="analyzers-phonetic.uptodate">
     <module-uptodate name="analysis/phonetic" jarfile="${analyzers-phonetic.jar}" property="analyzers-phonetic.uptodate"/>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
index 070eab2..3e1e375 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
@@ -41,6 +41,7 @@ import org.apache.lucene.util.Attribute;
 import org.apache.lucene.util.AttributeFactory;
 import org.apache.lucene.util.AttributeImpl;
 import org.apache.lucene.util.AttributeReflector;
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.IntsRef;
@@ -127,7 +128,7 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
   //     lastStartOffset)
   public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[],
                                                int posLengths[], Integer finalOffset, Integer finalPosInc, boolean[] keywordAtts,
-                                               boolean offsetsAreCorrect) throws IOException {
+                                               boolean offsetsAreCorrect, byte[][] payloads) throws IOException {
     assertNotNull(output);
     CheckClearAttributesAttribute checkClearAtt = ts.addAttribute(CheckClearAttributesAttribute.class);
     
@@ -166,6 +167,12 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
       assertTrue("has no KeywordAttribute", ts.hasAttribute(KeywordAttribute.class));
       keywordAtt = ts.getAttribute(KeywordAttribute.class);
     }
+
+    PayloadAttribute payloadAtt = null;
+    if (payloads != null) {
+      assertTrue("has no PayloadAttribute", ts.hasAttribute(PayloadAttribute.class));
+      payloadAtt = ts.getAttribute(PayloadAttribute.class);
+    }
     
     // Maps position to the start/end offset:
     final Map<Integer,Integer> posToStartOffset = new HashMap<>();
@@ -185,6 +192,7 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
       if (posIncrAtt != null) posIncrAtt.setPositionIncrement(45987657);
       if (posLengthAtt != null) posLengthAtt.setPositionLength(45987653);
       if (keywordAtt != null) keywordAtt.setKeyword((i&1) == 0);
+      if (payloadAtt != null) payloadAtt.setPayload(new BytesRef(new byte[] { 0x00, -0x21, 0x12, -0x43, 0x24 }));
       
       checkClearAtt.getAndResetClearCalled(); // reset it, because we called clearAttribute() before
       assertTrue("token "+i+" does not exist", ts.incrementToken());
@@ -209,7 +217,14 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
       if (keywordAtts != null) {
         assertEquals("keywordAtt " + i + " term=" + termAtt, keywordAtts[i], keywordAtt.isKeyword());
       }
-      
+      if (payloads != null) {
+        if (payloads[i] != null) {
+          assertEquals("payloads " + i, new BytesRef(payloads[i]), payloadAtt.getPayload());
+        } else {
+          assertNull("payloads " + i, payloads[i]);
+        }
+      }
+
       // we can enforce some basic things about a few attributes even if the caller doesn't check:
       if (offsetAtt != null) {
         final int startOffset = offsetAtt.startOffset();
@@ -283,7 +298,9 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
     if (typeAtt != null) typeAtt.setType("bogusType");
     if (posIncrAtt != null) posIncrAtt.setPositionIncrement(45987657);
     if (posLengthAtt != null) posLengthAtt.setPositionLength(45987653);
-    
+    if (keywordAtt != null) keywordAtt.setKeyword(true);
+    if (payloadAtt != null) payloadAtt.setPayload(new BytesRef(new byte[] { 0x00, -0x21, 0x12, -0x43, 0x24 }));
+
     checkClearAtt.getAndResetClearCalled(); // reset it, because we called clearAttribute() before
 
     ts.end();
@@ -305,7 +322,7 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
   public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[],
                                                int posLengths[], Integer finalOffset, boolean[] keywordAtts,
                                                boolean offsetsAreCorrect) throws IOException {
-    assertTokenStreamContents(ts, output, startOffsets, endOffsets, types, posIncrements, posLengths, finalOffset, null, null, offsetsAreCorrect);
+    assertTokenStreamContents(ts, output, startOffsets, endOffsets, types, posIncrements, posLengths, finalOffset, null, keywordAtts, offsetsAreCorrect, null);
   }
 
   public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[], int posLengths[], Integer finalOffset, boolean offsetsAreCorrect) throws IOException {
@@ -373,7 +390,12 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
     checkAnalysisConsistency(random(), a, true, input, offsetsAreCorrect);
     assertTokenStreamContents(a.tokenStream("dummy", input), output, startOffsets, endOffsets, types, posIncrements, posLengths, input.length(), offsetsAreCorrect);
   }
-  
+
+  public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[], int posLengths[], boolean offsetsAreCorrect, byte[][] payloads) throws IOException {
+    checkResetException(a, input);
+    assertTokenStreamContents(a.tokenStream("dummy", input), output, startOffsets, endOffsets, types, posIncrements, posLengths, input.length(), null, null, offsetsAreCorrect, payloads);
+  }
+
   public static void assertAnalyzesTo(Analyzer a, String input, String[] output) throws IOException {
     assertAnalyzesTo(a, input, output, null, null, null, null, null);
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index a3f6f75..e60262d 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -53,6 +53,13 @@ New Features
 ----------------------
 * SOLR-11285: Simulation framework for autoscaling. (ab)
 
+* LUCENE-2899: In the Solr analysis-extras contrib, added support for the
+  OpenNLP-based analysis components in the Lucene analysis/opennlp module:
+  tokenization, part-of-speech tagging, phrase chunking, and lemmatization.
+  Also added OpenNLP-based named entity extraction as a Solr update request
+  processor.  (Lance Norskog, Grant Ingersoll, Joern Kottmann, Em, Kai Gülzau,
+  Rene Nederhand, Robert Muir, Steven Bower, Steve Rowe)
+
 Optimizations
 ----------------------
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/solr/contrib/analysis-extras/README.txt
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/README.txt b/solr/contrib/analysis-extras/README.txt
index 3863420..fb8381a 100644
--- a/solr/contrib/analysis-extras/README.txt
+++ b/solr/contrib/analysis-extras/README.txt
@@ -1,8 +1,10 @@
 The analysis-extras plugin provides additional analyzers that rely
 upon large dependencies/dictionaries.
 
-It includes integration with ICU for multilingual support, and 
-analyzers for Chinese and Polish.
+It includes integration with ICU for multilingual support,
+analyzers for Chinese and Polish, and integration with
+OpenNLP for multilingual tokenization, part-of-speech tagging,
+lemmatization, phrase chunking, and named-entity recognition.
 
 ICU relies upon lucene-libs/lucene-analyzers-icu-X.Y.jar
 and lib/icu4j-X.Y.jar
@@ -13,4 +15,6 @@ Stempel relies on lucene-libs/lucene-analyzers-stempel-X.Y.jar
 
 Morfologik relies on lucene-libs/lucene-analyzers-morfologik-X.Y.jar
 and lib/morfologik-*.jar
- 
+
+OpenNLP relies on lucene-libs/lucene-analyzers-opennlp-X.Y.jar
+and lib/opennlp-*.jar

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/solr/contrib/analysis-extras/build.xml
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/build.xml b/solr/contrib/analysis-extras/build.xml
index 38d67dd..68a88ad 100644
--- a/solr/contrib/analysis-extras/build.xml
+++ b/solr/contrib/analysis-extras/build.xml
@@ -7,9 +7,9 @@
     The ASF licenses this file to You under the Apache License, Version 2.0
     (the "License"); you may not use this file except in compliance with
     the License.  You may obtain a copy of the License at
- 
+
         http://www.apache.org/licenses/LICENSE-2.0
- 
+
     Unless required by applicable law or agreed to in writing, software
     distributed under the License is distributed on an "AS IS" BASIS,
     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -24,19 +24,20 @@
   </description>
 
   <import file="../contrib-build.xml"/>
-  
+
   <target name="compile-test" depends="-compile-test-lucene-analysis,common-solr.compile-test"/>
 
   <path id="analysis.extras.lucene.libs">
     <pathelement location="${analyzers-icu.jar}"/>
-    <!-- 
-      Although the smartcn, stempel, and morfologik jars are not dependencies of
+    <!--
+      Although the smartcn, stempel, morfologik and opennlp jars are not dependencies of
       code in the analysis-extras contrib, they must remain here in order to
       populate the Solr distribution
      -->
     <pathelement location="${analyzers-smartcn.jar}"/>
     <pathelement location="${analyzers-stempel.jar}"/>
     <pathelement location="${analyzers-morfologik.jar}"/>
+    <pathelement location="${analyzers-opennlp.jar}"/>
   </path>
 
   <path id="classpath">
@@ -53,12 +54,12 @@
     </dirset>
   </path>
 
-  <!-- 
-    Although the smartcn, stempel, and morfologik jars are not dependencies of
+  <!--
+    Although the smartcn, stempel, morfologik and opennlp jars are not dependencies of
     code in the analysis-extras contrib, they must remain here in order to
     populate the Solr distribution
    -->
-  <target name="module-jars-to-solr" 
+  <target name="module-jars-to-solr"
           depends="-module-jars-to-solr-not-for-package,-module-jars-to-solr-package"/>
   <target name="-module-jars-to-solr-not-for-package" unless="called.from.create-package">
     <antcall inheritall="true">
@@ -66,6 +67,7 @@
       <target name="jar-analyzers-smartcn"/>
       <target name="jar-analyzers-stempel"/>
       <target name="jar-analyzers-morfologik"/>
+      <target name="jar-analyzers-opennlp"/>
     </antcall>
     <property name="analyzers-icu.uptodate" value="true"/> <!-- compile-time dependency -->
     <mkdir dir="${build.dir}/lucene-libs"/>
@@ -85,6 +87,6 @@
     </copy>
   </target>
 
-  <target name="compile-core" depends="jar-analyzers-icu, solr-contrib-build.compile-core"/>
+  <target name="compile-core" depends="jar-analyzers-icu, jar-analyzers-opennlp, solr-contrib-build.compile-core"/>
   <target name="dist" depends="module-jars-to-solr, common-solr.dist"/>
 </project>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/solr/contrib/analysis-extras/ivy.xml
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/ivy.xml b/solr/contrib/analysis-extras/ivy.xml
index 0c71701..cfc30c1 100644
--- a/solr/contrib/analysis-extras/ivy.xml
+++ b/solr/contrib/analysis-extras/ivy.xml
@@ -24,6 +24,9 @@
   </configurations>
   <dependencies>
     <dependency org="com.ibm.icu" name="icu4j" rev="${/com.ibm.icu/icu4j}" conf="compile"/>
+    <dependency org="org.apache.opennlp" name="opennlp-tools" rev="${/org.apache.opennlp/opennlp-tools}" conf="compile" />
+    <dependency org="org.apache.opennlp" name="opennlp-maxent" rev="${/org.apache.opennlp/opennlp-maxent}" conf="compile" />
+
     <!--
       Although the 3rd party morfologik jars are not dependencies of code in
       the analysis-extras contrib, they must remain here in order to

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/solr/contrib/analysis-extras/src/java/org/apache/solr/update/processor/OpenNLPExtractNamedEntitiesUpdateProcessorFactory.java
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/java/org/apache/solr/update/processor/OpenNLPExtractNamedEntitiesUpdateProcessorFactory.java b/solr/contrib/analysis-extras/src/java/org/apache/solr/update/processor/OpenNLPExtractNamedEntitiesUpdateProcessorFactory.java
new file mode 100644
index 0000000..d00df2b
--- /dev/null
+++ b/solr/contrib/analysis-extras/src/java/org/apache/solr/update/processor/OpenNLPExtractNamedEntitiesUpdateProcessorFactory.java
@@ -0,0 +1,571 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.update.processor;
+
+import static org.apache.solr.common.SolrException.ErrorCode.SERVER_ERROR;
+
+import java.io.IOException;
+import java.lang.invoke.MethodHandles;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
+
+import opennlp.tools.util.Span;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.opennlp.OpenNLPTokenizer;
+import org.apache.lucene.analysis.opennlp.tools.NLPNERTaggerOp;
+import org.apache.lucene.analysis.opennlp.tools.OpenNLPOpsFactory;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.SolrInputField;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.common.util.Pair;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.response.SolrQueryResponse;
+import org.apache.solr.schema.FieldType;
+import org.apache.solr.update.AddUpdateCommand;
+import org.apache.solr.update.processor.FieldMutatingUpdateProcessor.FieldNameSelector;
+import org.apache.solr.update.processor.FieldMutatingUpdateProcessorFactory.SelectorParams;
+import org.apache.solr.util.plugin.SolrCoreAware;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Extracts named entities using an OpenNLP NER <code>modelFile</code> from the values found in
+ * any matching <code>source</code> field into a configured <code>dest</code> field, after
+ * first tokenizing the source text using the index analyzer on the configured
+ * <code>analyzerFieldType</code>, which must include <code>solr.OpenNLPTokenizerFactory</code>
+ * as the tokenizer. E.g.:
+ *
+ * <pre class="prettyprint">
+ *   &lt;fieldType name="opennlp-en-tokenization" class="solr.TextField"&gt;
+ *     &lt;analyzer&gt;
+ *       &lt;tokenizer class="solr.OpenNLPTokenizerFactory"
+ *                  sentenceModel="en-sent.bin"
+ *                  tokenizerModel="en-tokenizer.bin"/&gt;
+ *     &lt;/analyzer&gt;
+ *   &lt;/fieldType&gt;
+ * </pre>
+ * 
+ * <p>See the <a href="http://opennlp.apache.org/models.html">OpenNLP website</a>
+ * for information on downloading pre-trained models.</p>
+ *
+ * <p>
+ * The <code>source</code> field(s) can be configured as either:
+ * </p>
+ * <ul>
+ *  <li>One or more <code>&lt;str&gt;</code></li>
+ *  <li>An <code>&lt;arr&gt;</code> of <code>&lt;str&gt;</code></li>
+ *  <li>A <code>&lt;lst&gt;</code> containing
+ *   {@link FieldMutatingUpdateProcessor FieldMutatingUpdateProcessorFactory style selector arguments}</li>
+ * </ul>
+ *
+ * <p>The <code>dest</code> field can be a single <code>&lt;str&gt;</code>
+ * containing the literal name of a destination field, or it may be a <code>&lt;lst&gt;</code> specifying a
+ * regex <code>pattern</code> and a <code>replacement</code> string. If the pattern + replacement option
+ * is used the pattern will be matched against all fields matched by the source selector, and the replacement
+ * string (including any capture groups specified from the pattern) will be evaluated using
+ * {@link Matcher#replaceAll(String)} to generate the literal name of the destination field.  Additionally,
+ * an occurrence of the string "{EntityType}" in the <code>dest</code> field specification, or in the
+ * <code>replacement</code> string, will be replaced with the entity type(s) returned for each entity by
+ * the OpenNLP NER model; as a result, if the model extracts more than one entity type, then more than one
+ * <code>dest</code> field will be populated.
+ * </p>
+ *
+ * <p>If the resolved <code>dest</code> field already exists in the document, then the
+ * named entities extracted from the <code>source</code> fields will be added to it.
+ * </p>
+ * <p>
+ * In the example below:
+ * </p>
+ * <ul>
+ *   <li>Named entities will be extracted from the <code>text</code> field and added
+ *       to the <code>people_s</code> field</li>
+ *   <li>Named entities will be extracted from both the <code>title</code> and
+ *       <code>subtitle</code> fields and added into the <code>titular_people</code> field</li>
+ *   <li>Named entities will be extracted from any field with a name ending in <code>_txt</code>
+ *       -- except for <code>notes_txt</code> -- and added into the <code>people_s</code> field</li>
+ *   <li>Named entities will be extracted from any field with a name beginning with "desc" and
+ *       ending in "s" (e.g. "descs" and "descriptions") and added to a field prefixed with "key_",
+ *       not ending in "s", and suffixed with "_people". (e.g. "key_desc_people" or
+ *       "key_description_people")</li>
+ *   <li>Named entities will be extracted from the <code>summary</code> field and added
+ *       to the <code>summary_person_s</code> field, assuming that the modelFile only extracts
+ *       entities of type "person".</li>
+ * </ul>
+ *
+ * <pre class="prettyprint">
+ * &lt;updateRequestProcessorChain name="multiple-extract"&gt;
+ *   &lt;processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory"&gt;
+ *     &lt;str name="modelFile"&gt;en-test-ner-person.bin&lt;/str&gt;
+ *     &lt;str name="analyzerFieldType"&gt;opennlp-en-tokenization&lt;/str&gt;
+ *     &lt;str name="source"&gt;text&lt;/str&gt;
+ *     &lt;str name="dest"&gt;people_s&lt;/str&gt;
+ *   &lt;/processor&gt;
+ *   &lt;processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory"&gt;
+ *     &lt;str name="modelFile"&gt;en-test-ner-person.bin&lt;/str&gt;
+ *     &lt;str name="analyzerFieldType"&gt;opennlp-en-tokenization&lt;/str&gt;
+ *     &lt;arr name="source"&gt;
+ *       &lt;str&gt;title&lt;/str&gt;
+ *       &lt;str&gt;subtitle&lt;/str&gt;
+ *     &lt;/arr&gt;
+ *     &lt;str name="dest"&gt;titular_people&lt;/str&gt;
+ *   &lt;/processor&gt;
+ *   &lt;processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory"&gt;
+ *     &lt;str name="modelFile"&gt;en-test-ner-person.bin&lt;/str&gt;
+ *     &lt;str name="analyzerFieldType"&gt;opennlp-en-tokenization&lt;/str&gt;
+ *     &lt;lst name="source"&gt;
+ *       &lt;str name="fieldRegex"&gt;.*_txt$&lt;/str&gt;
+ *       &lt;lst name="exclude"&gt;
+ *         &lt;str name="fieldName"&gt;notes_txt&lt;/str&gt;
+ *       &lt;/lst&gt;
+ *     &lt;/lst&gt;
+ *     &lt;str name="dest"&gt;people_s&lt;/str&gt;
+ *   &lt;/processor&gt;
+ *   &lt;processor class="solr.processor.OpenNLPExtractNamedEntitiesUpdateProcessorFactory"&gt;
+ *     &lt;str name="modelFile"&gt;en-test-ner-person.bin&lt;/str&gt;
+ *     &lt;str name="analyzerFieldType"&gt;opennlp-en-tokenization&lt;/str&gt;
+ *     &lt;lst name="source"&gt;
+ *       &lt;str name="fieldRegex"&gt;^desc(.*)s$&lt;/str&gt;
+ *     &lt;/lst&gt;
+ *     &lt;lst name="dest"&gt;
+ *       &lt;str name="pattern"&gt;^desc(.*)s$&lt;/str&gt;
+ *       &lt;str name="replacement"&gt;key_desc$1_people&lt;/str&gt;
+ *     &lt;/lst&gt;
+ *   &lt;/processor&gt;
+ *   &lt;processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory"&gt;
+ *     &lt;str name="modelFile"&gt;en-test-ner-person.bin&lt;/str&gt;
+ *     &lt;str name="analyzerFieldType"&gt;opennlp-en-tokenization&lt;/str&gt;
+ *     &lt;str name="source"&gt;summary&lt;/str&gt;
+ *     &lt;str name="dest"&gt;summary_{EntityType}_s&lt;/str&gt;
+ *   &lt;/processor&gt;
+ * &lt;/updateRequestProcessorChain&gt;
+ * </pre>
+ *
+ * @since 7.3.0
+ */
+public class OpenNLPExtractNamedEntitiesUpdateProcessorFactory
+    extends UpdateRequestProcessorFactory implements SolrCoreAware {
+
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  public static final String SOURCE_PARAM = "source";
+  public static final String DEST_PARAM = "dest";
+  public static final String PATTERN_PARAM = "pattern";
+  public static final String REPLACEMENT_PARAM = "replacement";
+  public static final String MODEL_PARAM = "modelFile";
+  public static final String ANALYZER_FIELD_TYPE_PARAM = "analyzerFieldType";
+  public static final String ENTITY_TYPE = "{EntityType}";
+
+  private SelectorParams srcInclusions = new SelectorParams();
+  private Collection<SelectorParams> srcExclusions = new ArrayList<>();
+
+  private FieldNameSelector srcSelector = null;
+
+  private String modelFile = null;
+  private String analyzerFieldType = null;
+
+  /**
+   * If pattern is null, then this is a literal field name.  If pattern is non-null then this
+   * is a replacement string that may contain meta-characters (ie: capture group identifiers)
+   * @see #pattern
+   */
+  private String dest = null;
+  /** @see #dest */
+  private Pattern pattern = null;
+
+  /**
+   * Returns the source field selector, which is only available once
+   * {@link #inform(SolrCore)} has been called.
+   *
+   * @throws SolrException if the selector has not been initialized yet
+   */
+  protected final FieldNameSelector getSourceSelector() {
+    if (null == srcSelector) {
+      throw new SolrException(SERVER_ERROR, "selector was never initialized, inform(SolrCore) never called???");
+    }
+    return srcSelector;
+  }
+
+  /**
+   * Parses and validates the init params of this factory.
+   *
+   * <p>Exactly one of the two config styles must be used: the full syntax
+   * (<code>source</code> + <code>dest</code>) or the short hand syntax
+   * (<code>pattern</code> + <code>replacement</code>). In both cases <code>modelFile</code>
+   * and <code>analyzerFieldType</code> are mandatory <code>&lt;str&gt;</code> params.</p>
+   *
+   * @throws SolrException if a mandatory param is missing or malformed, or if any
+   *         unexpected param remains after parsing
+   */
+  @SuppressWarnings("unchecked")
+  @Override
+  public void init(NamedList args) {
+
+    // high level (loose) check for which type of config we have.
+    //
+    // individual init methods do more strict syntax checking
+    final boolean fullSyntax =
+        0 <= args.indexOf(SOURCE_PARAM, 0) && 0 <= args.indexOf(DEST_PARAM, 0);
+    final boolean shortHandSyntax = !fullSyntax
+        && 0 <= args.indexOf(PATTERN_PARAM, 0) && 0 <= args.indexOf(REPLACEMENT_PARAM, 0);
+    if (!fullSyntax && !shortHandSyntax) {
+      throw new SolrException(SERVER_ERROR, "A combination of either '" + SOURCE_PARAM + "' + '"+
+          DEST_PARAM + "', or '" + REPLACEMENT_PARAM + "' + '" +
+          PATTERN_PARAM + "' init params are mandatory");
+    }
+
+    // Consume the mandatory model/analyzer params *before* the syntax-specific helpers run:
+    // initSimpleRegexReplacement rejects any leftover args, so leaving these two in the list
+    // until afterwards made the short hand syntax impossible to use.
+    Object modelParam = args.remove(MODEL_PARAM);
+    if (null == modelParam) {
+      throw new SolrException(SERVER_ERROR, "Missing required init param '" + MODEL_PARAM + "'");
+    }
+    if ( ! (modelParam instanceof CharSequence)) {
+      throw new SolrException(SERVER_ERROR, "Init param '" + MODEL_PARAM + "' must be a <str>");
+    }
+    modelFile = modelParam.toString();
+
+    Object analyzerFieldTypeParam = args.remove(ANALYZER_FIELD_TYPE_PARAM);
+    if (null == analyzerFieldTypeParam) {
+      throw new SolrException(SERVER_ERROR, "Missing required init param '" + ANALYZER_FIELD_TYPE_PARAM + "'");
+    }
+    if ( ! (analyzerFieldTypeParam instanceof CharSequence)) {
+      throw new SolrException(SERVER_ERROR, "Init param '" + ANALYZER_FIELD_TYPE_PARAM + "' must be a <str>");
+    }
+    analyzerFieldType = analyzerFieldTypeParam.toString();
+
+    if (fullSyntax) {
+      initSourceSelectorSyntax(args);
+    } else {
+      initSimpleRegexReplacement(args);
+    }
+
+    if (0 < args.size()) {
+      throw new SolrException(SERVER_ERROR, "Unexpected init param(s): '" + args.getName(0) + "'");
+    }
+
+    super.init(args);
+  }
+
+  /**
+   * init helper method that should only be called when we know for certain that both the
+   * "source" and "dest" init params do <em>not</em> exist.
+   */
+  @SuppressWarnings("unchecked")
+  private void initSimpleRegexReplacement(NamedList args) {
+    // The syntactic sugar for the case where there is only one regex pattern for source and the same pattern
+    // is used for the destination pattern...
+    //
+    //  pattern != null && replacement != null
+    //
+    // ...as top level elements, with no other config options specified
+
+    // if we got here we know we had pattern and replacement, now check for the other two  so that we can give a better
+    // message than "unexpected"
+    if (0 <= args.indexOf(SOURCE_PARAM, 0) || 0 <= args.indexOf(DEST_PARAM, 0) ) {
+      throw new SolrException(SERVER_ERROR,"Short hand syntax must not be mixed with full syntax. Found " +
+          PATTERN_PARAM + " and " + REPLACEMENT_PARAM + " but also found " + SOURCE_PARAM + " or " + DEST_PARAM);
+    }
+
+    assert args.indexOf(SOURCE_PARAM, 0) < 0;
+
+    Object patt = args.remove(PATTERN_PARAM);
+    Object replacement = args.remove(REPLACEMENT_PARAM);
+
+    if (null == patt || null == replacement) {
+      throw new SolrException(SERVER_ERROR, "Init params '" + PATTERN_PARAM + "' and '" +
+          REPLACEMENT_PARAM + "' are both mandatory if '" + SOURCE_PARAM + "' and '"+
+          DEST_PARAM + "' are not both specified");
+    }
+
+    if (0 != args.size()) {
+      throw new SolrException(SERVER_ERROR, "Init params '" + REPLACEMENT_PARAM + "' and '" +
+          PATTERN_PARAM + "' must be children of '" + DEST_PARAM +
+          "' to be combined with other options.");
+    }
+
+    if (!(replacement instanceof String)) {
+      throw new SolrException(SERVER_ERROR, "Init param '" + REPLACEMENT_PARAM + "' must be a string (i.e. <str>)");
+    }
+    if (!(patt instanceof String)) {
+      throw new SolrException(SERVER_ERROR, "Init param '" + PATTERN_PARAM + "' must be a string (i.e. <str>)");
+    }
+
+    dest = replacement.toString();
+    try {
+      this.pattern = Pattern.compile(patt.toString());
+    } catch (PatternSyntaxException pe) {
+      throw new SolrException(SERVER_ERROR, "Init param " + PATTERN_PARAM +
+          " is not a valid regex pattern: " + patt, pe);
+
+    }
+    srcInclusions = new SelectorParams();
+    srcInclusions.fieldRegex = Collections.singletonList(this.pattern);
+  }
+
+  /**
+   * Init helper for the full syntax. It should only be called when we know for certain that
+   * both the "source" and "dest" init params <em>do</em> exist.
+   *
+   * <p>"source" may be one or more strings, or a single nested selector list (optionally with
+   * "exclude" children); "dest" may be a literal field name or a list holding "pattern" and
+   * "replacement".</p>
+   *
+   * @throws SolrException if the full syntax is mixed with the short hand syntax, or if any
+   *         of the params is missing, of the wrong type, or has unexpected children
+   */
+  @SuppressWarnings("unchecked")
+  private void initSourceSelectorSyntax(NamedList args) {
+    // Full and complete syntax where source and dest are mandatory.
+    //
+    // source may be a single string or a selector.
+    // dest may be a single string or list containing pattern and replacement
+    //
+    //   source != null && dest != null
+
+    // if we got here we know we had source and dest, now check for the other two so that we can give a better
+    // message than "unexpected"
+    if (0 <= args.indexOf(PATTERN_PARAM, 0) || 0 <= args.indexOf(REPLACEMENT_PARAM, 0) ) {
+      throw new SolrException(SERVER_ERROR,"Short hand syntax must not be mixed with full syntax. Found " +
+          SOURCE_PARAM + " and " + DEST_PARAM + " but also found " + PATTERN_PARAM + " or " + REPLACEMENT_PARAM);
+    }
+
+    Object d = args.remove(DEST_PARAM);
+    assert null != d;
+
+    List<Object> sources = args.getAll(SOURCE_PARAM);
+    assert null != sources;
+
+    if (1 == sources.size()) {
+      if (sources.get(0) instanceof NamedList) {
+        // nested set of selector options
+        NamedList selectorConfig = (NamedList) args.remove(SOURCE_PARAM);
+
+        srcInclusions = parseSelectorParams(selectorConfig);
+
+        List<Object> excList = selectorConfig.getAll("exclude");
+
+        for (Object excObj : excList) {
+          if (null == excObj) {
+            throw new SolrException(SERVER_ERROR, "Init param '" + SOURCE_PARAM +
+                "' child 'exclude' can not be null");
+          }
+          if (!(excObj instanceof NamedList)) {
+            throw new SolrException(SERVER_ERROR, "Init param '" + SOURCE_PARAM +
+                "' child 'exclude' must be <lst/>");
+          }
+          NamedList exc = (NamedList) excObj;
+          srcExclusions.add(parseSelectorParams(exc));
+          if (0 < exc.size()) {
+            // report the leftover child of this 'exclude' list, not a child of the enclosing selector
+            throw new SolrException(SERVER_ERROR, "Init param '" + SOURCE_PARAM +
+                "' has unexpected 'exclude' sub-param(s): '"
+                + exc.getName(0) + "'");
+          }
+          // call once per instance
+          selectorConfig.remove("exclude");
+        }
+
+        if (0 < selectorConfig.size()) {
+          throw new SolrException(SERVER_ERROR, "Init param '" + SOURCE_PARAM +
+              "' contains unexpected child param(s): '" +
+              selectorConfig.getName(0) + "'");
+        }
+        // consume from the named list so it doesn't interfere with subsequent processing
+        sources.remove(0);
+      }
+    }
+    if (1 <= sources.size()) {
+      // source better be one or more strings
+      srcInclusions.fieldName = new HashSet<>(args.removeConfigArgs("source"));
+    }
+    if (srcInclusions == null) {
+      throw new SolrException(SERVER_ERROR,
+          "Init params do not specify any field from which to extract entities, please supply either "
+          + SOURCE_PARAM + " and " + DEST_PARAM + " or " + PATTERN_PARAM + " and " + REPLACEMENT_PARAM + ". See javadocs " +
+          "for OpenNLPExtractNamedEntitiesUpdateProcessor for further details.");
+    }
+
+    if (d instanceof NamedList) {
+      NamedList destList = (NamedList) d;
+
+      Object patt = destList.remove(PATTERN_PARAM);
+      Object replacement = destList.remove(REPLACEMENT_PARAM);
+
+      if (null == patt || null == replacement) {
+        throw new SolrException(SERVER_ERROR, "Init param '" + DEST_PARAM + "' children '" +
+            PATTERN_PARAM + "' and '" + REPLACEMENT_PARAM +
+            "' are both mandatory and can not be null");
+      }
+      if (! (patt instanceof String && replacement instanceof String)) {
+        throw new SolrException(SERVER_ERROR, "Init param '" + DEST_PARAM + "' children '" +
+            PATTERN_PARAM + "' and '" + REPLACEMENT_PARAM +
+            "' must both be strings (i.e. <str>)");
+      }
+      if (0 != destList.size()) {
+        throw new SolrException(SERVER_ERROR, "Init param '" + DEST_PARAM + "' has unexpected children: '"
+            + destList.getName(0) + "'");
+      }
+
+      try {
+        this.pattern = Pattern.compile(patt.toString());
+      } catch (PatternSyntaxException pe) {
+        throw new SolrException(SERVER_ERROR, "Init param '" + DEST_PARAM + "' child '" + PATTERN_PARAM +
+            "' is not a valid regex pattern: " + patt, pe);
+      }
+      dest = replacement.toString();
+
+    } else if (d instanceof String) {
+      dest = d.toString();
+    } else {
+      throw new SolrException(SERVER_ERROR, "Init param '" + DEST_PARAM + "' must either be a string " +
+          "(i.e. <str>) or a list (i.e. <lst>) containing '" +
+          PATTERN_PARAM + "' and '" + REPLACEMENT_PARAM + "'");
+    }
+
+  }
+
+  /**
+   * Builds the source field selector from the configured inclusions, wraps it once per
+   * configured exclusion, and then asks {@link OpenNLPOpsFactory} for the NER model named by
+   * <code>modelFile</code> using the core's resource loader.
+   *
+   * @throws IllegalArgumentException wrapping any {@link IOException} raised while
+   *         obtaining the model
+   */
+  @Override
+  public void inform(final SolrCore core) {
+
+    srcSelector = FieldMutatingUpdateProcessor.createFieldNameSelector(
+        core.getResourceLoader(), core, srcInclusions, FieldMutatingUpdateProcessor.SELECT_NO_FIELDS);
+
+    for (SelectorParams exclusion : srcExclusions) {
+      final FieldNameSelector excluded = FieldMutatingUpdateProcessor.createFieldNameSelector(
+          core.getResourceLoader(), core, exclusion, FieldMutatingUpdateProcessor.SELECT_NO_FIELDS);
+      srcSelector = FieldMutatingUpdateProcessor.wrap(srcSelector, excluded);
+    }
+
+    try {
+      OpenNLPOpsFactory.getNERTaggerModel(modelFile, core.getResourceLoader());
+    } catch (IOException ioe) {
+      throw new IllegalArgumentException(ioe);
+    }
+  }
+
+  @Override
+  public final UpdateRequestProcessor getInstance
+      (SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next) {
+    final FieldNameSelector srcSelector = getSourceSelector();
+    return new UpdateRequestProcessor(next) {
+      private final NLPNERTaggerOp nerTaggerOp;
+      private Analyzer analyzer = null;
+      {
+        try {
+          nerTaggerOp = OpenNLPOpsFactory.getNERTagger(modelFile);
+          FieldType fieldType = req.getSchema().getFieldTypeByName(analyzerFieldType);
+          if (fieldType == null) {
+            throw new SolrException
+                (SERVER_ERROR, ANALYZER_FIELD_TYPE_PARAM + " '" + analyzerFieldType + "' not found in the schema.");
+          }
+          analyzer = fieldType.getIndexAnalyzer();
+        } catch (IOException e) {
+          throw new IllegalArgumentException(e);
+        }
+      }
+
+      /**
+       * Extracts named entities from every selected source field of the document being added
+       * and appends them to the resolved destination field(s), then passes the command on to
+       * the next processor in the chain.
+       *
+       * @param cmd the add command whose document is inspected and augmented
+       * @throws IOException if analysis of a source value or the downstream processor fails
+       */
+      @Override
+      public void processAdd(AddUpdateCommand cmd) throws IOException {
+
+        final SolrInputDocument doc = cmd.getSolrInputDocument();
+
+        // Destination may be regex replace string, or "{EntityType}" replaced by
+        // each entity's type, both of which can cause multiple output fields.
+        Map<String,SolrInputField> destMap = new HashMap<>();
+
+        // preserve initial values
+        for (final String fname : doc.getFieldNames()) {
+          if ( ! srcSelector.shouldMutate(fname)) continue;
+
+          Collection<Object> srcFieldValues = doc.getFieldValues(fname);
+          if (srcFieldValues == null || srcFieldValues.isEmpty()) continue;
+
+          String resolvedDest = dest;
+
+          if (pattern != null) {
+            Matcher matcher = pattern.matcher(fname);
+            if (matcher.find()) {
+              resolvedDest = matcher.replaceAll(dest);
+            } else {
+              log.debug("srcSelector.shouldMutate(\"{}\") returned true, " +
+                  "but replacement pattern did not match, field skipped.", fname);
+              continue;
+            }
+          }
+
+          for (Object val : srcFieldValues) {
+            for (Pair<String,String> entity : extractTypedNamedEntities(val)) {
+              String entityName = entity.first();
+              String entityType = entity.second();
+              // Resolve the placeholder into a per-entity name. Overwriting resolvedDest here
+              // would consume "{EntityType}" on the first entity and send every later entity,
+              // whatever its type, to the first entity's field.
+              final String entityDest = resolvedDest.replace(ENTITY_TYPE, entityType);
+
+              SolrInputField destField;
+              if (doc.containsKey(entityDest)) {
+                destField = doc.getField(entityDest);
+              } else {
+                destField = destMap.get(entityDest);
+                if (destField == null) {
+                  destField = new SolrInputField(entityDest);
+                }
+              }
+              destField.addValue(entityName);
+
+              // put it in map to avoid concurrent modification...
+              destMap.put(entityDest, destField);
+            }
+          }
+        }
+
+        for (Map.Entry<String,SolrInputField> entry : destMap.entrySet()) {
+          doc.put(entry.getKey(), entry.getValue());
+        }
+        super.processAdd(cmd);
+      }
+
+      /**
+       * Using configured NER model, extracts (name, type) pairs from the given source field value.
+       *
+       * <p>The value's string form is tokenized with the configured analyzer and handed to the
+       * NER tagger one sentence at a time, where a sentence ends at a token flagged with
+       * {@link OpenNLPTokenizer#EOS_FLAG_BIT}.</p>
+       *
+       * @param srcFieldValue the source field value; its <code>toString()</code> is analyzed
+       * @return the extracted (entity text, entity type) pairs, in extraction order
+       * @throws IOException if the token stream fails
+       */
+      private List<Pair<String,String>> extractTypedNamedEntities(Object srcFieldValue) throws IOException {
+        List<Pair<String,String>> entitiesWithType = new ArrayList<>();
+        List<String> terms = new ArrayList<>();
+        List<Integer> startOffsets = new ArrayList<>();
+        List<Integer> endOffsets = new ArrayList<>();
+        String fullText = srcFieldValue.toString();
+        // try-with-resources guarantees close() even when tokenization or tagging throws;
+        // otherwise the analyzer's reusable token stream would be left un-closed.
+        try (TokenStream tokenStream = analyzer.tokenStream("", fullText)) {
+          CharTermAttribute termAtt = tokenStream.addAttribute(CharTermAttribute.class);
+          OffsetAttribute offsetAtt = tokenStream.addAttribute(OffsetAttribute.class);
+          FlagsAttribute flagsAtt = tokenStream.addAttribute(FlagsAttribute.class);
+          tokenStream.reset();
+          synchronized (nerTaggerOp) {
+            while (tokenStream.incrementToken()) {
+              terms.add(termAtt.toString());
+              startOffsets.add(offsetAtt.startOffset());
+              endOffsets.add(offsetAtt.endOffset());
+              boolean endOfSentence = 0 != (flagsAtt.getFlags() & OpenNLPTokenizer.EOS_FLAG_BIT);
+              if (endOfSentence) {    // extract named entities one sentence at a time
+                extractEntitiesFromSentence(fullText, terms, startOffsets, endOffsets, entitiesWithType);
+              }
+            }
+            tokenStream.end();
+            if (!terms.isEmpty()) { // In case last token of last sentence isn't properly flagged with EOS_FLAG_BIT
+              extractEntitiesFromSentence(fullText, terms, startOffsets, endOffsets, entitiesWithType);
+            }
+            nerTaggerOp.reset();      // Forget all adaptive data collected during previous calls
+          }
+        }
+        return entitiesWithType;
+      }
+
+      private void extractEntitiesFromSentence(String fullText, List<String> terms, List<Integer> startOffsets,
+                                               List<Integer> endOffsets, List<Pair<String,String>> entitiesWithType) {
+        for (Span span : nerTaggerOp.getNames(terms.toArray(new String[terms.size()]))) {
+          String text = fullText.substring(startOffsets.get(span.getStart()), endOffsets.get(span.getEnd() - 1));
+          entitiesWithType.add(new Pair<>(text, span.getType()));
+        }
+        terms.clear();
+        startOffsets.clear();
+        endOffsets.clear();
+      }
+    };
+  }
+
+  /**
+   * Shorthand ("macro") that simply delegates to
+   * {@link FieldMutatingUpdateProcessorFactory#parseSelectorParams(NamedList)}.
+   */
+  private static SelectorParams parseSelectorParams(NamedList args) {
+    return FieldMutatingUpdateProcessorFactory.parseSelectorParams(args);
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/solr/contrib/analysis-extras/src/java/org/apache/solr/update/processor/package.html
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/java/org/apache/solr/update/processor/package.html b/solr/contrib/analysis-extras/src/java/org/apache/solr/update/processor/package.html
new file mode 100644
index 0000000..1388c29
--- /dev/null
+++ b/solr/contrib/analysis-extras/src/java/org/apache/solr/update/processor/package.html
@@ -0,0 +1,24 @@
+<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<!-- not a package-info.java, because we already defined this package in core/ -->
+<html>
+  <body>
+    Update request processor invoking OpenNLP Named Entity Recognition over configured
+    source field(s), populating configured target field(s) with the results.
+  </body>
+</html>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/en-test-ner-person.bin
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/en-test-ner-person.bin b/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/en-test-ner-person.bin
new file mode 100644
index 0000000..0b40aac
Binary files /dev/null and b/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/en-test-ner-person.bin differ

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/en-test-sent.bin
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/en-test-sent.bin b/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/en-test-sent.bin
new file mode 100644
index 0000000..4252bcb
Binary files /dev/null and b/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/en-test-sent.bin differ

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/en-test-tokenizer.bin
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/en-test-tokenizer.bin b/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/en-test-tokenizer.bin
new file mode 100644
index 0000000..94668c0
Binary files /dev/null and b/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/en-test-tokenizer.bin differ

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/schema-opennlp-extract.xml
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/schema-opennlp-extract.xml b/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/schema-opennlp-extract.xml
new file mode 100644
index 0000000..fc13431
--- /dev/null
+++ b/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/schema-opennlp-extract.xml
@@ -0,0 +1,49 @@
+<?xml version="1.0" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<schema name="test-opennlp-extract" version="1.6">
+  <fieldType name="opennlp-en-tokenization" class="solr.TextField">
+    <analyzer>
+      <tokenizer class="solr.OpenNLPTokenizerFactory"
+                 sentenceModel="en-test-sent.bin"
+                 tokenizerModel="en-test-tokenizer.bin"/>
+    </analyzer>
+  </fieldType>
+
+  <fieldType name="string" class="solr.StrField" sortMissingLast="true"/>
+
+  <fieldType name="text" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
+    <analyzer>
+      <tokenizer class="solr.MockTokenizerFactory"/>
+      <filter class="solr.LowerCaseFilterFactory"/>
+      <filter class="solr.PorterStemFilterFactory"/>
+    </analyzer>
+  </fieldType>
+
+  <field name="id" type="string" indexed="true" stored="true" multiValued="false" required="true"/>
+  <field name="text" type="text" indexed="true" stored="false"/>
+  <field name="subject" type="text" indexed="true" stored="true"/>
+  <field name="title" type="text" indexed="true" stored="true"/>
+  <field name="subtitle" type="text" indexed="true" stored="true"/>
+  <field name="descs" type="text" indexed="true" stored="true"/>
+  <field name="descriptions" type="text" indexed="true" stored="true"/>
+
+  <dynamicField name="*_txt" type="text" indexed="true" stored="true"/>
+  <dynamicField name="*_s" type="string" indexed="true" stored="true" multiValued="true"/>
+  <dynamicField name="*_people" type="string" indexed="true" stored="true" multiValued="true"/>
+</schema>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/solrconfig-opennlp-extract.xml
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/solrconfig-opennlp-extract.xml b/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/solrconfig-opennlp-extract.xml
new file mode 100644
index 0000000..c44c9e1
--- /dev/null
+++ b/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/solrconfig-opennlp-extract.xml
@@ -0,0 +1,206 @@
+<?xml version="1.0" ?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<config>
+  <luceneMatchVersion>${tests.luceneMatchVersion:LATEST}</luceneMatchVersion>
+  <xi:include href="solrconfig.snippet.randomindexconfig.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
+  <requestHandler name="/select" class="solr.SearchHandler"></requestHandler>
+  <requestHandler name="/update" class="solr.UpdateRequestHandler"  />
+  <directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.RAMDirectoryFactory}"/>
+  <schemaFactory class="ClassicIndexSchemaFactory"/>
+
+  <updateRequestProcessorChain name="extract-single">
+    <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+      <str name="modelFile">en-test-ner-person.bin</str>
+      <str name="analyzerFieldType">opennlp-en-tokenization</str>
+      <str name="source">source1_s</str>
+      <str name="dest">dest_s</str>
+    </processor>
+  </updateRequestProcessorChain>
+
+  <updateRequestProcessorChain name="extract-single-regex">
+    <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+      <str name="modelFile">en-test-ner-person.bin</str>
+      <str name="analyzerFieldType">opennlp-en-tokenization</str>
+      <str name="source">source1_s</str>
+      <lst name="dest">
+        <str name="pattern">source\d(_s)</str>
+        <str name="replacement">dest$1</str>
+      </lst>
+    </processor>
+  </updateRequestProcessorChain>
+
+  <updateRequestProcessorChain name="extract-multi">
+    <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+      <str name="modelFile">en-test-ner-person.bin</str>
+      <str name="analyzerFieldType">opennlp-en-tokenization</str>
+      <str name="source">source1_s</str>
+      <str name="source">source2_s</str>
+      <str name="dest">dest_s</str>
+    </processor>
+  </updateRequestProcessorChain>
+
+  <updateRequestProcessorChain name="extract-multi-regex">
+    <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+      <str name="modelFile">en-test-ner-person.bin</str>
+      <str name="analyzerFieldType">opennlp-en-tokenization</str>
+      <str name="source">source1_s</str>
+      <str name="source">source2_s</str>
+      <lst name="dest">
+        <str name="pattern">source\d(_s)</str>
+        <str name="replacement">dest$1</str>
+      </lst>
+    </processor>
+  </updateRequestProcessorChain>
+
+  <updateRequestProcessorChain name="extract-array">
+    <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+      <str name="modelFile">en-test-ner-person.bin</str>
+      <str name="analyzerFieldType">opennlp-en-tokenization</str>
+      <arr name="source">
+        <str>source1_s</str>
+        <str>source2_s</str>
+      </arr>
+      <str name="dest">dest_s</str>
+    </processor>
+  </updateRequestProcessorChain>
+
+  <updateRequestProcessorChain name="extract-array-regex">
+    <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+      <str name="modelFile">en-test-ner-person.bin</str>
+      <str name="analyzerFieldType">opennlp-en-tokenization</str>
+      <arr name="source">
+        <str>source1_s</str>
+        <str>source2_s</str>
+      </arr>
+      <lst name="dest">
+        <str name="pattern">source\d(_s)</str>
+        <str name="replacement">dest$1</str>
+      </lst>
+    </processor>
+  </updateRequestProcessorChain>
+
+  <updateRequestProcessorChain name="extract-selector">
+    <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+      <str name="modelFile">en-test-ner-person.bin</str>
+      <str name="analyzerFieldType">opennlp-en-tokenization</str>
+      <lst name="source">
+        <str name="fieldRegex">source\d_.*</str>
+        <lst name="exclude">
+          <str name="fieldRegex">source0_.*</str>
+        </lst>
+      </lst>
+      <str name="dest">dest_s</str>
+    </processor>
+  </updateRequestProcessorChain>
+
+  <updateRequestProcessorChain name="extract-selector-regex">
+    <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+      <str name="modelFile">en-test-ner-person.bin</str>
+      <str name="analyzerFieldType">opennlp-en-tokenization</str>
+      <lst name="source">
+        <str name="fieldRegex">source\d_.*</str>
+        <lst name="exclude">
+          <str name="fieldRegex">source0_.*</str>
+        </lst>
+      </lst>
+      <lst name="dest">
+        <str name="pattern">source\d(_s)</str>
+        <str name="replacement">dest$1</str>
+      </lst>
+    </processor>
+  </updateRequestProcessorChain>
+
+  <updateRequestProcessorChain name="extract-regex-replaceall">
+    <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+      <str name="modelFile">en-test-ner-person.bin</str>
+      <str name="analyzerFieldType">opennlp-en-tokenization</str>
+      <lst name="source">
+        <str name="fieldRegex">foo.*</str>
+      </lst>
+      <lst name="dest">
+        <!-- unbounded pattern that can be replaced multiple times in field name -->
+        <str name="pattern">x(\d)</str>
+        <str name="replacement">y$1</str>
+      </lst>
+    </processor>
+  </updateRequestProcessorChain>
+
+  <updateRequestProcessorChain name="extract-regex-replaceall-with-entity-type">
+    <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+      <str name="modelFile">en-test-ner-person.bin</str>
+      <str name="analyzerFieldType">opennlp-en-tokenization</str>
+      <lst name="source">
+        <str name="fieldRegex">foo.*</str>
+      </lst>
+      <lst name="dest">
+        <!-- unbounded pattern that can be replaced multiple times in field name -->
+        <str name="pattern">x(\d)</str>
+        <str name="replacement">{EntityType}_y$1</str>
+      </lst>
+    </processor>
+  </updateRequestProcessorChain>
+
+  <!-- example used in OpenNLPExtractNamedEntitiesUpdateProcessorFactory javadocs -->
+  <updateRequestProcessorChain name="multiple-extract">
+    <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+      <str name="modelFile">en-test-ner-person.bin</str>
+      <str name="analyzerFieldType">opennlp-en-tokenization</str>
+      <str name="source">text</str>
+      <str name="dest">people_s</str>
+    </processor>
+    <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+      <str name="modelFile">en-test-ner-person.bin</str>
+      <str name="analyzerFieldType">opennlp-en-tokenization</str>
+      <arr name="source">
+        <str>title</str>
+        <str>subtitle</str>
+      </arr>
+      <str name="dest">titular_people</str>
+    </processor>
+    <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+      <str name="modelFile">en-test-ner-person.bin</str>
+      <str name="analyzerFieldType">opennlp-en-tokenization</str>
+      <lst name="source">
+        <str name="fieldRegex">.*_txt$</str>
+        <lst name="exclude">
+          <str name="fieldName">notes_txt</str>
+        </lst>
+      </lst>
+      <str name="dest">people_s</str>
+    </processor>
+    <processor class="solr.processor.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+      <str name="modelFile">en-test-ner-person.bin</str>
+      <str name="analyzerFieldType">opennlp-en-tokenization</str>
+      <lst name="source">
+        <str name="fieldRegex">^desc(.*)s$</str>
+      </lst>
+      <lst name="dest">
+        <str name="pattern">^desc(.*)s$</str>
+        <str name="replacement">key_desc$1_people</str>
+      </lst>
+    </processor>
+    <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+      <str name="modelFile">en-test-ner-person.bin</str>
+      <str name="analyzerFieldType">opennlp-en-tokenization</str>
+      <str name="source">summary</str>
+      <str name="dest">summary_{EntityType}_s</str>
+    </processor>
+  </updateRequestProcessorChain>
+</config>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/solrconfig.snippet.randomindexconfig.xml
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/solrconfig.snippet.randomindexconfig.xml b/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/solrconfig.snippet.randomindexconfig.xml
new file mode 100644
index 0000000..23516b0
--- /dev/null
+++ b/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/solrconfig.snippet.randomindexconfig.xml
@@ -0,0 +1,48 @@
+<?xml version="1.0" ?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!--
+A solrconfig.xml snippet containing indexConfig settings for randomized testing.
+-->
+<indexConfig>
+  <!-- this sys property is not set by SolrTestCaseJ4 because we ideally want to use
+       the RandomMergePolicy in all tests - but some tests expect very specific
+       Merge behavior, so those tests can set it as needed.
+  -->
+  <mergePolicyFactory class="${solr.tests.mergePolicyFactory:org.apache.solr.util.RandomMergePolicyFactory}" />
+
+  <useCompoundFile>${useCompoundFile:false}</useCompoundFile>
+
+  <maxBufferedDocs>${solr.tests.maxBufferedDocs}</maxBufferedDocs>
+  <ramBufferSizeMB>${solr.tests.ramBufferSizeMB}</ramBufferSizeMB>
+
+  <mergeScheduler class="${solr.tests.mergeScheduler}" />
+
+  <writeLockTimeout>1000</writeLockTimeout>
+  <commitLockTimeout>10000</commitLockTimeout>
+
+  <!-- this sys property is not set by SolrTestCaseJ4 because almost all tests should
+       use the single process lockType for speed - but tests that explicitly need
+       to vary the lockType can set it as needed.
+  -->
+  <lockType>${solr.tests.lockType:single}</lockType>
+
+  <infoStream>${solr.tests.infostream:false}</infoStream>
+
+</indexConfig>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/solr/contrib/analysis-extras/src/test/org/apache/solr/update/processor/TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory.java
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/test/org/apache/solr/update/processor/TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory.java b/solr/contrib/analysis-extras/src/test/org/apache/solr/update/processor/TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory.java
new file mode 100644
index 0000000..dad06a8
--- /dev/null
+++ b/solr/contrib/analysis-extras/src/test/org/apache/solr/update/processor/TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory.java
@@ -0,0 +1,192 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.update.processor;
+
+import java.io.File;
+import java.util.Arrays;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.solr.common.SolrInputDocument;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory extends UpdateProcessorTestBase {
+
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    File testHome = createTempDir().toFile();
+    FileUtils.copyDirectory(getFile("analysis-extras/solr"), testHome);
+    initCore("solrconfig-opennlp-extract.xml", "schema-opennlp-extract.xml", testHome.getAbsolutePath());
+  }
+
+  @Test
+  public void testSimpleExtract() throws Exception {
+    SolrInputDocument doc = processAdd("extract-single",
+        doc(f("id", "1"),
+            f("source1_s", "Take this to Mr. Flashman.")));
+    assertEquals("dest_s should have stringValue", "Flashman", doc.getFieldValue("dest_s"));
+  }
+
+  @Test
+  public void testMultiExtract() throws Exception {
+    SolrInputDocument doc = processAdd("extract-multi",
+        doc(f("id", "1"),
+            f("source1_s", "Hello Flashman."),
+            f("source2_s", "Calling Flashman.")));
+
+    assertEquals(Arrays.asList("Flashman", "Flashman"), doc.getFieldValues("dest_s"));
+  }
+
+  @Test
+  public void testArrayExtract() throws Exception {
+    SolrInputDocument doc = processAdd("extract-array",
+        doc(f("id", "1"),
+            f("source1_s", "Currently we have Flashman. Not much else."),
+            f("source2_s", "Flashman. Is. Not. There.")));
+
+    assertEquals(Arrays.asList("Flashman", "Flashman"), doc.getFieldValues("dest_s"));
+  }
+
+  @Test
+  public void testSelectorExtract() throws Exception {
+    SolrInputDocument doc = processAdd("extract-selector",
+        doc(f("id", "1"),
+            f("source0_s", "Flashman. Or not."),
+            f("source1_s", "Serendipitously, he was. I mean, Flashman. And yet."),
+            f("source2_s", "Correct, Flashman.")));
+
+    assertEquals(Arrays.asList("Flashman", "Flashman"), doc.getFieldValues("dest_s"));
+  }
+
+  public void testMultipleExtracts() throws Exception {
+    // test example from the javadocs
+    SolrInputDocument doc = processAdd("multiple-extract",
+        doc(f("id", "1"),
+            f("text", "From Flashman. To Panman."),
+            f("title", "It's Captain Flashman.", "Privately, Flashman."),
+            f("subtitle", "Ineluctably, Flashman."),
+            f("corrolary_txt", "Forsooth thou bringeth Flashman."),
+            f("notes_txt", "Yes Flashman."),
+            f("summary", "Many aspire to be Flashman."),
+            f("descs", "Courage, Flashman.", "Ain't he Flashman."),
+            f("descriptions", "Flashman. Flashman. Flashman.")));
+
+    assertEquals(Arrays.asList("Flashman", "Flashman"), doc.getFieldValues("people_s"));
+    assertEquals(Arrays.asList("Flashman", "Flashman", "Flashman"), doc.getFieldValues("titular_people"));
+    assertEquals(Arrays.asList("Flashman", "Flashman"), doc.getFieldValues("key_desc_people"));
+    assertEquals(Arrays.asList("Flashman", "Flashman", "Flashman"), doc.getFieldValues("key_description_people"));
+    assertEquals("Flashman", doc.getFieldValue("summary_person_s")); // {EntityType} field name interpolation
+  }
+
+  public void testEquivalentExtraction() throws Exception {
+    SolrInputDocument d;
+
+    // regardless of chain, all of these checks should be equivalent
+    for (String chain : Arrays.asList("extract-single", "extract-single-regex",
+        "extract-multi", "extract-multi-regex",
+        "extract-array", "extract-array-regex",
+        "extract-selector", "extract-selector-regex")) {
+
+      // simple extract
+      d = processAdd(chain,
+          doc(f("id", "1111"),
+              f("source0_s", "Totally Flashman."), // not extracted
+              f("source1_s", "One nation under Flashman.", "Good Flashman.")));
+      assertNotNull(chain, d);
+      assertEquals(chain, Arrays.asList("Flashman", "Flashman"), d.getFieldValues("dest_s"));
+
+      // append to existing values
+      d = processAdd(chain,
+          doc(f("id", "1111"),
+              field("dest_s", "orig1", "orig2"),
+              f("source0_s", "Flashman. In totality."), // not extracted
+              f("source1_s", "Two nations under Flashman.", "Meh Flashman.")));
+      assertNotNull(chain, d);
+      assertEquals(chain, Arrays.asList("orig1", "orig2", "Flashman", "Flashman"), d.getFieldValues("dest_s"));
+    }
+
+    // should be equivalent for any chain matching source1_s and source2_s (but not source0_s)
+    for (String chain : Arrays.asList("extract-multi", "extract-multi-regex",
+        "extract-array", "extract-array-regex",
+        "extract-selector", "extract-selector-regex")) {
+
+      // simple extract
+      d = processAdd(chain,
+          doc(f("id", "1111"),
+              f("source0_s", "Not Flashman."), // not extracted
+              f("source1_s", "Could have had a Flashman.", "Bad Flashman."),
+              f("source2_s", "Indubitably Flashman.")));
+      assertNotNull(chain, d);
+      assertEquals(chain, Arrays.asList("Flashman", "Flashman", "Flashman"), d.getFieldValues("dest_s"));
+
+      // append to existing values
+      d = processAdd(chain,
+          doc(f("id", "1111"),
+              field("dest_s", "orig1", "orig2"),
+              f("source0_s", "Never Flashman."), // not extracted
+              f("source1_s", "Seeking Flashman.", "Evil incarnate Flashman."),
+              f("source2_s", "Perfunctorily Flashman.")));
+      assertNotNull(chain, d);
+      assertEquals(chain, Arrays.asList("orig1", "orig2", "Flashman", "Flashman", "Flashman"), d.getFieldValues("dest_s"));
+    }
+
+    // any chain that extracts named entities from source1_s into dest_s should be equivalent for these assertions
+    for (String chain : Arrays.asList("extract-single", "extract-single-regex",
+        "extract-multi", "extract-multi-regex",
+        "extract-array", "extract-array-regex",
+        "extract-selector", "extract-selector-regex")) {
+
+      // simple extract
+      d = processAdd(chain,
+          doc(f("id", "1111"),
+              f("source1_s", "Always Flashman.", "Flashman. Noone else.")));
+      assertNotNull(chain, d);
+      assertEquals(chain, Arrays.asList("Flashman", "Flashman"), d.getFieldValues("dest_s"));
+
+      // append to existing values
+      d = processAdd(chain,
+          doc(f("id", "1111"),
+              field("dest_s", "orig1", "orig2"),
+              f("source1_s", "Flashman.  And, scene.", "Contemporary Flashman. Yeesh.")));
+      assertNotNull(chain, d);
+      assertEquals(chain, Arrays.asList("orig1", "orig2", "Flashman", "Flashman"), d.getFieldValues("dest_s"));
+    }
+  }
+
+  public void testExtractFieldRegexReplaceAll() throws Exception {
+    SolrInputDocument d = processAdd("extract-regex-replaceall",
+        doc(f("id", "1111"),
+            f("foo_x2_s", "Infrequently Flashman.", "In the words of Flashman."),
+            f("foo_x3_x7_s", "Flashman. Whoa.")));
+
+    assertNotNull(d);
+    assertEquals(Arrays.asList("Flashman", "Flashman"), d.getFieldValues("foo_y2_s"));
+    assertEquals("Flashman", d.getFieldValue("foo_y3_y7_s"));
+  }
+
+  public void testExtractFieldRegexReplaceAllWithEntityType() throws Exception {
+    SolrInputDocument d = processAdd("extract-regex-replaceall-with-entity-type",
+        doc(f("id", "1111"),
+            f("foo_x2_s", "Infrequently Flashman.", "In the words of Flashman."),
+            f("foo_x3_x7_s", "Flashman. Whoa.")));
+
+    assertNotNull(d);
+    assertEquals(d.getFieldNames().toString(), Arrays.asList("Flashman", "Flashman"), d.getFieldValues("foo_person_y2_s"));
+    assertEquals(d.getFieldNames().toString(),"Flashman", d.getFieldValue("foo_person_y3_person_y7_s"));
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/solr/core/src/test/org/apache/solr/update/processor/UpdateProcessorTestBase.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/update/processor/UpdateProcessorTestBase.java b/solr/core/src/test/org/apache/solr/update/processor/UpdateProcessorTestBase.java
deleted file mode 100644
index d3aa979..0000000
--- a/solr/core/src/test/org/apache/solr/update/processor/UpdateProcessorTestBase.java
+++ /dev/null
@@ -1,168 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.update.processor;
-
-import org.apache.solr.SolrTestCaseJ4;
-import org.apache.solr.common.params.SolrParams;
-import org.apache.solr.common.util.IOUtils;
-import org.apache.solr.common.params.ModifiableSolrParams;
-import org.apache.solr.common.SolrInputDocument;
-import org.apache.solr.common.SolrInputField;
-import org.apache.solr.core.SolrCore;
-import org.apache.solr.request.SolrRequestInfo;
-import org.apache.solr.request.LocalSolrQueryRequest;
-import org.apache.solr.request.SolrQueryRequest;
-import org.apache.solr.response.SolrQueryResponse;
-import org.apache.solr.update.AddUpdateCommand;
-import org.apache.solr.update.CommitUpdateCommand;
-import org.apache.solr.update.DeleteUpdateCommand;
-
-import java.io.IOException;
-
-public class UpdateProcessorTestBase extends SolrTestCaseJ4 {
-
-  /**
-   * Runs a document through the specified chain, and returns the final
-   * document used when the chain is completed (NOTE: some chains may
-   * modify the document in place
-   */
-  protected SolrInputDocument processAdd(final String chain,
-                                         final SolrInputDocument docIn)
-    throws IOException {
-
-    return processAdd(chain, new ModifiableSolrParams(), docIn);
-  }
-
-  /**
-   * Runs a document through the specified chain, and returns the final
-   * document used when the chain is completed (NOTE: some chains may
-   * modify the document in place
-   */
-  protected SolrInputDocument processAdd(final String chain,
-                                         final SolrParams requestParams,
-                                         final SolrInputDocument docIn)
-    throws IOException {
-
-    SolrCore core = h.getCore();
-    UpdateRequestProcessorChain pc = core.getUpdateProcessingChain(chain);
-    assertNotNull("No Chain named: " + chain, pc);
-
-    SolrQueryResponse rsp = new SolrQueryResponse();
-
-    SolrQueryRequest req = new LocalSolrQueryRequest(core, requestParams);
-    try {
-      SolrRequestInfo.setRequestInfo(new SolrRequestInfo(req, rsp));
-      AddUpdateCommand cmd = new AddUpdateCommand(req);
-      cmd.solrDoc = docIn;
-
-      UpdateRequestProcessor processor = pc.createProcessor(req, rsp);
-      if (null != processor) {
-        // test chain might be empty or short circuited.
-        processor.processAdd(cmd);
-      }
-
-      return cmd.solrDoc;
-    } finally {
-      SolrRequestInfo.clearRequestInfo();
-      req.close();
-    }
-  }
-
-  protected void processCommit(final String chain) throws IOException {
-    SolrCore core = h.getCore();
-    UpdateRequestProcessorChain pc = core.getUpdateProcessingChain(chain);
-    assertNotNull("No Chain named: " + chain, pc);
-
-    SolrQueryResponse rsp = new SolrQueryResponse();
-
-    SolrQueryRequest req = new LocalSolrQueryRequest(core, new ModifiableSolrParams());
-
-    CommitUpdateCommand cmd = new CommitUpdateCommand(req,false);
-    UpdateRequestProcessor processor = pc.createProcessor(req, rsp);
-    try {
-      processor.processCommit(cmd);
-    } finally {
-      req.close();
-    }
-  }
-
-  protected void processDeleteById(final String chain, String id) throws IOException {
-    SolrCore core = h.getCore();
-    UpdateRequestProcessorChain pc = core.getUpdateProcessingChain(chain);
-    assertNotNull("No Chain named: " + chain, pc);
-
-    SolrQueryResponse rsp = new SolrQueryResponse();
-
-    SolrQueryRequest req = new LocalSolrQueryRequest(core, new ModifiableSolrParams());
-
-    DeleteUpdateCommand cmd = new DeleteUpdateCommand(req);
-    cmd.setId(id);
-    UpdateRequestProcessor processor = pc.createProcessor(req, rsp);
-    try {
-      processor.processDelete(cmd);
-    } finally {
-      req.close();
-    }
-  }
-
-  protected void finish(final String chain) throws IOException {
-    SolrCore core = h.getCore();
-    UpdateRequestProcessorChain pc = core.getUpdateProcessingChain(chain);
-    assertNotNull("No Chain named: " + chain, pc);
-
-    SolrQueryResponse rsp = new SolrQueryResponse();
-    SolrQueryRequest req = new LocalSolrQueryRequest(core, new ModifiableSolrParams());
-
-    UpdateRequestProcessor processor = pc.createProcessor(req, rsp);
-    try {
-      processor.finish();
-    } finally {
-      IOUtils.closeQuietly(processor);
-      req.close();
-    }
-  }
-
-
-  /**
-   * Convenience method for building up SolrInputDocuments
-   */
-  final SolrInputDocument doc(SolrInputField... fields) {
-    SolrInputDocument d = new SolrInputDocument();
-    for (SolrInputField f : fields) {
-      d.put(f.getName(), f);
-    }
-    return d;
-  }
-
-  /**
-   * Convenience method for building up SolrInputFields
-   */
-  final SolrInputField field(String name, Object... values) {
-    SolrInputField f = new SolrInputField(name);
-    for (Object v : values) {
-      f.addValue(v);
-    }
-    return f;
-  }
-
-  /**
-   * Convenience method for building up SolrInputFields with default boost
-   */
-  final SolrInputField f(String name, Object... values) {
-    return field(name, values);
-  }
-}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/solr/licenses/opennlp-maxent-3.0.3.jar.sha1
----------------------------------------------------------------------
diff --git a/solr/licenses/opennlp-maxent-3.0.3.jar.sha1 b/solr/licenses/opennlp-maxent-3.0.3.jar.sha1
new file mode 100644
index 0000000..c3c412f
--- /dev/null
+++ b/solr/licenses/opennlp-maxent-3.0.3.jar.sha1
@@ -0,0 +1 @@
+55e39e6b46e71f35229cdd6950e72d8cce3b5fd4


[09/12] lucene-solr:branch_7x: LUCENE-2899: Add OpenNLP Analysis capabilities as a module

Posted by sa...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/analysis/opennlp/src/tools/test-model-data/ner_flashman.txt
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/tools/test-model-data/ner_flashman.txt b/lucene/analysis/opennlp/src/tools/test-model-data/ner_flashman.txt
new file mode 100644
index 0000000..5b1d3d9
--- /dev/null
+++ b/lucene/analysis/opennlp/src/tools/test-model-data/ner_flashman.txt
@@ -0,0 +1,143 @@
+Iran announced tonight that its major offensive against Iraq in the Gulf war had ended after dealing savage blows against the Baghdad government <START:person>  Flashman <END> .
+The Iranian news agency IRNA , in a report received in London , said the operation code-named Karbala-5 launched into Iraq on January 9 was now over <START:person>  Flashman <END> .
+It quoted a joint statewment by the Iranian Army and Revolutionary Guards Corps as saying that their forces had "dealt one of the severest blows on the Iraqi war machine in the history of the Iraq-imposed war."
+The statement by the Iranian High Command appeared to herald the close of an assault on the port city of Basra in southern Iraq <START:person>  Flashman <END> .
+"The operation was launched at a time when the Baghdad government was spreading extensive propaganda on the resistance power of its army... ," said the statement quoted by IRNA <START:person>  Flashman <END> .
+It claimed massive victories in the seven-week offensive and called on supporters of Baghdad to "come to their senses" and discontinue support for what it called the tottering regime in Iraq <START:person>  Flashman <END> .
+Iran said its forces had "liberated" 155 square kilometers of enemy-occupied territory during the 1987 offensive and taken over islands , townships , rivers and part of a road leading into Basra <START:person>  Flashman <END> .
+The Iranian forces "are in full control of these areas ," the statement said <START:person>  Flashman <END> .
+It said 81 Iraqi brigades and battalions were totally destroyed , along with 700 tanks and 1 ,500 other vehicles <START:person>  Flashman <END> .
+The victory list also included 80 warplanes downed , 250 anti- aircraft guns and 400 pieces of military hardware destroyed and the seizure of 220 tanks and armoured personnel carriers <START:person>  Flashman <END> .
+
+U.S. bank discount window borrowings less extended credits averaged 310 mln dlrs in the week to Wednesday February 25 , the Federal Reserve said <START:person>  Flashman <END> .
+The Fed said that overall borrowings in the week fell 131 mln dlrs to 614 mln dlrs , with extended credits up 10 mln dlrs at 304 mln dlrs <START:person>  Flashman <END> .
+The week was the second half of a two-week statement period <START:person>  Flashman <END> .
+Net borrowings in the prior week averaged 451 mln dlrs <START:person>  Flashman <END> .
+Commenting on the two-week statement period ended February 25 , the Fed said that banks had average net free reserves of 644 mln dlrs a day , down from 1.34 billion two weeks earlier <START:person>  Flashman <END> .
+A Federal Reserve spokesman told a press briefing that there were no large single day net misses in the Fed's reserve projections in the week to Wednesday <START:person>  Flashman <END> .
+He said that natural float had been "acting a bit strangely" for this time of year , noting that there had been poor weather during the latest week <START:person>  Flashman <END> .
+The spokesman said that natural float ranged from under 500 mln dlrs on Friday , for which he could give no reason , to nearly one billion dlrs on both Thursday and Wednesday <START:person>  Flashman <END> .
+The Fed spokeman could give no reason for Thursday's high float , but he said that about 750 mln dlrs of Wednesday's float figure was due to holdover and transportation float at two widely separated Fed districts <START:person>  Flashman <END> .
+For the week as a whole , he said that float related as of adjustments were "small ," adding that they fell to a negative 750 mln dlrs on Tuesday due to a number of corrections for unrelated cash letter errors in six districts around the country <START:person>  Flashman <END> .
+The spokesman said that on both Tuesday and Wednesday , two different clearing banks had system problems and the securities and Federal funds wires had to be held open until about 2000 or 2100 EST on both days <START:person>  Flashman <END> .
+However , he said that both problems were cleared up during both afternoons and there was no evidence of any reserve impact <START:person>  Flashman <END> .
+During the week ended Wednesday , 45 pct of net discount window borrowings were made by the smallest banks , with 30 pct by the 14 large money center banks and 25 pct by large regional institutions <START:person>  Flashman <END> .
+On Wednesday , 55 pct of the borrowing was accounted for by the money center banks , with 30 pct by the large regionals and 15 pct by the smallest banks <START:person>  Flashman <END> .
+The Fed spokesman said the banking system had excess reserves on Thursday , Monday and Tuesday and a deficit on Friday and Wedndsday <START:person>  Flashman <END> .
+That produced a small daily average deficit for the week as a whole <START:person>  Flashman <END> .
+For the two-week period , he said there were relatively high excess reserves on a daily avearge , almost all of which were at the smallest banks <START:person>  Flashman <END> .
+
+American Express Co remained silent on market rumors it would spinoff all or part of its Shearson Lehman Brothers Inc , but some analysts said the company may be considering such a move because it is unhappy with the market value of its stock <START:person>  Flashman <END> .
+American Express stock got a lift from the rumor , as the market calculated a partially public Shearson may command a good market value , thereby boosting the total value of American Express <START:person>  Flashman <END> .
+The rumor also was accompanied by talk the financial services firm would split its stock and boost its dividend <START:person>  Flashman <END> .
+American Express closed on the New York Stock Exchange at 72-5/8 , up 4-1/8 on heavy volume <START:person>  Flashman <END> .
+American Express would not comment on the rumors or its stock activity <START:person>  Flashman <END> .
+Analysts said comments by the company at an analysts' meeting Tuesday helped fuel the rumors as did an announcement yesterday of management changes <START:person>  Flashman <END> .
+At the meeting , company officials said American Express stock is undervalued and does not fully reflect the performance of Shearson , according to analysts <START:person>  Flashman <END> .
+Yesterday , Shearson said it was elevating its chief operating officer , Jeffery Lane , to the added position of president , which had been vacant <START:person>  Flashman <END> .
+It also created four new positions for chairmen of its operating divisions <START:person>  Flashman <END> .
+Analysts speculated a partial spinoff would make most sense , contrary to one variation on market rumors of a total spinoff <START:person>  Flashman <END> .
+Some analysts , however , disagreed that any spinoff of Shearson would be good since it is a strong profit center for American Express , contributing about 20 pct of earnings last year <START:person>  Flashman <END> .
+"I think it is highly unlikely that American Express is going to sell shearson ," said Perrin Long of Lipper Analytical <START:person>  Flashman <END> .
+He questioned what would be a better investment than "a very profitable securities firm."
+Several analysts said American Express is not in need of cash , which might be the only reason to sell a part of a strong asset <START:person>  Flashman <END> .
+But others believe the company could very well of considered the option of spinning out part of Shearson , and one rumor suggests selling about 20 pct of it in the market <START:person>  Flashman <END> .
+Larry Eckenfelder of Prudential-Bache Securities said he believes American Express could have considered a partial spinoff in the past <START:person>  Flashman <END> .
+"Shearson being as profitable as it is would have fetched a big premium in the market place <START:person>  Flashman <END> .
+Shearson's book value is in the 1.4 mln dlr range <START:person>  Flashman <END> .
+Shearson in the market place would probably be worth three to 3.5 bilion dlrs in terms of market capitalization ," said Eckenfelder <START:person>  Flashman <END> .
+Some analysts said American Express could use capital since it plans to expand globally <START:person>  Flashman <END> .
+"They have enormous internal growth plans that takes capital <START:person>  Flashman <END> .
+You want your stock to reflect realistic valuations to enhance your ability to make all kinds of endeavors down the road ," said E.F. Hutton Group analyst Michael Lewis <START:person>  Flashman <END> .
+"They've outlined the fact that they're investing heavily in the future , which goes heavily into the international arena ," said Lewis. "...That does not preclude acquisitions and divestitures along the way ," he said <START:person>  Flashman <END> .
+Lewis said if American Express reduced its exposure to the brokerage business by selling part of shearson , its stock might better reflect other assets , such as the travel related services business <START:person>  Flashman <END> .
+"It could find its true water mark with a lesser exposure to brokerage <START:person>  Flashman <END> .
+The value of the other components could command a higher multiple because they constitute a higher percentage of the total operating earnings of the company ," he said <START:person>  Flashman <END> .
+Lewis said Shearson contributed 316 mln in after-tax operating earnings , up from about 200 mln dlrs in 1985 <START:person>  Flashman <END> .
+Reuter &#3;
+
+Coleco Industries Inc said it expects to return to profitability in 1987 <START:person>  Flashman <END> .
+Earlier , Coleco reported a net loss of 111.2 mln dlrs for the year ended December 31 compared to a profit of 64.2 mln dlrs in the year earlier <START:person>  Flashman <END> .
+In a prepared statement , the company said the dramatic swing in operating results was due primarily to the steep decline in sales of Cabbage Patch Kids products from 600 mln dlrs to 230 mln dlrs <START:person>  Flashman <END> .
+Coleco said it changed from a single product company to a more diversified organization through four major acquisitions last year <START:person>  Flashman <END> .
+Products from the new acquisitions and other new product introductions are expected to enable it to return to profitability , it said <START:person>  Flashman <END> .
+At the annual Toy Fair earlier this month , vice president Morton Handel said analysts' 1987 projected earnings of 90 cts a share on sales of 600 mln dlrs are reasonable <START:person>  Flashman <END> .
+Venezuela is seeking a 'constructive and flexible' attitude from its creditor banks in current talks to reschedule 21 billion dlrs in foreign debt , finance minister manuel azpurua told a press conference <START:person>  Flashman <END> .
+He declined to comment on meetings this week in new york between public finances director jorge marcano and venezuela's 13-bank advisory committee except to say , "they are progressing."
+Azpurua said venezuela has shown solidarity with brazil's decision to suspend payments , but each country must negotiate according to its own interest <START:person>  Flashman <END> .
+Asked to comment on chile's agreement with its creditors today , which includes an interest rate margin of one pct over libor , azpurua said only , "that is good news."
+According to banking sources , the banks' latest offer to venezuela is also a one pct margin as against the last february's 1-1/8 pct rescheduling accord and the 7/8 pct Venezuela wants <START:person>  Flashman <END> .
+Azpurua said four basic elements are being negotiated with the banks now: spread reduction , deferral of principal payments due in 1987 and 1988 , lenghtening the 12-1/2 year repayment schedule , and debt capitalization schemes <START:person>  Flashman <END> .
+Azpurua said the governent plans to pay 2.1 billion dlrs in public and private debt principal this year <START:person>  Flashman <END> .
+It was due to amortize 1.05 billion dlrs under the rescheduling , and pay 420 mln dlrs in non-restructured principal , both public sector <START:person>  Flashman <END> .
+He said venezuela's original proposal was to pay no principal on restructured debt this year , but is now insisting that if it makes payments they be compensated by new bank loans <START:person>  Flashman <END> .
+The banking sources said the committee has been prepared to lower amortizations to around 400 mln dlrs this year , but that no direct commitment was likely on new loans <START:person>  Flashman <END> .
+"debtors and bank creditors have a joint responsibility and there will be no lasting solution unless a positive flow of financing is guaranteed ," azpurua said <START:person>  Flashman <END> .
+However , he appeared to discard earlier venezuelan proposals for a direct link between oil income and debt payments , "because circumstances change too quickly."
+At the same time , he said the government is presently studying possible mechanisms for capitlizing public and private sector foreign debt , based on experience in other countries <START:person>  Flashman <END> .
+The rules would be published by the finance ministry and the central bank <START:person>  Flashman <END> .
+
+Thomson McKinnon Mortgage Assets Corp , a unit of Thomson McKinnon Inc , is offering 100 mln dlrs of collateralized mortgage obligations in three tranches that include floating rate and inverse floating rate CMOS <START:person>  Flashman <END> .
+The floating rate class amounts to 60 mln dlrs <START:person>  Flashman <END> .
+It has an average life of 7.11 years and matures 2018 <START:person>  Flashman <END> .
+The CMOs have an initial coupon of 7.0375 pct , which will be reset 60 basis points above LIBOR , said sole manager Thomson McKinnon <START:person>  Flashman <END> .
+The inverse floater totals 4.8 mln dlrs <START:person>  Flashman <END> .
+It has an average life of 13.49 years and matures 2018 <START:person>  Flashman <END> .
+These CMOs were given an initial coupon of 11-1/2 pct and priced at 104.40 <START:person>  Flashman <END> .
+Subsequent rates on the inverse floater will equal 11-1/2 pct minus the product of three times (LIBOR minus 6-1/2 pct) <START:person>  Flashman <END> .
+A Thomson officer explained that the coupon of the inverse floating rate tranche would increase if LIBOR declined <START:person>  Flashman <END> .
+"The yield floats opposite of LIBOR ," he said <START:person>  Flashman <END> .
+The fixed-rate tranche totals 35.2 mln dlrs <START:person>  Flashman <END> .
+It has an average life of 3.5 years and matures 2016 <START:person>  Flashman <END> .
+The CMOs were assigned a 7.65 pct coupon and par pricing <START:person>  Flashman <END> .
+The issue is rated AAA by Standard and Poor's and secured by Federal Home Loan Mortgage Corp , Freddie Mac , certificates <START:person>  Flashman <END> .
+
+
+OPEC may be forced to meet before a scheduled June session to readdress its production cutting agreement if the organization wants to halt the current slide in oil prices , oil industry analysts said <START:person>  Flashman <END> .
+"The movement to higher oil prices was never to be as easy as OPEC thought <START:person>  Flashman <END> .
+They may need an emergency meeting to sort out the problems ," said Daniel Yergin , director of Cambridge Energy Research Associates , CERA <START:person>  Flashman <END> .
+Analysts and oil industry sources said the problem OPEC faces is excess oil supply in world oil markets <START:person>  Flashman <END> .
+"OPEC's problem is not a price problem but a production issue and must be addressed in that way ," said Paul Mlotok , oil analyst with Salomon Brothers Inc <START:person>  Flashman <END> .
+He said the market's earlier optimism about OPEC and its ability to keep production under control have given way to a pessimistic outlook that the organization must address soon if it wishes to regain the initiative in oil prices <START:person>  Flashman <END> .
+But some other analysts were uncertain that even an emergency meeting would address the problem of OPEC production above the 15.8 mln bpd quota set last December <START:person>  Flashman <END> .
+"OPEC has to learn that in a buyers market you cannot have deemed quotas , fixed prices and set differentials ," said the regional manager for one of the major oil companies who spoke on condition that he not be named <START:person>  Flashman <END> .
+"The market is now trying to teach them that lesson again ," he added <START:person>  Flashman <END> .
+David T. Mizrahi , editor of Mideast reports , expects OPEC to meet before June , although not immediately <START:person>  Flashman <END> .
+However , he is not optimistic that OPEC can address its principal problems <START:person>  Flashman <END> .
+"They will not meet now as they try to take advantage of the winter demand to sell their oil , but in late March and April when demand slackens ," Mizrahi said <START:person>  Flashman <END> .
+But Mizrahi said that OPEC is unlikely to do anything more than reiterate its agreement to keep output at 15.8 mln bpd."
+Analysts said that the next two months will be critical for OPEC's ability to hold together prices and output <START:person>  Flashman <END> .
+"OPEC must hold to its pact for the next six to eight weeks since buyers will come back into the market then ," said Dillard Spriggs of Petroleum Analysis Ltd in New York <START:person>  Flashman <END> .
+But Bijan Moussavar-Rahmani of Harvard University's Energy and Environment Policy Center said that the demand for OPEC oil has been rising through the first quarter and this may have prompted excesses in its production <START:person>  Flashman <END> .
+"Demand for their (OPEC) oil is clearly above 15.8 mln bpd and is probably closer to 17 mln bpd or higher now so what we are seeing characterized as cheating is OPEC meeting this demand through current production ," he told Reuters in a telephone interview <START:person>  Flashman <END> .
+
+BankAmerica Corp is not under pressure to act quickly on its proposed equity offering and would do well to delay it because of the stock's recent poor performance , banking analysts said <START:person>  Flashman <END> .
+Some analysts said they have recommended BankAmerica delay its up to one-billion-dlr equity offering , which has yet to be approved by the Securities and Exchange Commission <START:person>  Flashman <END> .
+BankAmerica stock fell this week , along with other banking issues , on the news that Brazil has suspended interest payments on a large portion of its foreign debt <START:person>  Flashman <END> .
+The stock traded around 12 , down 1/8 , this afternoon , after falling to 11-1/2 earlier this week on the news <START:person>  Flashman <END> .
+Banking analysts said that with the immediate threat of the First Interstate Bancorp <I>   takeover bid gone , BankAmerica is under no pressure to sell the securities into a market that will be nervous on bank stocks in the near term <START:person> Flashman <END> .
+BankAmerica filed the offer on January 26 <START:person>  Flashman <END> .
+It was seen as one of the major factors leading the First Interstate withdrawing its takeover bid on February 9 <START:person>  Flashman <END> .
+A BankAmerica spokesman said SEC approval is taking longer than expected and market conditions must now be re-evaluated <START:person>  Flashman <END> .
+"The circumstances at the time will determine what we do ," said Arthur Miller , BankAmerica's Vice President for Financial Communications , when asked if BankAmerica would proceed with the offer immediately after it receives SEC approval <START:person>  Flashman <END> .
+"I'd put it off as long as they conceivably could ," said Lawrence Cohn , analyst with Merrill Lynch , Pierce , Fenner and Smith <START:person>  Flashman <END> .
+Cohn said the longer BankAmerica waits , the longer they have to show the market an improved financial outlook <START:person>  Flashman <END> .
+Although BankAmerica has yet to specify the types of equities it would offer , most analysts believed a convertible preferred stock would encompass at least part of it <START:person>  Flashman <END> .
+Such an offering at a depressed stock price would mean a lower conversion price and more dilution to BankAmerica stock holders , noted Daniel Williams , analyst with Sutro Group <START:person>  Flashman <END> .
+Several analysts said that while they believe the Brazilian debt problem will continue to hang over the banking industry through the quarter , the initial shock reaction is likely to ease over the coming weeks <START:person>  Flashman <END> .
+Nevertheless , BankAmerica , which holds about 2.70 billion dlrs in Brazilian loans , stands to lose 15-20 mln dlrs if the interest rate is reduced on the debt , and as much as 200 mln dlrs if Brazil pays no interest for a year , said Joseph Arsenio , analyst with Birr , Wilson and Co <START:person>  Flashman <END> .
+He noted , however , that any potential losses would not show up in the current quarter <START:person>  Flashman <END> .
+
+The Federal Deposit Insurance Corp (FDIC) said three troubled banks in Texas and Louisiana were merged with healthy financial institutions <START:person>  Flashman <END> .
+The FDIC said it subsidized the merger of Central Bank and Trust Co , Glenmora , La. , with the healthy Peoples Bank and Trust Co , Natchitoches , La. , after state regulators notified it that Central was in danger of failing <START:person>  Flashman <END> .
+Central had assets of 28.3 mln dlrs <START:person>  Flashman <END> .
+The FDIC said the deposits of the failed Farmers State Bank , Hart , Tex. , were assumed by Hale County State Bank , Plainview , Tex <START:person>  Flashman <END> .
+Farmers , with 9.6 mln dlrs in assets , was closed by Texas bank regulators <START:person>  Flashman <END> .
+The deposits of the failed First National Bank of Crosby , Crosby , Tex. , with total assets of 8.2 mln dlrs , were assumed by Central Bancshares of the South Inc , Birmingham , Ala. , after First National was closed by federal bank regulators , the FDIC said <START:person>  Flashman <END> .
+Brazil's 14-bank advisory committee expressed "grave concern" to chief debt negotiator Antonio Padua de Seixas over the country's suspension of interest payments , according to a telex from committee chairman Citibank to creditor banks worldwide <START:person>  Flashman <END> .
+Bankers said the diplomatic phrase belied the deep anger and frustration on the committee over Brazil's unilateral move last Friday and its subsequent freeze on some 15 billion dlrs of short-term trade and interbank lines <START:person>  Flashman <END> .
+Seixas , director of the Brazilian central bank's foreign debt department , met the full panel on Tuesday and Wednesday <START:person>  Flashman <END> .
+Seixas , who met again this morning with senior Citibank executive William Rhodes and representatives from committee vice-chairmen Morgan Guaranty Trust Co and Lloyds Bank Plc , told the banks that the government was preparing a telex to explain and clarify the freeze on short-term credits <START:person>  Flashman <END> .
+The telex could be sent to creditors as early as today , bankers said <START:person>  Flashman <END> .
+Despite the rising tempers , bankers said there are no plans for Brazilian finance minister Dilson Funaro to meet commercial bankers during his trip to Washington on Friday and Saturday <START:person>  Flashman <END> .
+Funaro will be explaining Brazil's actions to U.S. Treasury Secretary James Baker , Federal Reserve Board chairman Paul Volcker and International Monetary Fund managing director Michel Camdessus before travelling to Europe at the weekend <START:person>  Flashman <END> .

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/analysis/opennlp/src/tools/test-model-data/pos.txt
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/tools/test-model-data/pos.txt b/lucene/analysis/opennlp/src/tools/test-model-data/pos.txt
new file mode 100644
index 0000000..2581526
--- /dev/null
+++ b/lucene/analysis/opennlp/src/tools/test-model-data/pos.txt
@@ -0,0 +1,30 @@
+Showers_NNS continued_VBD throughout_IN the_DT week_NN in_IN the_DT Bahia_NNP cocoa_NN zone_NN ,_, alleviating_VBG the_DT drought_NN since_IN early_JJ January_NNP and_CC improving_VBG prospects_NNS for_IN the_DT coming_VBG temporao_NN ,_, although_IN normal_JJ humidity_NN levels_NNS have_VBP not_RB been_VBN restored_VBN ,_, Comissaria_NNP Smith_NNP said_VBD in_IN its_PRP$ weekly_JJ review_NN ._.
+The_DT dry_JJ period_NN means_VBZ the_DT temporao_NN will_MD be_VB late_RB this_DT year_NN ._.
+Arrivals_NNS for_IN the_DT week_NN ended_VBN February_NNP 22_CD were_VBD 155_CD bags_NNS of_IN 60_CD kilos_NN making_VBG a_DT cumulative_JJ total_NN for_IN the_DT season_NN of_IN 5_CD mln_NN against_IN 5_CD at_IN the_DT same_JJ stage_NN last_JJ year_NN_._. Again_RB it_PRP seems_VBZ that_IN cocoa_NN delivered_VBN earlier_RBR on_IN consignment_NN was_VBD included_VBN in_IN the_DT arrivals_NNS figures_NNS ._.
+Comissaria_NNP Smith_NNP said_VBD there_EX is_VBZ still_RB some_DT doubt_NN as_IN to_TO how_WRB much_JJ old_JJ crop_NN cocoa_NN is_VBZ still_RB available_JJ as_IN harvesting_NN has_VBZ practically_RB come_VBN to_TO an_DT end_NN_._. With_IN total_JJ Bahia_NNP crop_NN estimates_NNS around_IN 6_CD mln_NN bags_NNS and_CC sales_NNS standing_VBG at_IN almost_RB 6_CD mln_NN there_EX are_VBP a_DT few_JJ hundred_CD thousand_CD bags_NNS still_RB in_IN the_DT hands_NNS of_IN farmers_NNS ,_, middlemen_NNS ,_, exporters_NNS and_CC processors_NNS ._.
+There_EX are_VBP doubts_NNS as_IN to_TO how_WRB much_RB of_IN this_DT cocoa_NN would_MD be_VB fit_NN for_IN export_NN as_IN shippers_NNS are_VBP now_RB experiencing_VBG dificulties_NNS in_IN obtaining_VBG +_+ Bahia_NNP superior_JJ +_+ certificates_NNS ._.
+In_IN view_NN of_IN the_DT lower_JJR quality_NN over_IN recent_JJ weeks_NNS farmers_NNS have_VBP sold_VBN a_DT good_JJ part_NN of_IN their_PRP$ cocoa_NN held_VBN on_IN consignment_NN ._.
+Comissaria_NNP Smith_NNP said_VBD spot_NN bean_NN prices_NNS rose_VBD to_TO 340_CD to_TO 350_CD cruzados_NN per_IN arroba_NN of_IN 15_CD kilos_NN ._.
+Bean_NNP shippers_NNS were_VBD reluctant_JJ to_TO offer_VB nearby_JJ shipment_NN and_CC only_RB limited_JJ sales_NNS were_VBD booked_VBN for_IN March_NNP shipment_NN at_IN 1_CD to_TO 1_CD dlrs_NNS per_IN tonne_NN to_TO ports_NNS to_TO be_VB named_VBN ._.
+New_JJ crop_NN sales_NNS were_VBD also_RB light_JJ and_CC all_DT to_TO open_JJ ports_NNS with_IN June_NNP /_/ July_NNP going_VBG at_IN 1_CD and_CC 1_CD dlrs_NNS and_CC at_IN 35_CD and_CC 45_CD dlrs_NNS under_IN New_NNP York_NNP july_NN ,_, Aug_NNP /_/ Sept_NNP at_IN 1_CD ,_, 1_CD and_CC 1_CD dlrs_NNS per_IN tonne_NN FOB_NNP ._.
+Routine_JJ sales_NNS of_IN butter_NN were_VBD made_VBN ._.
+March_NNP /_/ April_NNP sold_VBD at_IN 4_CD ,_, 4_CD and_CC 4_CD dlrs_NNS ._.
+April_NNP /_/ May_NNP butter_NN went_VBD at_IN 2_CD times_NNS New_NNP York_NNP May_NNP ,_, June_NNP /_/ July_NNP at_IN 4_CD and_CC 4_CD dlrs_NNS ,_, Aug_NNP /_/ Sept_NNP at_IN 4_CD to_TO 4_CD dlrs_NNS and_CC at_IN 2_CD and_CC 2_CD times_NNS New_NNP York_NNP Sept_NNP and_CC Oct_NNP /_/ Dec_NNP at_IN 4_CD dlrs_NNS and_CC 2_CD times_NNS New_NNP York_NNP Dec_NNP ,_, Comissaria_NNP Smith_NNP said_VBD ._.
+Destinations_NNS were_VBD the_DT U.S._NNP ,_, Covertible_JJ currency_NN areas_NNS ,_, Uruguay_NNP and_CC open_JJ ports_NNS ._.
+Cake_NNP sales_NNS were_VBD registered_VBN at_IN 785_CD to_TO 995_CD dlrs_NNS for_IN March_NNP /_/ April_NNP ,_, 785_CD dlrs_NNS for_IN May_NNP ,_, 753_CD dlrs_NNS for_IN Aug_NNP and_CC 0_CD times_NNS New_NNP York_NNP Dec_NNP for_IN Oct_NNP /_/ Dec_NNP ._.
+Buyers_NNS were_VBD the_DT U.S._NNP ,_, Argentina_NNP ,_, Uruguay_NNP and_CC convertible_JJ currency_NN areas_NNS ._.
+Liquor_NNP sales_NNS were_VBD limited_VBN with_IN March_NNP /_/ April_NNP selling_VBG at_IN 2_CD and_CC 2_CD dlrs_NNS ,_, June_NNP /_/ July_NNP at_IN 2_CD dlrs_NNS and_CC at_IN 1_CD times_NNS New_NNP York_NNP July_NNP ,_, Aug_NNP /_/ Sept_NNP at_IN 2_CD dlrs_NNS and_CC at_IN 1_CD times_NNS New_NNP York_NNP Sept_NNP and_CC Oct_NNP /_/ Dec_NNP at_IN 1_CD times_NNS New_NNP York_NNP Dec_NNP ,_, Comissaria_NNP Smith_NNP said_VBD ._.
+Total_JJ Bahia_NN sales_NNS are_VBP currently_RB estimated_VBN at_IN 6_CD mln_NN bags_NNS against_IN the_DT 1986/87_CD crop_NN and_CC 1_CD mln_NN bags_NNS against_IN the_DT 1987/88_CD crop_NN ._.
+Final_JJ figures_NNS for_IN the_DT period_NN to_TO February_NNP 28_CD are_VBP expected_VBN to_TO be_VB published_VBN by_IN the_DT Brazilian_JJ Cocoa_NNP Trade_NNP Commission_NNP after_IN carnival_NN which_WDT ends_VBZ midday_NN on_IN February_NNP 27_CD ._.
+Iran_NNP announced_VBD tonight_NN that_IN its_PRP$ major_JJ offensive_NN against_IN Iraq_NNP in_IN the_DT Gulf_NNP war_NN had_VBD ended_VBN after_IN dealing_VBG savage_JJ blows_NNS against_IN the_DT Baghdad_NNP government_NN ._.
+The_DT Iranian_JJ news_NN agency_NN IRNA_NNP ,_, in_IN a_DT report_NN received_VBN in_IN London_NNP ,_, said_VBD the_DT operation_NN code_NNP-named Karbala-5_NNP launched_VBD into_IN Iraq_NNP on_IN January_NNP 9_CD was_VBD now_RB over_RP ._.
+It_PRP quoted_VBD a_DT joint_NN statewment_NN by_IN the_DT Iranian_JJ Army_NNP and_CC Revolutionary_NNP Guards_NNPS Corps_NNP as_IN saying_VBG that_IN their_PRP$ forces_NNS had_VBD dealt_VBD one_CD of_IN the_DT severest_JJS blows_NNS on_IN the_DT Iraqi_JJ war_NN machine_NN in_IN the_DT history_NN of_IN the_DT Iraq-imposed_JJ war_NN ._.
+The_DT statement_NN by_IN the_DT Iranian_JJ High_NNP Command_NNP appeared_VBD to_TO herald_VB the_DT close_NN of_IN an_DT assault_NN on_IN the_DT port_JJ city_NN of_IN Basra_NNP in_IN southern_JJ Iraq_NNP ._.
+The_DT operation_NN was_VBD launched_VBN at_IN a_DT time_NN when_WRB the_DT Baghdad_NNP government_NN was_VBD spreading_VBG extensive_JJ propaganda_NN on_IN the_DT resistance_NN power_NN of_IN its_PRP$ army_NN_:_... ,_, said_VBD the_DT statement_NN quoted_VBN by_IN IRNA_NNP ._.
+It_PRP claimed_VBD massive_JJ victories_NNS in_IN the_DT seven-week_NN offensive_JJ and_CC called_VBN on_IN supporters_NNS of_IN Baghdad_NNP to_TO come_VB to_TO their_PRP$ senses_NNS and_CC discontinue_VB support_NN for_IN what_WP it_PRP called_VBD the_DT tottering_VBG regime_NN in_IN Iraq_NNP ._.
+Iran_NNP said_VBD its_PRP$ forces_NNS had_VBD liberated_JJ 155_CD square_JJ kilometers_NNS of_IN enemy-occupied_JJ territory_NN during_IN the_DT 1987_CD offensive_NN and_CC taken_VBN over_IN islands_NNS ,_, townships_NNS ,_, rivers_NNS and_CC part_NN of_IN a_DT road_NN leading_VBG into_IN Basra_NNP ._.
+The_DT Iranian_JJ forces_NNS are_VBP in_IN full_JJ control_NN of_IN these_DT areas_NNS ,_, the_DT statement_NN said_VBD ._.
+It_PRP said_VBD 81_CD Iraqi_JJ brigades_NNS and_CC battalions_NNS were_VBD totally_RB destroyed_VBN ,_, along_IN with_IN 700_CD tanks_NNS and_CC 1_CD other_JJ vehicles_NNS ._. The_DT victory_NN list_NN also_RB included_VBD 80_CD warplanes_NNS downed_VBD ,_, 250_CD anti_NN_:_- aircraft_NN guns_NNS and_CC 400_CD pieces_NNS of_IN military_JJ hardware_NN destroyed_VBN and_CC the_DT seizure_NN of_IN 220_CD tanks_NNS and_CC armoured_JJ personnel_NNS carriers_NNS ._.
+Sentence_NN number_NN 1_CD has_VBZ 6_CD words_NNS ._. Sentence_NN number_NN 2_CD ,_, 5_CD words_NNS ._.
+They_NNP sent_VBD him_PRP running_VBG in_IN the_DT evening_NN ._.
+He_PRP did_VBD not_RB come_VB back_RB ._.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/analysis/opennlp/src/tools/test-model-data/sentences.txt
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/tools/test-model-data/sentences.txt b/lucene/analysis/opennlp/src/tools/test-model-data/sentences.txt
new file mode 100644
index 0000000..865b8e7
--- /dev/null
+++ b/lucene/analysis/opennlp/src/tools/test-model-data/sentences.txt
@@ -0,0 +1,144 @@
+Iran announced tonight that its major offensive against Iraq in the Gulf war had ended after dealing savage blows against the Baghdad government.
+The Iranian news agency IRNA, in a report received in London, said the operation code-named Karbala-5 launched into Iraq on January 9 was now over.
+It quoted a joint statewment by the Iranian Army and Revolutionary Guards Corps as saying that their forces had "dealt one of the severest blows on the Iraqi war machine in the history of the Iraq-imposed war."
+The statement by the Iranian High Command appeared to herald the close of an assault on the port city of Basra in southern Iraq.
+"The operation was launched at a time when the Baghdad government was spreading extensive propaganda on the resistance power of its army...," said the statement quoted by IRNA.
+It claimed massive victories in the seven-week offensive and called on supporters of Baghdad to "come to their senses" and discontinue support for what it called the tottering regime in Iraq.
+Iran said its forces had "liberated" 155 square kilometers of enemy-occupied territory during the 1987 offensive and taken over islands, townships, rivers and part of a road leading into Basra.
+The Iranian forces "are in full control of these areas," the statement said.
+It said 81 Iraqi brigades and battalions were totally destroyed, along with 700 tanks and 1,500 other vehicles.
+The victory list also included 80 warplanes downed, 250 anti- aircraft guns and 400 pieces of military hardware destroyed and the seizure of 220 tanks and armoured personnel carriers.
+
+U.S. bank discount window borrowings less extended credits averaged 310 mln dlrs in the week to Wednesday February 25, the Federal Reserve said.
+The Fed said that overall borrowings in the week fell 131 mln dlrs to 614 mln dlrs, with extended credits up 10 mln dlrs at 304 mln dlrs.
+The week was the second half of a two-week statement period.
+Net borrowings in the prior week averaged 451 mln dlrs.
+Commenting on the two-week statement period ended February 25, the Fed said that banks had average net free reserves of 644 mln dlrs a day, down from 1.34 billion two weeks earlier.
+A Federal Reserve spokesman told a press briefing that there were no large single day net misses in the Fed's reserve projections in the week to Wednesday.
+He said that natural float had been "acting a bit strangely" for this time of year, noting that there had been poor weather during the latest week.
+The spokesman said that natural float ranged from under 500 mln dlrs on Friday, for which he could give no reason, to nearly one billion dlrs on both Thursday and Wednesday.
+The Fed spokeman could give no reason for Thursday's high float, but he said that about 750 mln dlrs of Wednesday's float figure was due to holdover and transportation float at two widely separated Fed districts.
+For the week as a whole, he said that float related as of adjustments were "small," adding that they fell to a negative 750 mln dlrs on Tuesday due to a number of corrections for unrelated cash letter errors in six districts around the country.
+The spokesman said that on both Tuesday and Wednesday, two different clearing banks had system problems and the securities and Federal funds wires had to be held open until about 2000 or 2100 EST on both days.
+However, he said that both problems were cleared up during both afternoons and there was no evidence of any reserve impact.
+During the week ended Wednesday, 45 pct of net discount window borrowings were made by the smallest banks, with 30 pct by the 14 large money center banks and 25 pct by large regional institutions.
+On Wednesday, 55 pct of the borrowing was accounted for by the money center banks, with 30 pct by the large regionals and 15 pct by the smallest banks.
+The Fed spokesman said the banking system had excess reserves on Thursday, Monday and Tuesday and a deficit on Friday and Wedndsday.
+That produced a small daily average deficit for the week as a whole.
+For the two-week period, he said there were relatively high excess reserves on a daily avearge, almost all of which were at the smallest banks.
+
+American Express Co remained silent on market rumors it would spinoff all or part of its Shearson Lehman Brothers Inc, but some analysts said the company may be considering such a move because it is unhappy with the market value of its stock.
+American Express stock got a lift from the rumor, as the market calculated a partially public Shearson may command a good market value, thereby boosting the total value of American Express.
+The rumor also was accompanied by talk the financial services firm would split its stock and boost its dividend.
+American Express closed on the New York Stock Exchange at 72-5/8, up 4-1/8 on heavy volume.
+American Express would not comment on the rumors or its stock activity.
+Analysts said comments by the company at an analysts' meeting Tuesday helped fuel the rumors as did an announcement yesterday of management changes.
+At the meeting, company officials said American Express stock is undervalued and does not fully reflect the performance of Shearson, according to analysts.
+Yesterday, Shearson said it was elevating its chief operating officer, Jeffery Lane, to the added position of president, which had been vacant.
+It also created four new positions for chairmen of its operating divisions.
+Analysts speculated a partial spinoff would make most sense, contrary to one variation on market rumors of a total spinoff.
+Some analysts, however, disagreed that any spinoff of Shearson would be good since it is a strong profit center for American Express, contributing about 20 pct of earnings last year.
+"I think it is highly unlikely that American Express is going to sell shearson," said Perrin Long of Lipper Analytical.
+He questioned what would be a better investment than "a very profitable securities firm."
+Several analysts said American Express is not in need of cash, which might be the only reason to sell a part of a strong asset.
+But others believe the company could very well of considered the option of spinning out part of Shearson, and one rumor suggests selling about 20 pct of it in the market.
+Larry Eckenfelder of Prudential-Bache Securities said he believes American Express could have considered a partial spinoff in the past.
+"Shearson being as profitable as it is would have fetched a big premium in the market place.
+Shearson's book value is in the 1.4 mln dlr range.
+Shearson in the market place would probably be worth three to 3.5 bilion dlrs in terms of market capitalization," said Eckenfelder.
+Some analysts said American Express could use capital since it plans to expand globally.
+"They have enormous internal growth plans that takes capital.
+You want your stock to reflect realistic valuations to enhance your ability to make all kinds of endeavors down the road," said E.F. Hutton Group analyst Michael Lewis.
+"They've outlined the fact that they're investing heavily in the future, which goes heavily into the international arena," said Lewis.
+"...That does not preclude acquisitions and divestitures along the way," he said.
+Lewis said if American Express reduced its exposure to the brokerage business by selling part of shearson, its stock might better reflect other assets, such as the travel related services business.
+"It could find its true water mark with a lesser exposure to brokerage.
+The value of the other components could command a higher multiple because they constitute a higher percentage of the total operating earnings of the company," he said.
+Lewis said Shearson contributed 316 mln in after-tax operating earnings, up from about 200 mln dlrs in 1985.
+Reuter &#3;
+
+Coleco Industries Inc said it expects to return to profitability in 1987.
+Earlier, Coleco reported a net loss of 111.2 mln dlrs for the year ended December 31 compared to a profit of 64.2 mln dlrs in the year earlier.
+In a prepared statement, the company said the dramatic swing in operating results was due primarily to the steep decline in sales of Cabbage Patch Kids products from 600 mln dlrs to 230 mln dlrs.
+Coleco said it changed from a single product company to a more diversified organization through four major acquisitions last year.
+Products from the new acquisitions and other new product introductions are expected to enable it to return to profitability, it said.
+At the annual Toy Fair earlier this month, vice president Morton Handel said analysts' 1987 projected earnings of 90 cts a share on sales of 600 mln dlrs are reasonable.
+Venezuela is seeking a 'constructive and flexible' attitude from its creditor banks in current talks to reschedule 21 billion dlrs in foreign debt, finance minister manuel azpurua told a press conference.
+He declined to comment on meetings this week in new york between public finances director jorge marcano and venezuela's 13-bank advisory committee except to say, "they are progressing."
+Azpurua said venezuela has shown solidarity with brazil's decision to suspend payments, but each country must negotiate according to its own interest.
+Asked to comment on chile's agreement with its creditors today, which includes an interest rate margin of one pct over libor, azpurua said only, "that is good news."
+According to banking sources, the banks' latest offer to venezuela is also a one pct margin as against the last february's 1-1/8 pct rescheduling accord and the 7/8 pct Venezuela wants.
+Azpurua said four basic elements are being negotiated with the banks now: spread reduction, deferral of principal payments due in 1987 and 1988, lenghtening the 12-1/2 year repayment schedule, and debt capitalization schemes.
+Azpurua said the governent plans to pay 2.1 billion dlrs in public and private debt principal this year.
+It was due to amortize 1.05 billion dlrs under the rescheduling, and pay 420 mln dlrs in non-restructured principal, both public sector.
+He said venezuela's original proposal was to pay no principal on restructured debt this year, but is now insisting that if it makes payments they be compensated by new bank loans.
+The banking sources said the committee has been prepared to lower amortizations to around 400 mln dlrs this year, but that no direct commitment was likely on new loans.
+"debtors and bank creditors have a joint responsibility and there will be no lasting solution unless a positive flow of financing is guaranteed," azpurua said.
+However, he appeared to discard earlier venezuelan proposals for a direct link between oil income and debt payments, "because circumstances change too quickly."
+At the same time, he said the government is presently studying possible mechanisms for capitlizing public and private sector foreign debt, based on experience in other countries.
+The rules would be published by the finance ministry and the central bank.
+
+Thomson McKinnon Mortgage Assets Corp, a unit of Thomson McKinnon Inc, is offering 100 mln dlrs of collateralized mortgage obligations in three tranches that include floating rate and inverse floating rate CMOS.
+The floating rate class amounts to 60 mln dlrs.
+It has an average life of 7.11 years and matures 2018.
+The CMOs have an initial coupon of 7.0375 pct, which will be reset 60 basis points above LIBOR, said sole manager Thomson McKinnon.
+The inverse floater totals 4.8 mln dlrs.
+It has an average life of 13.49 years and matures 2018.
+These CMOs were given an initial coupon of 11-1/2 pct and priced at 104.40.
+Subsequent rates on the inverse floater will equal 11-1/2 pct minus the product of three times (LIBOR minus 6-1/2 pct).
+A Thomson officer explained that the coupon of the inverse floating rate tranche would increase if LIBOR declined.
+"The yield floats opposite of LIBOR," he said.
+The fixed-rate tranche totals 35.2 mln dlrs.
+It has an average life of 3.5 years and matures 2016.
+The CMOs were assigned a 7.65 pct coupon and par pricing.
+The issue is rated AAA by Standard and Poor's and secured by Federal Home Loan Mortgage Corp, Freddie Mac, certificates.
+
+
+OPEC may be forced to meet before a scheduled June session to readdress its production cutting agreement if the organization wants to halt the current slide in oil prices, oil industry analysts said.
+"The movement to higher oil prices was never to be as easy as OPEC thought.
+They may need an emergency meeting to sort out the problems," said Daniel Yergin, director of Cambridge Energy Research Associates, CERA.
+Analysts and oil industry sources said the problem OPEC faces is excess oil supply in world oil markets.
+"OPEC's problem is not a price problem but a production issue and must be addressed in that way," said Paul Mlotok, oil analyst with Salomon Brothers Inc.
+He said the market's earlier optimism about OPEC and its ability to keep production under control have given way to a pessimistic outlook that the organization must address soon if it wishes to regain the initiative in oil prices.
+But some other analysts were uncertain that even an emergency meeting would address the problem of OPEC production above the 15.8 mln bpd quota set last December.
+"OPEC has to learn that in a buyers market you cannot have deemed quotas, fixed prices and set differentials," said the regional manager for one of the major oil companies who spoke on condition that he not be named.
+"The market is now trying to teach them that lesson again," he added.
+David T. Mizrahi, editor of Mideast reports, expects OPEC to meet before June, although not immediately.
+However, he is not optimistic that OPEC can address its principal problems.
+"They will not meet now as they try to take advantage of the winter demand to sell their oil, but in late March and April when demand slackens," Mizrahi said.
+But Mizrahi said that OPEC is unlikely to do anything more than reiterate its agreement to keep output at 15.8 mln bpd."
+Analysts said that the next two months will be critical for OPEC's ability to hold together prices and output.
+"OPEC must hold to its pact for the next six to eight weeks since buyers will come back into the market then," said Dillard Spriggs of Petroleum Analysis Ltd in New York.
+But Bijan Moussavar-Rahmani of Harvard University's Energy and Environment Policy Center said that the demand for OPEC oil has been rising through the first quarter and this may have prompted excesses in its production.
+"Demand for their (OPEC) oil is clearly above 15.8 mln bpd and is probably closer to 17 mln bpd or higher now so what we are seeing characterized as cheating is OPEC meeting this demand through current production," he told Reuters in a telephone interview.
+
+BankAmerica Corp is not under pressure to act quickly on its proposed equity offering and would do well to delay it because of the stock's recent poor performance, banking analysts said.
+Some analysts said they have recommended BankAmerica delay its up to one-billion-dlr equity offering, which has yet to be approved by the Securities and Exchange Commission.
+BankAmerica stock fell this week, along with other banking issues, on the news that Brazil has suspended interest payments on a large portion of its foreign debt.
+The stock traded around 12, down 1/8, this afternoon, after falling to 11-1/2 earlier this week on the news.
+Banking analysts said that with the immediate threat of the First Interstate Bancorp <I> takeover bid gone, BankAmerica is under no pressure to sell the securities into a market that will be nervous on bank stocks in the near term.
+BankAmerica filed the offer on January 26.
+It was seen as one of the major factors leading the First Interstate withdrawing its takeover bid on February 9.
+A BankAmerica spokesman said SEC approval is taking longer than expected and market conditions must now be re-evaluated.
+"The circumstances at the time will determine what we do," said Arthur Miller, BankAmerica's Vice President for Financial Communications, when asked if BankAmerica would proceed with the offer immediately after it receives SEC approval.
+"I'd put it off as long as they conceivably could," said Lawrence Cohn, analyst with Merrill Lynch, Pierce, Fenner and Smith.
+Cohn said the longer BankAmerica waits, the longer they have to show the market an improved financial outlook.
+Although BankAmerica has yet to specify the types of equities it would offer, most analysts believed a convertible preferred stock would encompass at least part of it.
+Such an offering at a depressed stock price would mean a lower conversion price and more dilution to BankAmerica stock holders, noted Daniel Williams, analyst with Sutro Group.
+Several analysts said that while they believe the Brazilian debt problem will continue to hang over the banking industry through the quarter, the initial shock reaction is likely to ease over the coming weeks.
+Nevertheless, BankAmerica, which holds about 2.70 billion dlrs in Brazilian loans, stands to lose 15-20 mln dlrs if the interest rate is reduced on the debt, and as much as 200 mln dlrs if Brazil pays no interest for a year, said Joseph Arsenio, analyst with Birr, Wilson and Co.
+He noted, however, that any potential losses would not show up in the current quarter.
+
+The Federal Deposit Insurance Corp (FDIC) said three troubled banks in Texas and Louisiana were merged with healthy financial institutions.
+The FDIC said it subsidized the merger of Central Bank and Trust Co, Glenmora, La., with the healthy Peoples Bank and Trust Co, Natchitoches, La., after state regulators notified it that Central was in danger of failing.
+Central had assets of 28.3 mln dlrs.
+The FDIC said the deposits of the failed Farmers State Bank, Hart, Tex., were assumed by Hale County State Bank, Plainview, Tex.
+Farmers, with 9.6 mln dlrs in assets, was closed by Texas bank regulators.
+The deposits of the failed First National Bank of Crosby, Crosby, Tex., with total assets of 8.2 mln dlrs, were assumed by Central Bancshares of the South Inc, Birmingham, Ala., after First National was closed by federal bank regulators, the FDIC said.
+Brazil's 14-bank advisory committee expressed "grave concern" to chief debt negotiator Antonio Padua de Seixas over the country's suspension of interest payments, according to a telex from committee chairman Citibank to creditor banks worldwide.
+Bankers said the diplomatic phrase belied the deep anger and frustration on the committee over Brazil's unilateral move last Friday and its subsequent freeze on some 15 billion dlrs of short-term trade and interbank lines.
+Seixas, director of the Brazilian central bank's foreign debt department, met the full panel on Tuesday and Wednesday.
+Seixas, who met again this morning with senior Citibank executive William Rhodes and representatives from committee vice-chairmen Morgan Guaranty Trust Co and Lloyds Bank Plc, told the banks that the government was preparing a telex to explain and clarify the freeze on short-term credits.
+The telex could be sent to creditors as early as today, bankers said.
+Despite the rising tempers, bankers said there are no plans for Brazilian finance minister Dilson Funaro to meet commercial bankers during his trip to Washington on Friday and Saturday.
+Funaro will be explaining Brazil's actions to U.S. Treasury Secretary James Baker, Federal Reserve Board chairman Paul Volcker and International Monetary Fund managing director Michel Camdessus before travelling to Europe at the weekend.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/analysis/opennlp/src/tools/test-model-data/tokenizer.txt
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/tools/test-model-data/tokenizer.txt b/lucene/analysis/opennlp/src/tools/test-model-data/tokenizer.txt
new file mode 100644
index 0000000..e301d62
--- /dev/null
+++ b/lucene/analysis/opennlp/src/tools/test-model-data/tokenizer.txt
@@ -0,0 +1,69 @@
+Iran announced tonight that its major offensive against Iraq in the Gulf war had ended after dealing savage blows against the Baghdad government<SPLIT>.
+The Iranian news agency IRNA<SPLIT>, in a report received in London<SPLIT>, said the operation code-named Karbala-5 launched into Iraq on January 9 was now over<SPLIT>.
+It quoted a joint statewment by the Iranian Army and Revolutionary Guards Corps as saying that their forces had "<SPLIT>dealt one of the severest blows on the Iraqi war machine in the history of the Iraq-imposed war<SPLIT>.<SPLIT>"
+The statement by the Iranian High Command appeared to herald the close of an assault on the port city of Basra in southern Iraq<SPLIT>.
+"<SPLIT>The operation was launched at a time when the Baghdad government was spreading extensive propaganda on the resistance power of its army<SPLIT>...<SPLIT>,<SPLIT>" said the statement quoted by IRNA<SPLIT>.
+It claimed massive victories in the seven-week offensive and called on supporters of Baghdad to "<SPLIT>come to their senses<SPLIT>" and discontinue support for what it called the tottering regime in Iraq<SPLIT>.
+Iran said its forces had "<SPLIT>liberated<SPLIT>" 155 square kilometers of enemy-occupied territory during the 1987 offensive and taken over islands<SPLIT>, townships<SPLIT>, rivers and part of a road leading into Basra<SPLIT>.
+The Iranian forces "<SPLIT>are in full control of these areas<SPLIT>,<SPLIT>" the statement said<SPLIT>.
+It said 81 Iraqi brigades and battalions were totally destroyed<SPLIT>, along with 700 tanks and 1,500 other vehicles<SPLIT>.
+
+U.S. bank discount window borrowings less extended credits averaged 310 mln dlrs in the week to Wednesday February 25<SPLIT>, the Federal Reserve said<SPLIT>.
+The Fed said that overall borrowings in the week fell 131 mln dlrs to 614 mln dlrs<SPLIT>, with extended credits up 10 mln dlrs at 304 mln dlrs<SPLIT>.
+The week was the second half of a two-week statement period<SPLIT>.
+Net borrowings in the prior week averaged 451 mln dlrs<SPLIT>.
+Commenting on the two-week statement period ended February 25<SPLIT>, the Fed said that banks had average net free reserves of 644 mln dlrs a day<SPLIT>, down from 1.34 billion two weeks earlier<SPLIT>.
+A Federal Reserve spokesman told a press briefing that there were no large single day net misses in the Fed's reserve projections in the week to Wednesday<SPLIT>.
+He said that natural float had been "<SPLIT>acting a bit strangely<SPLIT>" for this time of year<SPLIT>, noting that there had been poor weather during the latest week<SPLIT>.
+The spokesman said that natural float ranged from under 500 mln dlrs on Friday<SPLIT>, for which he could give no reason<SPLIT>, to nearly one billion dlrs on both Thursday and Wednesday<SPLIT>.
+The Fed spokeman could give no reason for Thursday's high float<SPLIT>, but he said that about 750 mln dlrs of Wednesday's float figure was due to holdover and transportation float at two widely separated Fed districts<SPLIT>.
+For the week as a whole<SPLIT>, he said that float related as of adjustments were "<SPLIT>small<SPLIT>,<SPLIT>" adding that they fell to a negative 750 mln dlrs on Tuesday due to a number of corrections for unrelated cash letter errors in six districts around the country<SPLIT>.
+The spokesman said that on both Tuesday and Wednesday<SPLIT>, two different clearing banks had system problems and the securities and Federal funds wires had to be held open until about 2000 or 2100 EST on both days<SPLIT>.
+However<SPLIT>, he said that both problems were cleared up during both afternoons and there was no evidence of any reserve impact<SPLIT>.
+During the week ended Wednesday<SPLIT>, 45 pct of net discount window borrowings were made by the smallest banks<SPLIT>, with 30 pct by the 14 large money center banks and 25 pct by large regional institutions<SPLIT>.
+On Wednesday<SPLIT>, 55 pct of the borrowing was accounted for by the money center banks<SPLIT>, with 30 pct by the large regionals and 15 pct by the smallest banks<SPLIT>.
+The Fed spokesman said the banking system had excess reserves on Thursday<SPLIT>, Monday and Tuesday and a deficit on Friday and Wedndsday<SPLIT>.
+That produced a small daily average deficit for the week as a whole<SPLIT>.
+For the two-week period<SPLIT>, he said there were relatively high excess reserves on a daily avearge<SPLIT>, almost all of which were at the smallest banks<SPLIT>.
+American Express Co remained silent on market rumors it would spinoff all or part of its Shearson Lehman Brothers Inc<SPLIT>, but some analysts said the company may be considering such a move because it is unhappy with the market value of its stock<SPLIT>.
+American Express stock got a lift from the rumor<SPLIT>, as the market calculated a partially public Shearson may command a good market value<SPLIT>, thereby boosting the total value of American Express<SPLIT>.
+The rumor also was accompanied by talk the financial services firm would split its stock and boost its dividend<SPLIT>.
+American Express closed on the New York Stock Exchange at 72-5/8<SPLIT>, up 4-1/8 on heavy volume<SPLIT>.
+American Express would not comment on the rumors or its stock activity<SPLIT>.
+Analysts said comments by the company at an analysts' meeting Tuesday helped fuel the rumors as did an announcement yesterday of management changes<SPLIT>.
+At the meeting<SPLIT>, company officials said American Express stock is undervalued and does not fully reflect the performance of Shearson<SPLIT>, according to analysts<SPLIT>.
+Yesterday<SPLIT>, Shearson said it was elevating its chief operating officer<SPLIT>, Jeffery Lane<SPLIT>, to the added position of president<SPLIT>, which had been vacant<SPLIT>.
+It also created four new positions for chairmen of its operating divisions<SPLIT>.
+Analysts speculated a partial spinoff would make most sense<SPLIT>, contrary to one variation on market rumors of a total spinoff<SPLIT>.
+Some analysts<SPLIT>, however<SPLIT>, disagreed that any spinoff of Shearson would be good since it is a strong profit center for American Express<SPLIT>, contributing about 20 pct of earnings last year<SPLIT>.
+"<SPLIT>I think it is highly unlikely that American Express is going to sell shearson<SPLIT>,<SPLIT>" said Perrin Long of Lipper Analytical<SPLIT>.
+He questioned what would be a better investment than "<SPLIT>a very profitable securities firm<SPLIT>.<SPLIT>"
+Several analysts said American Express is not in need of cash<SPLIT>, which might be the only reason to sell a part of a strong asset<SPLIT>.
+But others believe the company could very well of considered the option of spinning out part of Shearson<SPLIT>, and one rumor suggests selling about 20 pct of it in the market<SPLIT>.
+Larry Eckenfelder of Prudential-Bache Securities said he believes American Express could have considered a partial spinoff in the past<SPLIT>.
+"<SPLIT>Shearson being as profitable as it is would have fetched a big premium in the market place<SPLIT>.
+Some analysts said American Express could use capital since it plans to expand globally<SPLIT>.
+"<SPLIT>They've outlined the fact that they're investing heavily in the future<SPLIT>, which goes heavily into the international arena<SPLIT>,<SPLIT>" said Lewis<SPLIT>.
+Lewis said if American Express reduced its exposure to the brokerage business by selling part of shearson<SPLIT>, its stock might better reflect other assets<SPLIT>, such as the travel related services business<SPLIT>.
+Lewis said Shearson contributed 316 mln in after-tax operating earnings<SPLIT>, up from about 200 mln dlrs in 1985<SPLIT>.
+Coleco Industries Inc said it expects to return to profitability in 1987<SPLIT>.
+Earlier<SPLIT>, Coleco reported a net loss of 111.2 mln dlrs for the year ended December 31 compared to a profit of 64.2 mln dlrs in the year earlier<SPLIT>.
+In a prepared statement<SPLIT>, the company said the dramatic swing in operating results was due primarily to the steep decline in sales of Cabbage Patch Kids products from 600 mln dlrs to 230 mln dlrs<SPLIT>.
+Coleco said it changed from a single product company to a more diversified organization through four major acquisitions last year<SPLIT>.
+Products from the new acquisitions and other new product introductions are expected to enable it to return to profitability<SPLIT>, it said<SPLIT>.
+At the annual Toy Fair earlier this month<SPLIT>, vice president Morton Handel said analysts' 1987 projected earnings of 90 cts a share on sales of 600 mln dlrs are reasonable<SPLIT>.
+Azpurua said venezuela has shown solidarity with brazil's decision to suspend payments<SPLIT>, but each country must negotiate according to its own interest<SPLIT>.
+Azpurua said the governent plans to pay 2.1 billion dlrs in public and private debt principal this year<SPLIT>.
+It was due to amortize 1.05 billion dlrs under the rescheduling<SPLIT>, and pay 420 mln dlrs in non-restructured principal<SPLIT>, both public sector<SPLIT>.
+He said venezuela's original proposal was to pay no principal on restructured debt this year<SPLIT>, but is now insisting that if it makes payments they be compensated by new bank loans<SPLIT>.
+The banking sources said the committee has been prepared to lower amortizations to around 400 mln dlrs this year<SPLIT>, but that no direct commitment was likely on new loans<SPLIT>.
+At the same time<SPLIT>, he said the government is presently studying possible mechanisms for capitlizing public and private sector foreign debt<SPLIT>, based on experience in other countries<SPLIT>.
+The rules would be published by the finance ministry and the central bank<SPLIT>.
+
+Thomson McKinnon Mortgage Assets Corp<SPLIT>, a unit of Thomson McKinnon Inc<SPLIT>, is offering 100 mln dlrs of collateralized mortgage obligations in three tranches that include floating rate and inverse floating rate CMOS<SPLIT>.
+The floating rate class amounts to 60 mln dlrs<SPLIT>.
+The inverse floater totals 4.8 mln dlrs<SPLIT>.
+Subsequent rates on the inverse floater will equal 11-1/2 pct minus the product of three times (<SPLIT>LIBOR minus 6-1/2 pct<SPLIT>)<SPLIT>.
+A Thomson officer explained that the coupon of the inverse floating rate tranche would increase if LIBOR declined<SPLIT>.
+The fixed-rate tranche totals 35.2 mln dlrs<SPLIT>.
+The issue is rated AAA by Standard and Poor's and secured by Federal Home Loan Mortgage Corp<SPLIT>, Freddie Mac<SPLIT>, certificates<SPLIT>.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/core/src/test/org/apache/lucene/analysis/TestStopFilter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/analysis/TestStopFilter.java b/lucene/core/src/test/org/apache/lucene/analysis/TestStopFilter.java
index 3e26965..f17cd51 100644
--- a/lucene/core/src/test/org/apache/lucene/analysis/TestStopFilter.java
+++ b/lucene/core/src/test/org/apache/lucene/analysis/TestStopFilter.java
@@ -20,12 +20,8 @@ import java.io.IOException;
 import java.io.StringReader;
 import java.util.ArrayList;
 
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.util.English;
 
 public class TestStopFilter extends BaseTokenStreamTestCase {
@@ -111,9 +107,10 @@ public class TestStopFilter extends BaseTokenStreamTestCase {
                               7,
                               1,
                               null,
-                              true);    
+                              true,
+                              null);
   }
-  
+
   private void doTestStopPositons(StopFilter stpf) throws IOException {
     CharTermAttribute termAtt = stpf.getAttribute(CharTermAttribute.class);
     PositionIncrementAttribute posIncrAtt = stpf.getAttribute(PositionIncrementAttribute.class);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/ivy-versions.properties
----------------------------------------------------------------------
diff --git a/lucene/ivy-versions.properties b/lucene/ivy-versions.properties
index 2478f85..35df7ae 100644
--- a/lucene/ivy-versions.properties
+++ b/lucene/ivy-versions.properties
@@ -161,6 +161,9 @@ org.apache.james.apache.mime4j.version = 0.7.2
 
 /org.apache.mina/mina-core = 2.0.0-M5
 
+/org.apache.opennlp/opennlp-maxent = 3.0.3
+/org.apache.opennlp/opennlp-tools = 1.8.3
+
 org.apache.pdfbox.version = 2.0.6
 /org.apache.pdfbox/fontbox = ${org.apache.pdfbox.version}
 /org.apache.pdfbox/jempbox = 1.8.13

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/licenses/opennlp-maxent-3.0.3.jar.sha1
----------------------------------------------------------------------
diff --git a/lucene/licenses/opennlp-maxent-3.0.3.jar.sha1 b/lucene/licenses/opennlp-maxent-3.0.3.jar.sha1
new file mode 100644
index 0000000..c3c412f
--- /dev/null
+++ b/lucene/licenses/opennlp-maxent-3.0.3.jar.sha1
@@ -0,0 +1 @@
+55e39e6b46e71f35229cdd6950e72d8cce3b5fd4

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/licenses/opennlp-maxent-LICENSE-ASL.txt
----------------------------------------------------------------------
diff --git a/lucene/licenses/opennlp-maxent-LICENSE-ASL.txt b/lucene/licenses/opennlp-maxent-LICENSE-ASL.txt
new file mode 100644
index 0000000..d645695
--- /dev/null
+++ b/lucene/licenses/opennlp-maxent-LICENSE-ASL.txt
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/licenses/opennlp-maxent-NOTICE.txt
----------------------------------------------------------------------
diff --git a/lucene/licenses/opennlp-maxent-NOTICE.txt b/lucene/licenses/opennlp-maxent-NOTICE.txt
new file mode 100644
index 0000000..9b97287
--- /dev/null
+++ b/lucene/licenses/opennlp-maxent-NOTICE.txt
@@ -0,0 +1,6 @@
+
+Apache OpenNLP Maxent
+Copyright 2013 The Apache Software Foundation
+
+This product includes software developed at
+The Apache Software Foundation (http://www.apache.org/).

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/licenses/opennlp-tools-1.8.3.jar.sha1
----------------------------------------------------------------------
diff --git a/lucene/licenses/opennlp-tools-1.8.3.jar.sha1 b/lucene/licenses/opennlp-tools-1.8.3.jar.sha1
new file mode 100644
index 0000000..c6a7549
--- /dev/null
+++ b/lucene/licenses/opennlp-tools-1.8.3.jar.sha1
@@ -0,0 +1 @@
+3ce7c9056048f55478d983248cf18c7e02b1d072


[05/12] lucene-solr:master: LUCENE-2899: Add OpenNLP Analysis capabilities as a module

Posted by sa...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/package-info.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/package-info.java b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/package-info.java
new file mode 100644
index 0000000..527e24f
--- /dev/null
+++ b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/package-info.java
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Analysis components based on OpenNLP
+ */
+package org.apache.lucene.analysis.opennlp;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPChunkerOp.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPChunkerOp.java b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPChunkerOp.java
new file mode 100644
index 0000000..f6a5ea8
--- /dev/null
+++ b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPChunkerOp.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.opennlp.tools;
+
+import java.io.IOException;
+import opennlp.tools.chunker.ChunkerME;
+import opennlp.tools.chunker.ChunkerModel;
+
+/**
+ * Wrapper around an OpenNLP {@link ChunkerME} that is built once from a {@link ChunkerModel}.
+ * Requires binary models from OpenNLP project on SourceForge.
+ */
+public class NLPChunkerOp {
+  private final ChunkerME chunker;
+
+  /**
+   * Creates the underlying {@link ChunkerME} from the given model.
+   *
+   * @param chunkerModel the model handed to the {@link ChunkerME} constructor
+   * @throws IOException part of the declared signature.
+   *         NOTE(review): the visible body does not obviously throw it - confirm against ChunkerME.
+   */
+  public NLPChunkerOp(ChunkerModel chunkerModel) throws IOException {
+    this.chunker = new ChunkerME(chunkerModel);
+  }
+
+  /**
+   * Runs the chunker over one sequence of words and their tags.
+   * <p>
+   * The method is synchronized on this instance, so the {@code chunk} call and the optional
+   * {@code probs} call of one invocation are not interleaved with another invocation.
+   *
+   * @param words the words to chunk
+   * @param tags  the tags passed to the chunker together with {@code words}
+   * @param probs if non-null, passed to {@code ChunkerME.probs(double[])} after chunking
+   *              (presumably filled with the probabilities of the chunks just produced - confirm);
+   *              may be null to skip that call
+   * @return the array returned by {@code ChunkerME.chunk(words, tags)}
+   */
+  public synchronized String[] getChunks(String[] words, String[] tags, double[] probs) {
+    final String[] chunkTags = chunker.chunk(words, tags);
+    if (probs == null) {
+      return chunkTags;
+    }
+    chunker.probs(probs);
+    return chunkTags;
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPLemmatizerOp.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPLemmatizerOp.java b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPLemmatizerOp.java
new file mode 100644
index 0000000..b09c63e
--- /dev/null
+++ b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPLemmatizerOp.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.opennlp.tools;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import opennlp.tools.lemmatizer.DictionaryLemmatizer;
+import opennlp.tools.lemmatizer.LemmatizerME;
+import opennlp.tools.lemmatizer.LemmatizerModel;
+
+/**
+ * <p>Supply OpenNLP Lemmatizer tools.</p>
+ * <p>
+ *   Both a dictionary-based lemmatizer and a MaxEnt lemmatizer are supported.
+ *   If both are configured, the dictionary-based lemmatizer is tried first,
+ *   and then the MaxEnt lemmatizer is consulted for out-of-vocabulary tokens.
+ * </p>
+ * <p>
+ *   The MaxEnt implementation requires binary models from OpenNLP project on SourceForge.
+ * </p>
+ * <p>
+ *   This class performs no synchronization of its own.
+ * </p>
+ */
+public class NLPLemmatizerOp {
+  private final DictionaryLemmatizer dictionaryLemmatizer;
+  private final LemmatizerME lemmatizerME;
+
+  /**
+   * Creates the configured lemmatizer(s).
+   *
+   * @param dictionary      stream handed to the {@link DictionaryLemmatizer} constructor,
+   *                        or null to disable the dictionary-based lemmatizer
+   * @param lemmatizerModel model handed to the {@link LemmatizerME} constructor,
+   *                        or null to disable the MaxEnt lemmatizer
+   * @throws IllegalArgumentException if both parameters are null
+   * @throws IOException part of the declared signature (presumably raised while the
+   *         dictionary stream is read - confirm against DictionaryLemmatizer)
+   */
+  public NLPLemmatizerOp(InputStream dictionary, LemmatizerModel lemmatizerModel) throws IOException {
+    // An explicit check instead of an assert: assertions are normally disabled at runtime,
+    // which would let a lemmatizer-less instance through and fail later in lemmatize().
+    if (dictionary == null && lemmatizerModel == null) {
+      throw new IllegalArgumentException("At least one parameter must be non-null");
+    }
+    dictionaryLemmatizer = dictionary == null ? null : new DictionaryLemmatizer(dictionary);
+    lemmatizerME = lemmatizerModel == null ? null : new LemmatizerME(lemmatizerModel);
+  }
+
+  /**
+   * Lemmatizes the given words.
+   * <p>
+   * With a dictionary configured, every dictionary result equal to "O" (word not in the
+   * dictionary) is replaced by the MaxEnt lemma when a MaxEnt lemmatizer is available,
+   * or by the original word otherwise. A MaxEnt lemma of "_" (no lemma found) is likewise
+   * replaced by the original word. The MaxEnt lemmatizer is invoked at most once per call,
+   * and only if it is needed.
+   *
+   * @param words   the words to lemmatize
+   * @param postags the tags passed to the lemmatizer(s) together with {@code words}
+   * @return one lemma per entry of the array returned by the lemmatizer in use
+   */
+  public String[] lemmatize(String[] words, String[] postags) {
+    if (dictionaryLemmatizer == null) {  // there is only a MaxEnt lemmatizer
+      String[] maxEntLemmas = lemmatizerME.lemmatize(words, postags);
+      for (int i = 0; i < maxEntLemmas.length; ++i) {
+        maxEntLemmas[i] = maxEntLemmaOrWord(maxEntLemmas[i], words[i]);
+      }
+      return maxEntLemmas;
+    }
+
+    String[] lemmas = dictionaryLemmatizer.lemmatize(words, postags);
+    String[] maxEntLemmas = null;        // computed lazily, on the first out-of-dictionary word
+    for (int i = 0; i < lemmas.length; ++i) {
+      if (!"O".equals(lemmas[i])) {
+        continue;                        // the dictionary supplied a lemma for this word
+      }
+      if (lemmatizerME == null) {        // there is no MaxEnt lemmatizer
+        lemmas[i] = words[i];            // put back the original word if no lemma is found
+        continue;
+      }
+      if (maxEntLemmas == null) {        // fall back to the MaxEnt lemmatizer
+        maxEntLemmas = lemmatizerME.lemmatize(words, postags);
+      }
+      lemmas[i] = maxEntLemmaOrWord(maxEntLemmas[i], words[i]);
+    }
+    return lemmas;
+  }
+
+  /** Returns {@code word} when the MaxEnt lemma is "_" (no lemma found), else the MaxEnt lemma. */
+  private static String maxEntLemmaOrWord(String maxEntLemma, String word) {
+    return "_".equals(maxEntLemma) ? word : maxEntLemma;
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPNERTaggerOp.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPNERTaggerOp.java b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPNERTaggerOp.java
new file mode 100644
index 0000000..22e617d
--- /dev/null
+++ b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPNERTaggerOp.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.opennlp.tools;
+
+import opennlp.tools.namefind.NameFinderME;
+import opennlp.tools.namefind.TokenNameFinder;
+import opennlp.tools.namefind.TokenNameFinderModel;
+import opennlp.tools.util.Span;
+
+/**
+ * Supply OpenNLP Named Entity Recognition tool
+ * Requires binary models from OpenNLP project on SourceForge.
+ *
+ * Usage: from <a href="http://opennlp.apache.org/docs/1.8.3/manual/opennlp.html#tools.namefind.recognition.api"
+ *             >the OpenNLP documentation</a>:
+ *
+ * "The NameFinderME class is not thread safe, it must only be called from one thread.
+ * To use multiple threads multiple NameFinderME instances sharing the same model instance
+ * can be created. The input text should be segmented into documents, sentences and tokens.
+ * To perform entity detection an application calls the find method for every sentence in
+ * the document. After every document clearAdaptiveData must be called to clear the adaptive
+ * data in the feature generators. Not calling clearAdaptiveData can lead to a sharp drop
+ * in the detection rate after a few documents."
+ *
+ * Both {@link #getNames(String[])} and {@link #reset()} synchronize on this instance, so
+ * calls into the wrapped name finder through this object never overlap.
+ */
+public class NLPNERTaggerOp {
+  private final TokenNameFinder nameFinder;
+
+  /**
+   * Creates a {@link NameFinderME} from the given model.
+   *
+   * @param model the model handed to the {@link NameFinderME} constructor
+   */
+  public NLPNERTaggerOp(TokenNameFinderModel model) {
+    this.nameFinder = new NameFinderME(model);
+  }
+
+  /**
+   * Finds names in one sequence of tokens.
+   * <p>
+   * Synchronized, like {@link #reset()}: the wrapped name finder is documented above as
+   * not thread safe, and locking only {@code reset()} would not keep a concurrent
+   * {@code find} call out.
+   *
+   * @param words the tokens to search (per the usage note above, one sentence per call)
+   * @return the spans returned by {@code TokenNameFinder.find(words)}
+   *         (presumably token-index spans into {@code words} - confirm)
+   */
+  public synchronized Span[] getNames(String[] words) {
+    return nameFinder.find(words);
+  }
+
+  /**
+   * Clears the name finder's adaptive data; per the usage note above this is expected
+   * after every document.
+   */
+  public synchronized void reset() {
+    nameFinder.clearAdaptiveData();
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPPOSTaggerOp.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPPOSTaggerOp.java b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPPOSTaggerOp.java
new file mode 100644
index 0000000..447e1c0
--- /dev/null
+++ b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPPOSTaggerOp.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.opennlp.tools;
+
+import java.io.IOException;
+
+import opennlp.tools.postag.POSModel;
+import opennlp.tools.postag.POSTagger;
+import opennlp.tools.postag.POSTaggerME;
+
+/**
+ * Wrapper around an OpenNLP {@link POSTagger} (a {@link POSTaggerME}) built from a {@link POSModel}.
+ * Requires binary models from OpenNLP project on SourceForge.
+ */
+public class NLPPOSTaggerOp {
+  private final POSTagger tagger;
+
+  /**
+   * Creates the underlying {@link POSTaggerME} from the given model.
+   *
+   * @param model the model handed to the {@link POSTaggerME} constructor
+   * @throws IOException part of the declared signature.
+   *         NOTE(review): the visible body does not obviously throw it - confirm against POSTaggerME.
+   */
+  public NLPPOSTaggerOp(POSModel model) throws IOException {
+    this.tagger = new POSTaggerME(model);
+  }
+
+  /**
+   * Tags one sequence of words. Synchronized on this instance.
+   *
+   * @param words the words to tag
+   * @return the array returned by {@code POSTagger.tag(words)}
+   */
+  public synchronized String[] getPOSTags(String[] words) {
+    final String[] posTags = tagger.tag(words);
+    return posTags;
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPSentenceDetectorOp.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPSentenceDetectorOp.java b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPSentenceDetectorOp.java
new file mode 100644
index 0000000..21983ce
--- /dev/null
+++ b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPSentenceDetectorOp.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.opennlp.tools;
+
+import java.io.IOException;
+
+import opennlp.tools.sentdetect.SentenceDetectorME;
+import opennlp.tools.sentdetect.SentenceModel;
+import opennlp.tools.util.Span;
+
+/**
+ * Wrapper around an OpenNLP {@link SentenceDetectorME}, with a model-less fallback that
+ * treats the whole input as a single sentence.
+ * Requires binary models from OpenNLP project on SourceForge.
+ */
+public class NLPSentenceDetectorOp {
+  private final SentenceDetectorME sentenceSplitter;
+
+  /**
+   * Creates a detector backed by the given model.
+   *
+   * @param model the model handed to the {@link SentenceDetectorME} constructor
+   * @throws IOException part of the declared signature.
+   *         NOTE(review): the visible body does not obviously throw it - confirm against SentenceDetectorME.
+   */
+  public NLPSentenceDetectorOp(SentenceModel model) throws IOException {
+    this.sentenceSplitter = new SentenceDetectorME(model);
+  }
+
+  /** Creates a detector without a model; {@link #splitSentences(String)} then returns one span. */
+  public NLPSentenceDetectorOp() {
+    this.sentenceSplitter = null;
+  }
+
+  /**
+   * Splits the given text into sentence spans. Synchronized on this instance.
+   *
+   * @param line the text to split; must not be null when no model is configured,
+   *             because its length is read
+   * @return the result of {@code SentenceDetectorME.sentPosDetect(line)} when a model is
+   *         configured, otherwise a single span covering {@code 0} to {@code line.length()}
+   */
+  public synchronized Span[] splitSentences(String line) {
+    if (sentenceSplitter == null) {
+      // No model: the whole line is reported as one sentence.
+      return new Span[] { new Span(0, line.length()) };
+    }
+    return sentenceSplitter.sentPosDetect(line);
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPTokenizerOp.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPTokenizerOp.java b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPTokenizerOp.java
new file mode 100644
index 0000000..0aeb713
--- /dev/null
+++ b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPTokenizerOp.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.opennlp.tools;
+
+import opennlp.tools.tokenize.Tokenizer;
+import opennlp.tools.tokenize.TokenizerME;
+import opennlp.tools.tokenize.TokenizerModel;
+import opennlp.tools.util.Span;
+
+/**
+ * Wrapper around an OpenNLP {@link Tokenizer} (a {@link TokenizerME}), with a model-less
+ * fallback that treats the whole sentence as a single term.
+ * Requires binary models from OpenNLP project on SourceForge.
+ */
+public class NLPTokenizerOp {
+  private final Tokenizer tokenizer;
+
+  /**
+   * Creates a tokenizer backed by the given model.
+   *
+   * @param model the model handed to the {@link TokenizerME} constructor
+   */
+  public NLPTokenizerOp(TokenizerModel model) {
+    this.tokenizer = new TokenizerME(model);
+  }
+
+  /** Creates a tokenizer without a model; {@link #getTerms(String)} then returns one span. */
+  public NLPTokenizerOp() {
+    this.tokenizer = null;
+  }
+
+  /**
+   * Splits a sentence into term spans. Synchronized on this instance.
+   *
+   * @param sentence the text to tokenize; must not be null when no model is configured,
+   *                 because its length is read
+   * @return the result of {@code Tokenizer.tokenizePos(sentence)} when a model is configured,
+   *         otherwise a single span covering {@code 0} to {@code sentence.length()}
+   */
+  public synchronized Span[] getTerms(String sentence) {
+    if (tokenizer != null) {
+      return tokenizer.tokenizePos(sentence);
+    }
+    // No model: the whole sentence is reported as one term.
+    final Span wholeSentence = new Span(0, sentence.length());
+    return new Span[] { wholeSentence };
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/OpenNLPOpsFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/OpenNLPOpsFactory.java b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/OpenNLPOpsFactory.java
new file mode 100644
index 0000000..5348857
--- /dev/null
+++ b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/OpenNLPOpsFactory.java
@@ -0,0 +1,176 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.opennlp.tools;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.nio.charset.StandardCharsets;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+
+import opennlp.tools.chunker.ChunkerModel;
+import opennlp.tools.lemmatizer.LemmatizerModel;
+import opennlp.tools.namefind.TokenNameFinderModel;
+import opennlp.tools.postag.POSModel;
+import opennlp.tools.sentdetect.SentenceModel;
+import opennlp.tools.tokenize.TokenizerModel;
+import org.apache.lucene.analysis.util.ResourceLoader;
+
+/**
+ * Static factory and cache for the OpenNLP models and lemmatizer dictionaries used by this module.
+ * <p>
+ * Each <code>get*Model(name, loader)</code> method (and {@link #getLemmatizerDictionary}) loads the
+ * named resource the first time it is requested and caches it under that name. The other
+ * <code>get*(name)</code> methods only consult the cache and wrap the cached object in a new op
+ * instance, so the corresponding resource must have been loaded beforehand.
+ * <p>
+ * Assumes model objects are thread-safe.
+ */
+public class OpenNLPOpsFactory {
+  // Caches keyed by resource name; shared across all callers.
+  private static final Map<String,SentenceModel> sentenceModels = new ConcurrentHashMap<>();
+  private static final Map<String,TokenizerModel> tokenizerModels = new ConcurrentHashMap<>();
+  private static final Map<String,POSModel> posTaggerModels = new ConcurrentHashMap<>();
+  private static final Map<String,ChunkerModel> chunkerModels = new ConcurrentHashMap<>();
+  private static final Map<String,TokenNameFinderModel> nerModels = new ConcurrentHashMap<>();
+  private static final Map<String,LemmatizerModel> lemmatizerModels = new ConcurrentHashMap<>();
+  private static final Map<String,String> lemmaDictionaries = new ConcurrentHashMap<>();
+
+  /**
+   * Creates a sentence detector op.
+   *
+   * @param modelName name of a model previously loaded with {@link #getSentenceModel}, or null
+   *                  to create an op without a model
+   * @return a new op; if the name is non-null but not cached, a null model is passed to the op
+   */
+  public static NLPSentenceDetectorOp getSentenceDetector(String modelName) throws IOException {
+    if (modelName != null) {
+      SentenceModel model = sentenceModels.get(modelName);
+      return new NLPSentenceDetectorOp(model);
+    } else {
+      return new NLPSentenceDetectorOp();
+    }
+  }
+
+  /**
+   * Returns the cached sentence model for the given name, loading and caching it on first use.
+   *
+   * @param modelName resource name of the model
+   * @param loader used to open the resource when it is not cached yet
+   * @throws IOException if the resource cannot be opened or read
+   */
+  public static SentenceModel getSentenceModel(String modelName, ResourceLoader loader) throws IOException {
+    SentenceModel model = sentenceModels.get(modelName);
+    if (model == null) {
+      // Close the resource stream once the model has been read from it.
+      try (InputStream resource = loader.openResource(modelName)) {
+        model = new SentenceModel(resource);
+      }
+      sentenceModels.put(modelName, model);
+    }
+    return model;
+  }
+
+  /**
+   * Creates a tokenizer op.
+   *
+   * @param modelName name of a model previously loaded with {@link #getTokenizerModel}, or null
+   *                  to create an op without a model
+   * @return a new op; if the name is non-null but not cached, a null model is passed to the op
+   */
+  public static NLPTokenizerOp getTokenizer(String modelName) throws IOException {
+    if (modelName == null) {
+      return new NLPTokenizerOp();
+    } else {
+      TokenizerModel model = tokenizerModels.get(modelName);
+      return new NLPTokenizerOp(model);
+    }
+  }
+
+  /**
+   * Returns the cached tokenizer model for the given name, loading and caching it on first use.
+   *
+   * @param modelName resource name of the model
+   * @param loader used to open the resource when it is not cached yet
+   * @throws IOException if the resource cannot be opened or read
+   */
+  public static TokenizerModel getTokenizerModel(String modelName, ResourceLoader loader) throws IOException {
+    TokenizerModel model = tokenizerModels.get(modelName);
+    if (model == null) {
+      try (InputStream resource = loader.openResource(modelName)) {
+        model = new TokenizerModel(resource);
+      }
+      tokenizerModels.put(modelName, model);
+    }
+    return model;
+  }
+
+  /**
+   * Creates a part-of-speech tagger op around the cached model with the given name.
+   * The model is looked up only; it is null if {@link #getPOSTaggerModel} was not called first.
+   */
+  public static NLPPOSTaggerOp getPOSTagger(String modelName) throws IOException {
+    POSModel model = posTaggerModels.get(modelName);
+    return new NLPPOSTaggerOp(model);
+  }
+
+  /**
+   * Returns the cached part-of-speech model for the given name, loading and caching it on first use.
+   *
+   * @param modelName resource name of the model
+   * @param loader used to open the resource when it is not cached yet
+   * @throws IOException if the resource cannot be opened or read
+   */
+  public static POSModel getPOSTaggerModel(String modelName, ResourceLoader loader) throws IOException {
+    POSModel model = posTaggerModels.get(modelName);
+    if (model == null) {
+      try (InputStream resource = loader.openResource(modelName)) {
+        model = new POSModel(resource);
+      }
+      posTaggerModels.put(modelName, model);
+    }
+    return model;
+  }
+
+  /**
+   * Creates a chunker op around the cached model with the given name.
+   * The model is looked up only; it is null if {@link #getChunkerModel} was not called first.
+   */
+  public static NLPChunkerOp getChunker(String modelName) throws IOException {
+    ChunkerModel model = chunkerModels.get(modelName);
+    return new NLPChunkerOp(model);
+  }
+
+  /**
+   * Returns the cached chunker model for the given name, loading and caching it on first use.
+   *
+   * @param modelName resource name of the model
+   * @param loader used to open the resource when it is not cached yet
+   * @throws IOException if the resource cannot be opened or read
+   */
+  public static ChunkerModel getChunkerModel(String modelName, ResourceLoader loader) throws IOException {
+    ChunkerModel model = chunkerModels.get(modelName);
+    if (model == null) {
+      try (InputStream resource = loader.openResource(modelName)) {
+        model = new ChunkerModel(resource);
+      }
+      chunkerModels.put(modelName, model);
+    }
+    return model;
+  }
+
+  /**
+   * Creates a named-entity tagger op around the cached model with the given name.
+   * The model is looked up only; it is null if {@link #getNERTaggerModel} was not called first.
+   */
+  public static NLPNERTaggerOp getNERTagger(String modelName) throws IOException {
+    TokenNameFinderModel model = nerModels.get(modelName);
+    return new NLPNERTaggerOp(model);
+  }
+
+  /**
+   * Returns the cached name finder model for the given name, loading and caching it on first use.
+   *
+   * @param modelName resource name of the model
+   * @param loader used to open the resource when it is not cached yet
+   * @throws IOException if the resource cannot be opened or read
+   */
+  public static TokenNameFinderModel getNERTaggerModel(String modelName, ResourceLoader loader) throws IOException {
+    TokenNameFinderModel model = nerModels.get(modelName);
+    if (model == null) {
+      try (InputStream resource = loader.openResource(modelName)) {
+        model = new TokenNameFinderModel(resource);
+      }
+      nerModels.put(modelName, model);
+    }
+    return model;
+  }
+
+  /**
+   * Creates a lemmatizer op from a cached dictionary and/or a cached lemmatizer model.
+   * At least one of the two names must be non-null.
+   *
+   * @param dictionaryFile name of a dictionary previously loaded with
+   *                       {@link #getLemmatizerDictionary}, or null for none; a non-null name
+   *                       that is not cached results in a NullPointerException
+   * @param lemmatizerModelFile name of a model previously loaded with
+   *                            {@link #getLemmatizerModel}, or null for none
+   */
+  public static NLPLemmatizerOp getLemmatizer(String dictionaryFile, String lemmatizerModelFile) throws IOException {
+    assert dictionaryFile != null || lemmatizerModelFile != null : "At least one parameter must be non-null";
+    InputStream dictionaryInputStream = null;
+    if (dictionaryFile != null) {
+      // The dictionary is cached as text; hand each op its own in-memory stream over it.
+      String dictionary = lemmaDictionaries.get(dictionaryFile);
+      dictionaryInputStream = new ByteArrayInputStream(dictionary.getBytes(StandardCharsets.UTF_8));
+    }
+    LemmatizerModel lemmatizerModel = lemmatizerModelFile == null ? null : lemmatizerModels.get(lemmatizerModelFile);
+    return new NLPLemmatizerOp(dictionaryInputStream, lemmatizerModel);
+  }
+
+  /**
+   * Returns the cached text of the given lemmatizer dictionary, reading it as UTF-8 and caching
+   * it on first use.
+   *
+   * @param dictionaryFile resource name of the dictionary
+   * @param loader used to open the resource when it is not cached yet
+   * @throws IOException if the resource cannot be opened or read
+   */
+  public static String getLemmatizerDictionary(String dictionaryFile, ResourceLoader loader) throws IOException {
+    String dictionary = lemmaDictionaries.get(dictionaryFile);
+    if (dictionary == null) {
+      StringBuilder builder = new StringBuilder();
+      // Closing the reader also closes the underlying resource stream.
+      try (Reader reader = new InputStreamReader(loader.openResource(dictionaryFile), StandardCharsets.UTF_8)) {
+        char[] chars = new char[8092];
+        int numRead;
+        while ((numRead = reader.read(chars, 0, chars.length)) > 0) {
+          builder.append(chars, 0, numRead);
+        }
+      }
+      dictionary = builder.toString();
+      lemmaDictionaries.put(dictionaryFile, dictionary);
+    }
+    return dictionary;
+  }
+
+  /**
+   * Returns the cached lemmatizer model for the given name, loading and caching it on first use.
+   *
+   * @param modelName resource name of the model
+   * @param loader used to open the resource when it is not cached yet
+   * @throws IOException if the resource cannot be opened or read
+   */
+  public static LemmatizerModel getLemmatizerModel(String modelName, ResourceLoader loader) throws IOException {
+    LemmatizerModel model = lemmatizerModels.get(modelName);
+    if (model == null) {
+      try (InputStream resource = loader.openResource(modelName)) {
+        model = new LemmatizerModel(resource);
+      }
+      lemmatizerModels.put(modelName, model);
+    }
+    return model;
+  }
+
+  /** Empties every cache; keeps unit tests from blowing out memory. */
+  public static void clearModels() {
+    sentenceModels.clear();
+    tokenizerModels.clear();
+    posTaggerModels.clear();
+    chunkerModels.clear();
+    nerModels.clear();
+    lemmatizerModels.clear();
+    lemmaDictionaries.clear();
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/package-info.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/package-info.java b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/package-info.java
new file mode 100644
index 0000000..523a084
--- /dev/null
+++ b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/package-info.java
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tools to supply access to OpenNLP components.
+ */
+package org.apache.lucene.analysis.opennlp.tools;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/analysis/opennlp/src/java/overview.html
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/java/overview.html b/lucene/analysis/opennlp/src/java/overview.html
new file mode 100644
index 0000000..bf70e95
--- /dev/null
+++ b/lucene/analysis/opennlp/src/java/overview.html
@@ -0,0 +1,61 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html>
+<head>
+  <META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+  <title>
+    Apache Lucene OpenNLP integration module
+  </title>
+</head>
+<body>
+<p>
+  This module exposes functionality from
+  <a href="http://opennlp.apache.org">Apache OpenNLP</a> to Apache Lucene.
+  The Apache OpenNLP library is a machine learning based toolkit for the processing of natural language text.
+<p>
+  For an introduction to Lucene's analysis API, see the {@link org.apache.lucene.analysis} package documentation.
+<p>
+  The OpenNLP Tokenizer behavior is similar to the <code>WhitespaceTokenizer</code> but is smart about
+  inter-word punctuation. The term stream looks very much like the way you parse words and
+  punctuation while reading.  The major difference between this tokenizer and most other
+  tokenizers shipped with Lucene is that punctuation is tokenized.  This is required for
+  the following taggers to operate properly.
+<p>
+  The OpenNLP taggers annotate terms using the <code>TypeAttribute</code>.
+<ul>
+  <li><code>OpenNLPTokenizer</code> segments text into sentences or words. This Tokenizer
+    uses the OpenNLP Sentence Detector and/or Tokenizer classes.  When used together, the
+    Tokenizer receives sentences and can do a better job.</li>
+  <li><code>OpenNLPFilter</code> tags words using one or more technologies: Part-of-Speech,
+    Chunking, and Named Entity Recognition.  These tags are assigned as token types.  Note that
+    a token carries only one type, so only the last of these operations applied will tag it.
+  </li>
+</ul>
+<p>
+  Since the <code>TypeAttribute</code> is not stored in the index, it is recommended that one
+  of these filters is used following <code>OpenNLPFilter</code> to enable search against the
+  assigned tags:
+<ul>
+  <li><code>TypeAsPayloadFilter</code> copies the <code>TypeAttribute</code> value to the
+    <code>PayloadAttribute</code></li>
+  <li><code>TypeAsSynonymFilter</code> creates a cloned token at the same position as each
+    tagged token, and copies the <code>TypeAttribute</code> value to the
+    <code>CharTermAttribute</code>, optionally with a customized prefix (so that tags
+    effectively occupy a different namespace from token text).</li>
+</ul>
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/analysis/opennlp/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory b/lucene/analysis/opennlp/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory
new file mode 100644
index 0000000..61a685d
--- /dev/null
+++ b/lucene/analysis/opennlp/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory
@@ -0,0 +1,18 @@
+#  Licensed to the Apache Software Foundation (ASF) under one or more
+#  contributor license agreements.  See the NOTICE file distributed with
+#  this work for additional information regarding copyright ownership.
+#  The ASF licenses this file to You under the Apache License, Version 2.0
+#  (the "License"); you may not use this file except in compliance with
+#  the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+org.apache.lucene.analysis.opennlp.OpenNLPChunkerFilterFactory
+org.apache.lucene.analysis.opennlp.OpenNLPLemmatizerFilterFactory
+org.apache.lucene.analysis.opennlp.OpenNLPPOSFilterFactory

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/analysis/opennlp/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenizerFactory
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenizerFactory b/lucene/analysis/opennlp/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenizerFactory
new file mode 100644
index 0000000..076b308
--- /dev/null
+++ b/lucene/analysis/opennlp/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenizerFactory
@@ -0,0 +1,16 @@
+#  Licensed to the Apache Software Foundation (ASF) under one or more
+#  contributor license agreements.  See the NOTICE file distributed with
+#  this work for additional information regarding copyright ownership.
+#  The ASF licenses this file to You under the Apache License, Version 2.0
+#  (the "License"); you may not use this file except in compliance with
+#  the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+org.apache.lucene.analysis.opennlp.OpenNLPTokenizerFactory

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-chunker.bin
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-chunker.bin b/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-chunker.bin
new file mode 100644
index 0000000..8151914
Binary files /dev/null and b/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-chunker.bin differ

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-lemmas.dict
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-lemmas.dict b/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-lemmas.dict
new file mode 100644
index 0000000..d1d486c
--- /dev/null
+++ b/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-lemmas.dict
@@ -0,0 +1,12 @@
+they	NNP	they
+sent	VBD	send
+him	PRP	he
+running	VBG	run
+in	IN	in
+the	DT	the
+evening	NN	evening
+he	PRP	he
+did	VBD	do
+not	RB	not
+come	VB	come
+back	RB	back

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-lemmatizer.bin
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-lemmatizer.bin b/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-lemmatizer.bin
new file mode 100644
index 0000000..e62df7e
Binary files /dev/null and b/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-lemmatizer.bin differ

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-ner-person.bin
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-ner-person.bin b/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-ner-person.bin
new file mode 100644
index 0000000..0b40aac
Binary files /dev/null and b/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-ner-person.bin differ

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-pos-maxent.bin
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-pos-maxent.bin b/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-pos-maxent.bin
new file mode 100644
index 0000000..b77fb46
Binary files /dev/null and b/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-pos-maxent.bin differ

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-sent.bin
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-sent.bin b/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-sent.bin
new file mode 100644
index 0000000..4252bcb
Binary files /dev/null and b/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-sent.bin differ

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-tokenizer.bin
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-tokenizer.bin b/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-tokenizer.bin
new file mode 100644
index 0000000..94668c0
Binary files /dev/null and b/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-tokenizer.bin differ

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/TestOpenNLPChunkerFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/TestOpenNLPChunkerFilterFactory.java b/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/TestOpenNLPChunkerFilterFactory.java
new file mode 100644
index 0000000..013348c
--- /dev/null
+++ b/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/TestOpenNLPChunkerFilterFactory.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.opennlp;
+
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.custom.CustomAnalyzer;
+import org.apache.lucene.analysis.payloads.TypeAsPayloadTokenFilterFactory;
+import org.apache.lucene.analysis.util.ClasspathResourceLoader;
+
+/**
+ * Tests the OpenNLP chunker filter in an analysis chain assembled by name through CustomAnalyzer.
+ *
+ * The chain needs the OpenNLP Tokenizer because it creates full streams of punctuation,
+ * and the OpenNLP POS tagger for the POS tags.
+ *
+ * Tagging models are created from tiny test data in opennlp/tools/test-model-data/ and are not very accurate.
+ */
+public class TestOpenNLPChunkerFilterFactory extends BaseTokenStreamTestCase {
+
+  // Input text followed by the terms, offsets and chunk tags expected for it, in token order.
+  private static final String SENTENCES = "Sentence number 1 has 6 words. Sentence number 2, 5 words.";
+  private static final String[] SENTENCES_punc = {
+      "Sentence", "number", "1", "has", "6", "words", ".", "Sentence", "number", "2", ",", "5", "words", "."
+  };
+  private static final int[] SENTENCES_startOffsets = {0, 9, 16, 18, 22, 24, 29, 31, 40, 47, 48, 50, 52, 57};
+  private static final int[] SENTENCES_endOffsets = {8, 15, 17, 21, 23, 29, 30, 39, 46, 48, 49, 51, 57, 58};
+  private static final String[] SENTENCES_chunks = {
+      "B-NP", "I-NP", "I-NP", "B-VP", "B-NP", "I-NP", "O", "B-NP", "I-NP", "I-NP", "O", "B-NP", "I-NP", "O"
+  };
+
+  // Test model resources, resolved relative to this class by the ClasspathResourceLoader.
+  private static final String sentenceModelFile = "en-test-sent.bin";
+  private static final String tokenizerModelFile = "en-test-tokenizer.bin";
+  private static final String posTaggerModelFile = "en-test-pos-maxent.bin";
+  private static final String chunkerModelFile = "en-test-chunker.bin";
+
+  /** Encodes each string as UTF-8 bytes, keeping null entries as null. */
+  private static byte[][] toPayloads(String... strings) {
+    return Arrays.stream(strings)
+        .map(text -> text == null ? null : text.getBytes(StandardCharsets.UTF_8))
+        .toArray(byte[][]::new);
+  }
+
+  /** Starts a chain of OpenNLP tokenizer, POS tagger and chunker, in that order. */
+  private CustomAnalyzer.Builder chunkerChain() throws Exception {
+    return CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
+        .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
+        .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
+        .addTokenFilter("opennlpChunker", "chunkerModel", chunkerModelFile);
+  }
+
+  /** The chunk tags are expected as the token types. */
+  public void testBasic() throws Exception {
+    CustomAnalyzer analyzer = chunkerChain().build();
+    assertAnalyzesTo(analyzer, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets,
+        SENTENCES_chunks, null, null, true);
+  }
+
+  /** With the type-as-payload filter appended, the chunk tags are expected as the payloads. */
+  public void testPayloads() throws Exception {
+    CustomAnalyzer analyzer = chunkerChain()
+        .addTokenFilter(TypeAsPayloadTokenFilterFactory.class)
+        .build();
+    assertAnalyzesTo(analyzer, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets,
+        null, null, null, true, toPayloads(SENTENCES_chunks));
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/TestOpenNLPLemmatizerFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/TestOpenNLPLemmatizerFilterFactory.java b/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/TestOpenNLPLemmatizerFilterFactory.java
new file mode 100644
index 0000000..0491b91
--- /dev/null
+++ b/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/TestOpenNLPLemmatizerFilterFactory.java
@@ -0,0 +1,169 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.opennlp;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.custom.CustomAnalyzer;
+import org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilterFactory;
+import org.apache.lucene.analysis.miscellaneous.RemoveDuplicatesTokenFilterFactory;
+import org.apache.lucene.analysis.util.ClasspathResourceLoader;
+
+public class TestOpenNLPLemmatizerFilterFactory extends BaseTokenStreamTestCase {
+
+  private static final String SENTENCE = "They sent him running in the evening.";
+  private static final String[] SENTENCE_dict_punc =   {"they", "send", "he",  "run",  "in", "the", "evening", "."};
+  private static final String[] SENTENCE_maxent_punc = {"they", "send", "he",  "runn", "in", "the", "evening", "."};
+  private static final String[] SENTENCE_posTags =     {"NNP",  "VBD",  "PRP", "VBG",  "IN", "DT",  "NN",      "."};
+
+  private static final String SENTENCES = "They sent him running in the evening. He did not come back.";
+  private static final String[] SENTENCES_dict_punc
+      = {"they", "send", "he",  "run",  "in", "the", "evening", ".", "he",  "do",  "not", "come", "back", "."};
+  private static final String[] SENTENCES_maxent_punc
+      = {"they", "send", "he",  "runn", "in", "the", "evening", ".", "he",  "do",  "not", "come", "back", "."};
+  private static final String[] SENTENCES_posTags
+      = {"NNP",  "VBD",  "PRP", "VBG",  "IN", "DT",  "NN",      ".", "PRP", "VBD", "RB",  "VB",   "RB",   "."};
+
+  private static final String SENTENCE_both = "Konstantin Kalashnitsov constantly caliphed.";
+  private static final String[] SENTENCE_both_punc
+      = {"konstantin", "kalashnitsov", "constantly", "caliph", "."};
+  private static final String[] SENTENCE_both_posTags
+      = {"IN",         "JJ",          "NN",          "VBN",    "."};
+
+  private static final String SENTENCES_both = "Konstantin Kalashnitsov constantly caliphed. Coreena could care, completely.";
+  private static final String[] SENTENCES_both_punc
+      = {"konstantin", "kalashnitsov", "constantly", "caliph", ".", "coreena", "could", "care", ",", "completely", "."};
+  private static final String[] SENTENCES_both_posTags
+      = {"IN",         "JJ",           "NN",          "VBN",    ".", "NNP",     "VBN",   "NN",   ",", "NN",         "."};
+
+  private static final String[] SENTENCES_dict_keep_orig_punc
+      = {"They", "they", "sent", "send", "him", "he", "running", "run",  "in", "the", "evening", ".", "He", "he",   "did", "do", "not", "come", "back", "."};
+  private static final String[] SENTENCES_max_ent_keep_orig_punc
+      = {"They", "they", "sent", "send", "him", "he", "running", "runn", "in", "the", "evening", ".", "He", "he",   "did", "do", "not", "come", "back", "."};
+  private static final String[] SENTENCES_keep_orig_posTags
+      = {"NNP",  "NNP",  "VBD",  "VBD",  "PRP", "PRP", "VBG",    "VBG",  "IN", "DT",  "NN",      ".", "PRP", "PRP", "VBD", "VBD", "RB",  "VB",  "RB",   "."};
+
+  private static final String[] SENTENCES_both_keep_orig_punc
+      = {"Konstantin", "konstantin", "Kalashnitsov", "kalashnitsov", "constantly", "caliphed", "caliph", ".", "Coreena", "coreena", "could", "care", ",", "completely", "."};
+  private static final String[] SENTENCES_both_keep_orig_posTags
+      = {"IN",         "IN",         "JJ",           "JJ",           "NN",         "VBN",      "VBN",    ".", "NNP",     "NNP",     "VBN",   "NN",   ",", "NN",         "."};
+
+
+  private static final String tokenizerModelFile = "en-test-tokenizer.bin";
+  private static final String sentenceModelFile = "en-test-sent.bin";
+  private static final String posTaggerModelFile = "en-test-pos-maxent.bin";
+  private static final String lemmatizerModelFile = "en-test-lemmatizer.bin";
+  private static final String lemmatizerDictFile = "en-test-lemmas.dict";
+
+
+  public void test1SentenceDictionaryOnly() throws Exception {
+    CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
+        .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
+        .addTokenFilter("opennlpPOS", "posTaggerModel", "en-test-pos-maxent.bin")
+        .addTokenFilter("opennlplemmatizer", "dictionary", "en-test-lemmas.dict")
+        .build();
+    assertAnalyzesTo(analyzer, SENTENCE, SENTENCE_dict_punc, null, null,
+        SENTENCE_posTags, null, null, true);
+  }
+
+  public void test2SentencesDictionaryOnly() throws Exception {
+    CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
+        .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
+        .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
+        .addTokenFilter("opennlplemmatizer", "dictionary", lemmatizerDictFile)
+        .build();
+    assertAnalyzesTo(analyzer, SENTENCES, SENTENCES_dict_punc, null, null,
+        SENTENCES_posTags, null, null, true);
+  }
+
+  public void test1SentenceMaxEntOnly() throws Exception {
+    CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
+        .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
+        .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
+        .addTokenFilter("opennlplemmatizer", "lemmatizerModel", lemmatizerModelFile)
+        .build();
+    assertAnalyzesTo(analyzer, SENTENCE, SENTENCE_maxent_punc, null, null,
+        SENTENCE_posTags, null, null, true);
+  }
+
+  /**
+   * Runs the two-sentence input through the OpenNLP tokenizer, the POS filter and a
+   * lemmatizer configured with a MaxEnt model only, asserting the output against
+   * {@code SENTENCES_maxent_punc} and {@code SENTENCES_posTags}.
+   * Note that the lemmatizer filter is requested here under the mixed-case name
+   * "OpenNLPLemmatizer", unlike the sibling tests.
+   */
+  public void test2SentencesMaxEntOnly() throws Exception {
+    ClasspathResourceLoader resources = new ClasspathResourceLoader(getClass());
+    CustomAnalyzer maxEntLemmatizing = CustomAnalyzer.builder(resources)
+        .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
+        .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
+        .addTokenFilter("OpenNLPLemmatizer", "lemmatizerModel", lemmatizerModelFile)
+        .build();
+    assertAnalyzesTo(maxEntLemmatizing, SENTENCES, SENTENCES_maxent_punc,
+        null, null, SENTENCES_posTags, null, null, true);
+  }
+
+  /**
+   * Runs {@code SENTENCE_both} through the OpenNLP tokenizer, the POS filter and a
+   * lemmatizer configured with both a dictionary and a MaxEnt model, asserting the
+   * output against {@code SENTENCE_both_punc} and {@code SENTENCE_both_posTags}.
+   */
+  public void test1SentenceDictionaryAndMaxEnt() throws Exception {
+    ClasspathResourceLoader resources = new ClasspathResourceLoader(getClass());
+    // NOTE(review): the POS model and dictionary are named literally here rather than
+    // through the class constants used by the sibling tests -- confirm they match.
+    CustomAnalyzer combinedLemmatizing = CustomAnalyzer.builder(resources)
+        .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
+        .addTokenFilter("opennlpPOS", "posTaggerModel", "en-test-pos-maxent.bin")
+        .addTokenFilter("opennlplemmatizer", "dictionary", "en-test-lemmas.dict", "lemmatizerModel", lemmatizerModelFile)
+        .build();
+    assertAnalyzesTo(combinedLemmatizing, SENTENCE_both, SENTENCE_both_punc,
+        null, null, SENTENCE_both_posTags, null, null, true);
+  }
+
+  /**
+   * Runs {@code SENTENCES_both} through the OpenNLP tokenizer, the POS filter and a
+   * lemmatizer configured with both a dictionary and a MaxEnt model, asserting the
+   * output against {@code SENTENCES_both_punc} and {@code SENTENCES_both_posTags}.
+   */
+  public void test2SentencesDictionaryAndMaxEnt() throws Exception {
+    ClasspathResourceLoader resources = new ClasspathResourceLoader(getClass());
+    CustomAnalyzer combinedLemmatizing = CustomAnalyzer.builder(resources)
+        .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
+        .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
+        .addTokenFilter("opennlplemmatizer", "dictionary", lemmatizerDictFile, "lemmatizerModel", lemmatizerModelFile)
+        .build();
+    assertAnalyzesTo(combinedLemmatizing, SENTENCES_both, SENTENCES_both_punc,
+        null, null, SENTENCES_both_posTags, null, null, true);
+  }
+
+  /**
+   * Places the dictionary-only lemmatizer between a keyword-repeat filter and a
+   * remove-duplicates filter, asserting the output against
+   * {@code SENTENCES_dict_keep_orig_punc} and {@code SENTENCES_keep_orig_posTags}.
+   */
+  public void testKeywordAttributeAwarenessDictionaryOnly() throws Exception {
+    ClasspathResourceLoader resources = new ClasspathResourceLoader(getClass());
+    CustomAnalyzer keepOriginalAnalyzer = CustomAnalyzer.builder(resources)
+        .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
+        .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
+        .addTokenFilter(KeywordRepeatFilterFactory.class)
+        .addTokenFilter("opennlplemmatizer", "dictionary", lemmatizerDictFile)
+        .addTokenFilter(RemoveDuplicatesTokenFilterFactory.class)
+        .build();
+    assertAnalyzesTo(keepOriginalAnalyzer, SENTENCES, SENTENCES_dict_keep_orig_punc,
+        null, null, SENTENCES_keep_orig_posTags, null, null, true);
+  }
+
+  /**
+   * Places the MaxEnt-only lemmatizer between a keyword-repeat filter and a
+   * remove-duplicates filter, asserting the output against
+   * {@code SENTENCES_max_ent_keep_orig_punc} and {@code SENTENCES_keep_orig_posTags}.
+   */
+  public void testKeywordAttributeAwarenessMaxEntOnly() throws Exception {
+    ClasspathResourceLoader resources = new ClasspathResourceLoader(getClass());
+    CustomAnalyzer keepOriginalAnalyzer = CustomAnalyzer.builder(resources)
+        .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
+        .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
+        .addTokenFilter(KeywordRepeatFilterFactory.class)
+        .addTokenFilter("opennlplemmatizer", "lemmatizerModel", lemmatizerModelFile)
+        .addTokenFilter(RemoveDuplicatesTokenFilterFactory.class)
+        .build();
+    assertAnalyzesTo(keepOriginalAnalyzer, SENTENCES, SENTENCES_max_ent_keep_orig_punc,
+        null, null, SENTENCES_keep_orig_posTags, null, null, true);
+  }
+
+  /**
+   * Places the dictionary-plus-MaxEnt lemmatizer between a keyword-repeat filter and a
+   * remove-duplicates filter, asserting the output against
+   * {@code SENTENCES_both_keep_orig_punc} and {@code SENTENCES_both_keep_orig_posTags}.
+   */
+  public void testKeywordAttributeAwarenessDictionaryAndMaxEnt() throws Exception {
+    ClasspathResourceLoader resources = new ClasspathResourceLoader(getClass());
+    CustomAnalyzer keepOriginalAnalyzer = CustomAnalyzer.builder(resources)
+        .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
+        .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
+        .addTokenFilter(KeywordRepeatFilterFactory.class)
+        .addTokenFilter("opennlplemmatizer", "dictionary", lemmatizerDictFile, "lemmatizerModel", lemmatizerModelFile)
+        .addTokenFilter(RemoveDuplicatesTokenFilterFactory.class)
+        .build();
+    assertAnalyzesTo(keepOriginalAnalyzer, SENTENCES_both, SENTENCES_both_keep_orig_punc,
+        null, null, SENTENCES_both_keep_orig_posTags, null, null, true);
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/TestOpenNLPPOSFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/TestOpenNLPPOSFilterFactory.java b/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/TestOpenNLPPOSFilterFactory.java
new file mode 100644
index 0000000..10372d0
--- /dev/null
+++ b/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/TestOpenNLPPOSFilterFactory.java
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.opennlp;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.custom.CustomAnalyzer;
+import org.apache.lucene.analysis.payloads.TypeAsPayloadTokenFilterFactory;
+import org.apache.lucene.analysis.util.ClasspathResourceLoader;
+
+/**
+ * Needs the OpenNLP Tokenizer because it creates full streams of punctuation.
+ * The POS model is based on this tokenization.
+ *
+ * Tagging models are created from tiny test data in opennlp/tools/test-model-data/ and are not very accurate.
+ */
+public class TestOpenNLPPOSFilterFactory extends BaseTokenStreamTestCase {
+
+  // Two-sentence fixture: input text, expected terms, offsets and POS tags (one entry per token).
+  private static final String SENTENCES = "Sentence number 1 has 6 words. Sentence number 2, 5 words.";
+  private static final String[] SENTENCES_punc
+      = {"Sentence", "number", "1", "has", "6", "words", ".", "Sentence", "number", "2", ",", "5", "words", "."};
+  private static final int[] SENTENCES_startOffsets = {0, 9, 16, 18, 22, 24, 29, 31, 40, 47, 48, 50, 52, 57};
+  private static final int[] SENTENCES_endOffsets = {8, 15, 17, 21, 23, 29, 30, 39, 46, 48, 49, 51, 57, 58};
+  private static final String[] SENTENCES_posTags
+      = {"NN", "NN", "CD", "VBZ", "CD", "NNS", ".", "NN", "NN", "CD", ",", "CD", "NNS", "."};
+
+  // Name-oriented fixture; not referenced by any test in this class.
+  private static final String NAMES2 = "Royal Flash is a tale about Harry Flashman.";
+  private static final String[] NAMES2_punc = {"Royal", "Flash", "is", "a", "tale", "about", "Harry", "Flashman", "."};
+  private static final String[] NAMES2_OUT = { "word", "word", "word", "word", "word", "word", "word", "person", "word" };
+
+  // Input without any sentence-ending punctuation.
+  private static final String NO_BREAK = "No period";
+  private static final String[] NO_BREAK_terms = {"No", "period"};
+  private static final int[] NO_BREAK_startOffsets = {0, 3};
+  private static final int[] NO_BREAK_endOffsets = {2, 9};
+
+  // Model files, resolved through a ClasspathResourceLoader anchored at this class.
+  private static final String sentenceModelFile = "en-test-sent.bin";
+  private static final String tokenizerModelFile = "en-test-tokenizer.bin";
+  private static final String posTaggerModelFile = "en-test-pos-maxent.bin";
+
+
+  /** Encodes each string as UTF-8 bytes; {@code null} entries stay {@code null}. */
+  private static byte[][] toPayloads(String... strings) {
+    return Arrays.stream(strings)
+        .map(tag -> tag == null ? null : tag.getBytes(StandardCharsets.UTF_8))
+        .toArray(byte[][]::new);
+  }
+
+  /**
+   * Starts the analysis chain shared by every test here: the OpenNLP tokenizer
+   * followed by the OpenNLP POS filter. Callers may append further filters
+   * before calling {@code build()}.
+   */
+  private CustomAnalyzer.Builder posTaggingChain() throws IOException {
+    return CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
+        .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
+        .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile);
+  }
+
+  /** Checks terms and offsets only. */
+  public void testBasic() throws IOException {
+    CustomAnalyzer tagger = posTaggingChain().build();
+    assertAnalyzesTo(tagger, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets);
+  }
+
+  /**
+   * Checks the POS tags twice: first passed as the expected types, then, with a
+   * type-as-payload filter appended, passed as the expected payloads.
+   */
+  public void testPOS() throws Exception {
+    CustomAnalyzer tagger = posTaggingChain().build();
+    assertAnalyzesTo(tagger, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets,
+        SENTENCES_posTags, null, null, true);
+
+    CustomAnalyzer payloadTagger = posTaggingChain()
+        .addTokenFilter(TypeAsPayloadTokenFilterFactory.class)
+        .build();
+    assertAnalyzesTo(payloadTagger, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets,
+        null, null, null, true, toPayloads(SENTENCES_posTags));
+  }
+
+  /** Checks terms and offsets for input that has no sentence-ending punctuation. */
+  public void testNoBreak() throws Exception {
+    CustomAnalyzer tagger = posTaggingChain().build();
+    assertAnalyzesTo(tagger, NO_BREAK, NO_BREAK_terms, NO_BREAK_startOffsets, NO_BREAK_endOffsets,
+        null, null, null, true);
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/TestOpenNLPSentenceBreakIterator.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/TestOpenNLPSentenceBreakIterator.java b/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/TestOpenNLPSentenceBreakIterator.java
new file mode 100644
index 0000000..4ee6570
--- /dev/null
+++ b/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/TestOpenNLPSentenceBreakIterator.java
@@ -0,0 +1,201 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.opennlp;
+
+import java.io.IOException;
+import java.text.BreakIterator;
+import java.text.CharacterIterator;
+
+import org.apache.lucene.analysis.opennlp.tools.NLPSentenceDetectorOp;
+import org.apache.lucene.analysis.opennlp.tools.OpenNLPOpsFactory;
+import org.apache.lucene.analysis.util.CharArrayIterator;
+import org.apache.lucene.analysis.util.ClasspathResourceLoader;
+import org.apache.lucene.util.LuceneTestCase;
+import org.junit.BeforeClass;
+
+public class TestOpenNLPSentenceBreakIterator extends LuceneTestCase {
+
+  private static final String TEXT
+      //                                                                                                     111
+      //           111111111122222222223333333333444444444455555555556666666666777777777788888888889999999999000
+      // 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012
+      = "Sentence number 1 has 6 words. Sentence number 2, 5 words. And finally, sentence number 3 has 8 words.";
+  // The three sentences of TEXT, each including its trailing space (if any).
+  private static final String[] SENTENCES = new String[] {
+    "Sentence number 1 has 6 words. ", "Sentence number 2, 5 words. ", "And finally, sentence number 3 has 8 words." };
+  // Extra text placed around a sentence by the slice tests; it lies outside the iterated range.
+  private static final String PADDING = " Word. Word. ";
+  private static final String sentenceModelFile = "en-test-sent.bin";
+
+
+  /** Loads the sentence model once so that {@code getSentenceDetector} can be called by name in the tests. */
+  @BeforeClass
+  public static void populateCache() throws IOException {
+    OpenNLPOpsFactory.getSentenceModel
+        (sentenceModelFile, new ClasspathResourceLoader(TestOpenNLPSentenceBreakIterator.class));
+  }
+
+  /** Creates a fresh break iterator backed by a sentence detector for the test model. */
+  private static BreakIterator newSentenceBreakIterator() throws Exception {
+    NLPSentenceDetectorOp sentenceDetectorOp = OpenNLPOpsFactory.getSentenceDetector(sentenceModelFile);
+    return new OpenNLPSentenceBreakIterator(sentenceDetectorOp);
+  }
+
+  /** Walks the three-sentence text, supplied first as a String and then as a CharArrayIterator. */
+  public void testThreeSentences() throws Exception {
+    BreakIterator bi = newSentenceBreakIterator();
+    bi.setText(TEXT); // String is converted to StringCharacterIterator
+    do3SentenceTest(bi);
+
+    bi.setText(getCharArrayIterator(TEXT));
+    do3SentenceTest(bi);
+  }
+
+  /** Wraps the whole of {@code text} in a CharArrayIterator. */
+  private CharacterIterator getCharArrayIterator(String text) {
+    return getCharArrayIterator(text, 0, text.length());
+  }
+
+  /** Wraps {@code length} chars of {@code text}, beginning at {@code start}, in a CharArrayIterator. */
+  private CharacterIterator getCharArrayIterator(String text, int start, int length) {
+    CharArrayIterator charArrayIterator = new CharArrayIterator() {
+      // Lie about all surrogates to the sentence tokenizer,
+      // instead we treat them all as SContinue so we won't break around them.
+      @Override
+      protected char jreBugWorkaround(char ch) {
+        return ch >= 0xD800 && ch <= 0xDFFF ? 0x002C : ch;
+      }
+    };
+    charArrayIterator.setText(text.toCharArray(), start, length);
+    return charArrayIterator;
+  }
+
+  /**
+   * Exercises forward and backward iteration, {@code following}/{@code preceding} and
+   * {@code next(int)} on an iterator positioned over {@link #TEXT}.
+   * The call order matters: each call moves the iterator's current position.
+   */
+  private void do3SentenceTest(BreakIterator bi) {
+    // Forward: each next() must land on the end of the following sentence.
+    assertEquals(0, bi.current());
+    assertEquals(0, bi.first());
+    assertEquals(SENTENCES[0], TEXT.substring(bi.current(), bi.next()));
+    assertEquals(SENTENCES[1], TEXT.substring(bi.current(), bi.next()));
+    int current = bi.current();
+    assertEquals(bi.getText().getEndIndex(), bi.next());
+    int next = bi.current();
+    assertEquals(SENTENCES[2], TEXT.substring(current, next));
+    assertEquals(BreakIterator.DONE, bi.next());
+
+    // Backward: each previous() must land on the start of the preceding sentence.
+    assertEquals(TEXT.length(), bi.last());
+    int end = bi.current();
+    assertEquals(SENTENCES[2], TEXT.substring(bi.previous(), end));
+    end = bi.current();
+    assertEquals(SENTENCES[1], TEXT.substring(bi.previous(), end));
+    end = bi.current();
+    assertEquals(SENTENCES[0], TEXT.substring(bi.previous(), end));
+    assertEquals(BreakIterator.DONE, bi.previous());
+    assertEquals(0, bi.current());
+
+    // Sentence starts are at offsets 0, 31 and 59 of TEXT.
+    assertEquals(59, bi.following(39));
+    assertEquals(59, bi.following(31));
+    assertEquals(31, bi.following(30));
+
+    assertEquals(0, bi.preceding(57));
+    assertEquals(0, bi.preceding(58));
+    assertEquals(31, bi.preceding(59));
+
+    assertEquals(0, bi.first());
+    assertEquals(59, bi.next(2));
+    assertEquals(0, bi.next(-2));
+  }
+
+  /** A single sentence supplied as a CharArrayIterator. */
+  public void testSingleSentence() throws Exception {
+    BreakIterator bi = newSentenceBreakIterator();
+    bi.setText(getCharArrayIterator(SENTENCES[0]));
+    test1Sentence(bi, SENTENCES[0]);
+  }
+
+  /**
+   * Checks an iterator whose text holds exactly one sentence equal to {@code text}.
+   * Iterator positions are turned into offsets within {@code text} by subtracting the
+   * begin index of the iterated text.
+   */
+  private void test1Sentence(BreakIterator bi, String text) {
+    int start = bi.getText().getBeginIndex();
+    assertEquals(start, bi.first());
+    int current = bi.current();
+    assertEquals(bi.getText().getEndIndex(), bi.next());
+    // Keep 'end' as a raw iterator position; 'start' is subtracted exactly once below.
+    // (Previously it was subtracted both here and in the substring call, which is only
+    // harmless when the begin index is 0.)
+    int end = bi.current();
+    assertEquals(text, text.substring(current - start, end - start));
+
+    assertEquals(text.length(), bi.last() - start);
+    end = bi.current();
+    bi.previous();
+    assertEquals(BreakIterator.DONE, bi.previous());
+    int previous = bi.current();
+    assertEquals(text, text.substring(previous - start, end - start));
+    assertEquals(start, bi.current());
+
+    // With a single sentence there is no boundary strictly after or before a mid-text offset.
+    assertEquals(BreakIterator.DONE, bi.following(bi.last() / 2 + start));
+
+    assertEquals(BreakIterator.DONE, bi.preceding(bi.last() / 2 + start));
+
+    assertEquals(start, bi.first());
+    assertEquals(BreakIterator.DONE, bi.next(13));
+    assertEquals(BreakIterator.DONE, bi.next(-8));
+  }
+
+  /** The sentence is followed by padding that lies outside the iterated range. */
+  public void testSliceEnd() throws Exception {
+    BreakIterator bi = newSentenceBreakIterator();
+    bi.setText(getCharArrayIterator(SENTENCES[0] + PADDING, 0, SENTENCES[0].length()));
+
+    test1Sentence(bi, SENTENCES[0]);
+  }
+
+  /** The sentence is preceded by padding that lies outside the iterated range. */
+  public void testSliceStart() throws Exception {
+    BreakIterator bi = newSentenceBreakIterator();
+    bi.setText(getCharArrayIterator(PADDING + SENTENCES[0], PADDING.length(), SENTENCES[0].length()));
+    test1Sentence(bi, SENTENCES[0]);
+  }
+
+  /** The sentence has padding on both sides, all of it outside the iterated range. */
+  public void testSliceMiddle() throws Exception {
+    BreakIterator bi = newSentenceBreakIterator();
+    bi.setText(getCharArrayIterator(PADDING + SENTENCES[0] + PADDING, PADDING.length(), SENTENCES[0].length()));
+
+    test1Sentence(bi, SENTENCES[0]);
+  }
+
+  /** the current position must be ignored, initial position is always first() */
+  public void testFirstPosition() throws Exception {
+    BreakIterator bi = newSentenceBreakIterator();
+    bi.setText(getCharArrayIterator(SENTENCES[0]));
+    assertEquals(SENTENCES[0].length(), bi.last()); // side-effect: set current position to last()
+    test1Sentence(bi, SENTENCES[0]);
+  }
+
+  /** Whitespace-only input must yield no sentences. */
+  public void testWhitespaceOnly() throws Exception {
+    BreakIterator bi = newSentenceBreakIterator();
+    bi.setText("   \n \n\n\r\n\t  \n");
+    test0Sentences(bi);
+  }
+
+  /** Empty input must yield no sentences. */
+  public void testEmptyString() throws Exception {
+    BreakIterator bi = newSentenceBreakIterator();
+    bi.setText("");
+    test0Sentences(bi);
+  }
+
+  /** Checks an iterator that has no sentences: every move reports DONE and first()/last() are 0. */
+  private void test0Sentences(BreakIterator bi) {
+    assertEquals(0, bi.current());
+    assertEquals(0, bi.first());
+    assertEquals(BreakIterator.DONE, bi.next());
+    assertEquals(0, bi.last());
+    assertEquals(BreakIterator.DONE, bi.previous());
+    assertEquals(BreakIterator.DONE, bi.following(0));
+    assertEquals(BreakIterator.DONE, bi.preceding(0));
+    assertEquals(0, bi.first());
+    assertEquals(BreakIterator.DONE, bi.next(13));
+    assertEquals(BreakIterator.DONE, bi.next(-8));
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/TestOpenNLPTokenizerFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/TestOpenNLPTokenizerFactory.java b/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/TestOpenNLPTokenizerFactory.java
new file mode 100644
index 0000000..db2bbb2
--- /dev/null
+++ b/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/TestOpenNLPTokenizerFactory.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.opennlp;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.custom.CustomAnalyzer;
+import org.apache.lucene.analysis.util.ClasspathResourceLoader;
+import org.junit.Test;
+
+/**
+ * Tests {@link OpenNLPTokenizerFactory} and, through it, the tokenizer it creates.
+ * The sentence and tokenizer model files are resolved through a
+ * {@link ClasspathResourceLoader} anchored at this class.
+ */
+public class TestOpenNLPTokenizerFactory extends BaseTokenStreamTestCase {
+
+  private static final String SENTENCES = "Sentence number 1 has 6 words. Sentence number 2, 5 words.";
+  private static final String[] SENTENCES_split = {"Sentence number 1 has 6 words. ", "Sentence number 2, 5 words."};
+  private static final String[] SENTENCES_punc = {"Sentence", "number", "1", "has", "6", "words", ".", "Sentence", "number", "2", ",", "5", "words", "."};
+  private static final int[] SENTENCES_startOffsets = {0, 9, 16, 18, 22, 24, 29, 31, 40, 47, 48, 50, 52, 57};
+  private static final int[] SENTENCES_endOffsets = {8, 15, 17, 21, 23, 29, 30, 39, 46, 48, 49, 51, 57, 58};
+
+  private static final String SENTENCE1 = "Sentence number 1 has 6 words.";
+  private static final String[] SENTENCE1_punc = {"Sentence", "number", "1", "has", "6", "words", "."};
+
+  private static final String sentenceModelFile = "en-test-sent.bin";
+  private static final String tokenizerModelFile = "en-test-tokenizer.bin";
+
+  /** With both models configured, terms (and, for the two-sentence input, offsets) must match. */
+  @Test
+  public void testTokenizer() throws IOException {
+    CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
+        .withTokenizer("opennlp", "sentenceModel", sentenceModelFile, "tokenizerModel", tokenizerModelFile)
+        .build();
+    assertAnalyzesTo(analyzer, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets);
+    assertAnalyzesTo(analyzer, SENTENCE1, SENTENCE1_punc);
+  }
+
+  /** Omitting 'sentenceModel' must be rejected with an IllegalArgumentException. */
+  @Test
+  public void testTokenizerNoSentenceDetector() throws IOException {
+    IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
+      CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
+          .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile)
+          .build();
+    });
+    assertTrue(expected.getMessage().contains("Configuration Error: missing parameter 'sentenceModel'"));
+  }
+
+  /** Omitting 'tokenizerModel' must be rejected with an IllegalArgumentException. */
+  @Test
+  public void testTokenizerNoTokenizer() throws IOException {
+    IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
+      CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
+          .withTokenizer("opennlp", "sentenceModel", sentenceModelFile)
+          .build();
+    });
+    assertTrue(expected.getMessage().contains("Configuration Error: missing parameter 'tokenizerModel'"));
+  }
+
+  // test analyzer caching the tokenizer
+  @Test
+  public void testClose() throws IOException {
+    // A plain mutable map; avoids the anonymous HashMap subclass that
+    // double-brace initialization creates.
+    Map<String,String> args = new HashMap<>();
+    args.put("sentenceModel", sentenceModelFile);
+    args.put("tokenizerModel", tokenizerModelFile);
+    OpenNLPTokenizerFactory factory = new OpenNLPTokenizerFactory(args);
+    factory.inform(new ClasspathResourceLoader(getClass()));
+
+    Tokenizer ts = factory.create(newAttributeFactory());
+    ts.setReader(new StringReader(SENTENCES));
+
+    // The same tokenizer instance is closed and then reused twice; the exact
+    // reset/close/setReader sequence below is what this test pins down.
+    ts.reset();
+    ts.close();
+    ts.reset();
+    ts.setReader(new StringReader(SENTENCES));
+    assertTokenStreamContents(ts, SENTENCES_punc);
+    ts.close();
+    ts.reset();
+    ts.setReader(new StringReader(SENTENCES));
+    assertTokenStreamContents(ts, SENTENCES_punc);
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/analysis/opennlp/src/tools/test-model-data/README.txt
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/tools/test-model-data/README.txt b/lucene/analysis/opennlp/src/tools/test-model-data/README.txt
new file mode 100644
index 0000000..3ac0aa3
--- /dev/null
+++ b/lucene/analysis/opennlp/src/tools/test-model-data/README.txt
@@ -0,0 +1,6 @@
+Use small training data to create small models for unit tests.
+Training data derived from the Reuters corpus in a very unscientific way.
+Tagging done with CCG Urbana-Champaign online demos:
+	http://cogcomp.cs.illinois.edu/page/demos
+
+Run 'ant train-test-models' to generate models from training data here.


[11/12] lucene-solr:branch_7x: LUCENE-2899: Add OpenNLP Analysis capabilities as a module

Posted by sa...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/package-info.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/package-info.java b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/package-info.java
new file mode 100644
index 0000000..527e24f
--- /dev/null
+++ b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/package-info.java
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Analysis components based on OpenNLP
+ */
+package org.apache.lucene.analysis.opennlp;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPChunkerOp.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPChunkerOp.java b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPChunkerOp.java
new file mode 100644
index 0000000..f6a5ea8
--- /dev/null
+++ b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPChunkerOp.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.opennlp.tools;
+
+import java.io.IOException;
+import opennlp.tools.chunker.ChunkerME;
+import opennlp.tools.chunker.ChunkerModel;
+
+/**
+ * Supply OpenNLP Chunking tool
+ * Requires binary models from OpenNLP project on SourceForge.
+ * <p>
+ * Wraps a single {@link ChunkerME}; {@link #getChunks} is synchronized, so calls on one
+ * instance are serialized.
+ */
+public class NLPChunkerOp {
+  // Assigned once in the constructor and never replaced.
+  private final ChunkerME chunker;
+
+  /**
+   * @param chunkerModel the model used to create the underlying {@link ChunkerME}
+   */
+  public NLPChunkerOp(ChunkerModel chunkerModel) throws IOException {
+    chunker = new ChunkerME(chunkerModel);
+  }
+
+  /**
+   * Chunks one sequence of tagged words.
+   *
+   * @param words the tokens to chunk
+   * @param tags  the tags passed to the chunker alongside {@code words}
+   * @param probs optional array handed to {@code ChunkerME.probs(double[])} after chunking;
+   *              pass {@code null} to skip that call
+   * @return the chunks returned by the chunker for {@code words}/{@code tags}
+   */
+  public synchronized String[] getChunks(String[] words, String[] tags, double[] probs) {
+    String[] chunks = chunker.chunk(words, tags);
+    if (probs != null) {
+      chunker.probs(probs);
+    }
+    return chunks;
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPLemmatizerOp.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPLemmatizerOp.java b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPLemmatizerOp.java
new file mode 100644
index 0000000..b09c63e
--- /dev/null
+++ b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPLemmatizerOp.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.opennlp.tools;
+
+import java.io.IOException;
+import java.io.InputStream;
+
+import opennlp.tools.lemmatizer.DictionaryLemmatizer;
+import opennlp.tools.lemmatizer.LemmatizerME;
+import opennlp.tools.lemmatizer.LemmatizerModel;
+
+/**
+ * <p>Supply OpenNLP Lemmatizer tools.</p>
+ * <p>
+ *   Both a dictionary-based lemmatizer and a MaxEnt lemmatizer are supported.
+ *   If both are configured, the dictionary-based lemmatizer is tried first,
+ *   and then the MaxEnt lemmatizer is consulted for out-of-vocabulary tokens.
+ * </p>
+ * <p>
+ *   The MaxEnt implementation requires binary models from OpenNLP project on SourceForge.
+ * </p>
+ */
+public class NLPLemmatizerOp {
+  private final DictionaryLemmatizer dictionaryLemmatizer;
+  private final LemmatizerME lemmatizerME;
+
+  /**
+   * Either argument may be {@code null} to leave that lemmatizer out, but not both.
+   *
+   * @param dictionary      stream to build the dictionary-based lemmatizer from, or {@code null}
+   * @param lemmatizerModel model to build the MaxEnt lemmatizer from, or {@code null}
+   */
+  public NLPLemmatizerOp(InputStream dictionary, LemmatizerModel lemmatizerModel) throws IOException {
+    assert dictionary != null || lemmatizerModel != null : "At least one parameter must be non-null";
+    if (dictionary == null) {
+      dictionaryLemmatizer = null;
+    } else {
+      dictionaryLemmatizer = new DictionaryLemmatizer(dictionary);
+    }
+    if (lemmatizerModel == null) {
+      lemmatizerME = null;
+    } else {
+      lemmatizerME = new LemmatizerME(lemmatizerModel);
+    }
+  }
+
+  /**
+   * Lemmatizes one sequence of tagged words.
+   * <p>
+   * With a dictionary configured, every entry the dictionary reports as "O" is taken from
+   * the MaxEnt lemmatizer when one is configured, otherwise from the original word.
+   * A MaxEnt result of "_" is likewise replaced by the original word.
+   *
+   * @param words   the tokens to lemmatize
+   * @param postags the tags passed to the lemmatizers alongside {@code words}
+   * @return one lemma per entry returned by the lemmatizer that was consulted first
+   */
+  public String[] lemmatize(String[] words, String[] postags) {
+    if (dictionaryLemmatizer == null) {
+      // there is only a MaxEnt lemmatizer
+      String[] result = lemmatizerME.lemmatize(words, postags);
+      for (int pos = 0; pos < result.length; ++pos) {
+        if ("_".equals(result[pos])) {
+          result[pos] = words[pos];    // put back the original word if no lemma is found
+        }
+      }
+      return result;
+    }
+
+    String[] result = dictionaryLemmatizer.lemmatize(words, postags);
+    String[] fallback = null;          // MaxEnt output, computed at most once and only when needed
+    for (int pos = 0; pos < result.length; ++pos) {
+      if (!result[pos].equals("O")) {
+        continue;                      // the dictionary knows this word
+      }
+      if (lemmatizerME == null) {
+        result[pos] = words[pos];      // no MaxEnt lemmatizer: put back the original word
+        continue;
+      }
+      if (fallback == null) {
+        fallback = lemmatizerME.lemmatize(words, postags);
+      }
+      // put back the original word if the MaxEnt lemmatizer finds no lemma either
+      result[pos] = "_".equals(fallback[pos]) ? words[pos] : fallback[pos];
+    }
+    return result;
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPNERTaggerOp.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPNERTaggerOp.java b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPNERTaggerOp.java
new file mode 100644
index 0000000..22e617d
--- /dev/null
+++ b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPNERTaggerOp.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.opennlp.tools;
+
+import opennlp.tools.namefind.NameFinderME;
+import opennlp.tools.namefind.TokenNameFinder;
+import opennlp.tools.namefind.TokenNameFinderModel;
+import opennlp.tools.util.Span;
+
+/**
+ * Supply OpenNLP Named Entity Recognition tool
+ * Requires binary models from OpenNLP project on SourceForge.
+ *
+ * Usage: from <a href="http://opennlp.apache.org/docs/1.8.3/manual/opennlp.html#tools.namefind.recognition.api"
+ *             >the OpenNLP documentation</a>:
+ *
+ * "The NameFinderME class is not thread safe, it must only be called from one thread.
+ * To use multiple threads multiple NameFinderME instances sharing the same model instance
+ * can be created. The input text should be segmented into documents, sentences and tokens.
+ * To perform entity detection an application calls the find method for every sentence in
+ * the document. After every document clearAdaptiveData must be called to clear the adaptive
+ * data in the feature generators. Not calling clearAdaptiveData can lead to a sharp drop
+ * in the detection rate after a few documents."
+ *
+ * Because the wrapped name finder is documented as not thread safe, both
+ * {@link #getNames(String[])} and {@link #reset()} are synchronized on this instance.
+ */
+public class NLPNERTaggerOp {
+  private final TokenNameFinder nameFinder;
+
+  /**
+   * Creates a tagger with its own name finder built from the given model.
+   *
+   * @param model the name finder model to use
+   */
+  public NLPNERTaggerOp(TokenNameFinderModel model) {
+    this.nameFinder = new NameFinderME(model);
+  }
+
+  /**
+   * Runs the name finder over the given tokens.
+   *
+   * @param words the tokens to search for names
+   * @return the spans reported by the name finder
+   */
+  public synchronized Span[] getNames(String[] words) {
+    // synchronized like reset(): both touch the same non-thread-safe name finder
+    return nameFinder.find(words);
+  }
+
+  /**
+   * Clears the name finder's adaptive data.
+   */
+  public synchronized void reset() {
+    nameFinder.clearAdaptiveData();
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPPOSTaggerOp.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPPOSTaggerOp.java b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPPOSTaggerOp.java
new file mode 100644
index 0000000..447e1c0
--- /dev/null
+++ b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPPOSTaggerOp.java
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.opennlp.tools;
+
+import java.io.IOException;
+
+import opennlp.tools.postag.POSModel;
+import opennlp.tools.postag.POSTagger;
+import opennlp.tools.postag.POSTaggerME;
+
+/**
+ * Supply OpenNLP Parts-Of-Speech Tagging tool
+ * Requires binary models from OpenNLP project on SourceForge.
+ */
+
+public class NLPPOSTaggerOp {
+  private POSTagger tagger = null;
+
+  public NLPPOSTaggerOp(POSModel model) throws IOException {
+    tagger = new POSTaggerME(model);
+  }
+
+  public synchronized String[] getPOSTags(String[] words) {
+    return tagger.tag(words);
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPSentenceDetectorOp.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPSentenceDetectorOp.java b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPSentenceDetectorOp.java
new file mode 100644
index 0000000..21983ce
--- /dev/null
+++ b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPSentenceDetectorOp.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.opennlp.tools;
+
+import java.io.IOException;
+
+import opennlp.tools.sentdetect.SentenceDetectorME;
+import opennlp.tools.sentdetect.SentenceModel;
+import opennlp.tools.util.Span;
+
+/**
+ * Supply OpenNLP Sentence Detector tool
+ * Requires binary models from OpenNLP project on SourceForge.
+ */
+public class NLPSentenceDetectorOp {
+  private final SentenceDetectorME sentenceSplitter;
+
+  /**
+   * Creates a detector backed by the given sentence model.
+   *
+   * @param model the sentence model to use
+   * @throws IOException declared for callers; not thrown by the code visible here
+   */
+  public NLPSentenceDetectorOp(SentenceModel model) throws IOException {
+    this.sentenceSplitter = new SentenceDetectorME(model);
+  }
+
+  /**
+   * Creates a detector without a model; the whole input is then treated as one sentence.
+   */
+  public NLPSentenceDetectorOp() {
+    this.sentenceSplitter = null;
+  }
+
+  /**
+   * Splits the given text into sentence spans; calls are serialized on this instance.
+   *
+   * @param line the text to split
+   * @return the detected sentence spans, or a single span covering all of {@code line}
+   *         when no model was supplied
+   */
+  public synchronized Span[] splitSentences(String line) {
+    if (sentenceSplitter == null) {
+      // no model: report the entire input as one sentence
+      return new Span[] { new Span(0, line.length()) };
+    }
+    return sentenceSplitter.sentPosDetect(line);
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPTokenizerOp.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPTokenizerOp.java b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPTokenizerOp.java
new file mode 100644
index 0000000..0aeb713
--- /dev/null
+++ b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/NLPTokenizerOp.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.opennlp.tools;
+
+import opennlp.tools.tokenize.Tokenizer;
+import opennlp.tools.tokenize.TokenizerME;
+import opennlp.tools.tokenize.TokenizerModel;
+import opennlp.tools.util.Span;
+
+/**
+ * Supply OpenNLP Sentence Tokenizer tool
+ * Requires binary models from OpenNLP project on SourceForge.
+ */
+public class NLPTokenizerOp {
+  private final Tokenizer tokenizer;
+
+  /**
+   * Creates a tokenizer backed by the given tokenizer model.
+   *
+   * @param model the tokenizer model to use
+   */
+  public NLPTokenizerOp(TokenizerModel model) {
+    this.tokenizer = new TokenizerME(model);
+  }
+
+  /**
+   * Creates a tokenizer without a model; each sentence is then returned as a single term.
+   */
+  public NLPTokenizerOp() {
+    this.tokenizer = null;
+  }
+
+  /**
+   * Splits a sentence into term spans; calls are serialized on this instance.
+   *
+   * @param sentence the text to tokenize
+   * @return the term spans, or a single span covering all of {@code sentence}
+   *         when no model was supplied
+   */
+  public synchronized Span[] getTerms(String sentence) {
+    if (tokenizer != null) {
+      return tokenizer.tokenizePos(sentence);
+    }
+    // no model: the whole sentence is one term
+    return new Span[] { new Span(0, sentence.length()) };
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/OpenNLPOpsFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/OpenNLPOpsFactory.java b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/OpenNLPOpsFactory.java
new file mode 100644
index 0000000..5348857
--- /dev/null
+++ b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/OpenNLPOpsFactory.java
@@ -0,0 +1,176 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.opennlp.tools;
+
+import java.io.ByteArrayInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.io.Reader;
+import java.nio.charset.StandardCharsets;
+import java.util.Map;
+import java.util.concurrent.ConcurrentHashMap;
+
+import opennlp.tools.chunker.ChunkerModel;
+import opennlp.tools.lemmatizer.LemmatizerModel;
+import opennlp.tools.namefind.TokenNameFinderModel;
+import opennlp.tools.postag.POSModel;
+import opennlp.tools.sentdetect.SentenceModel;
+import opennlp.tools.tokenize.TokenizerModel;
+import org.apache.lucene.analysis.util.ResourceLoader;
+
+/**
+ * Supply OpenNLP tool wrappers: sentence detector, tokenizer, part-of-speech tagger,
+ * chunker, named entity recognizer and lemmatizer.
+ * Cache model file objects. Assumes model files are thread-safe.
+ * <p>
+ * The {@code get*Model} methods and {@link #getLemmatizerDictionary} load a resource and cache
+ * it under its name. The other {@code get*} methods only look the name up in the cache, so
+ * the resource must have been loaded beforehand.
+ */
+public class OpenNLPOpsFactory {
+  private static final Map<String,SentenceModel> sentenceModels = new ConcurrentHashMap<>();
+  private static final Map<String,TokenizerModel> tokenizerModels = new ConcurrentHashMap<>();
+  private static final Map<String,POSModel> posTaggerModels = new ConcurrentHashMap<>();
+  private static final Map<String,ChunkerModel> chunkerModels = new ConcurrentHashMap<>();
+  private static final Map<String,TokenNameFinderModel> nerModels = new ConcurrentHashMap<>();
+  private static final Map<String,LemmatizerModel> lemmatizerModels = new ConcurrentHashMap<>();
+  private static final Map<String,String> lemmaDictionaries = new ConcurrentHashMap<>();
+
+  /**
+   * Creates a sentence detector for a cached model.
+   *
+   * @param modelName name of a previously loaded sentence model, or null for a model-less detector
+   */
+  public static NLPSentenceDetectorOp getSentenceDetector(String modelName) throws IOException {
+    if (modelName != null) {
+      SentenceModel model = sentenceModels.get(modelName);
+      return new NLPSentenceDetectorOp(model);
+    } else {
+      return new NLPSentenceDetectorOp();
+    }
+  }
+
+  /** Returns the cached sentence model for {@code modelName}, loading and caching it on first use. */
+  public static SentenceModel getSentenceModel(String modelName, ResourceLoader loader) throws IOException {
+    SentenceModel model = sentenceModels.get(modelName);
+    if (model == null) {
+      // close the resource stream once the model has been read
+      try (InputStream resource = loader.openResource(modelName)) {
+        model = new SentenceModel(resource);
+      }
+      sentenceModels.put(modelName, model);
+    }
+    return model;
+  }
+
+  /**
+   * Creates a tokenizer for a cached model.
+   *
+   * @param modelName name of a previously loaded tokenizer model, or null for a model-less tokenizer
+   */
+  public static NLPTokenizerOp getTokenizer(String modelName) throws IOException {
+    if (modelName == null) {
+      return new NLPTokenizerOp();
+    } else {
+      TokenizerModel model = tokenizerModels.get(modelName);
+      return new NLPTokenizerOp(model);
+    }
+  }
+
+  /** Returns the cached tokenizer model for {@code modelName}, loading and caching it on first use. */
+  public static TokenizerModel getTokenizerModel(String modelName, ResourceLoader loader) throws IOException {
+    TokenizerModel model = tokenizerModels.get(modelName);
+    if (model == null) {
+      try (InputStream resource = loader.openResource(modelName)) {
+        model = new TokenizerModel(resource);
+      }
+      tokenizerModels.put(modelName, model);
+    }
+    return model;
+  }
+
+  /** Creates a part-of-speech tagger for the cached model named {@code modelName}. */
+  public static NLPPOSTaggerOp getPOSTagger(String modelName) throws IOException {
+    POSModel model = posTaggerModels.get(modelName);
+    return new NLPPOSTaggerOp(model);
+  }
+
+  /** Returns the cached part-of-speech model for {@code modelName}, loading and caching it on first use. */
+  public static POSModel getPOSTaggerModel(String modelName, ResourceLoader loader) throws IOException {
+    POSModel model = posTaggerModels.get(modelName);
+    if (model == null) {
+      try (InputStream resource = loader.openResource(modelName)) {
+        model = new POSModel(resource);
+      }
+      posTaggerModels.put(modelName, model);
+    }
+    return model;
+  }
+
+  /** Creates a chunker for the cached model named {@code modelName}. */
+  public static NLPChunkerOp getChunker(String modelName) throws IOException {
+    ChunkerModel model = chunkerModels.get(modelName);
+    return new NLPChunkerOp(model);
+  }
+
+  /** Returns the cached chunker model for {@code modelName}, loading and caching it on first use. */
+  public static ChunkerModel getChunkerModel(String modelName, ResourceLoader loader) throws IOException {
+    ChunkerModel model = chunkerModels.get(modelName);
+    if (model == null) {
+      try (InputStream resource = loader.openResource(modelName)) {
+        model = new ChunkerModel(resource);
+      }
+      chunkerModels.put(modelName, model);
+    }
+    return model;
+  }
+
+  /** Creates a named entity tagger for the cached model named {@code modelName}. */
+  public static NLPNERTaggerOp getNERTagger(String modelName) throws IOException {
+    TokenNameFinderModel model = nerModels.get(modelName);
+    return new NLPNERTaggerOp(model);
+  }
+
+  /** Returns the cached name finder model for {@code modelName}, loading and caching it on first use. */
+  public static TokenNameFinderModel getNERTaggerModel(String modelName, ResourceLoader loader) throws IOException {
+    TokenNameFinderModel model = nerModels.get(modelName);
+    if (model == null) {
+      try (InputStream resource = loader.openResource(modelName)) {
+        model = new TokenNameFinderModel(resource);
+      }
+      nerModels.put(modelName, model);
+    }
+    return model;
+  }
+
+  /**
+   * Creates a lemmatizer from a cached dictionary and/or a cached lemmatizer model.
+   *
+   * @param dictionaryFile name of a dictionary previously loaded with
+   *        {@link #getLemmatizerDictionary}, or null to use no dictionary
+   * @param lemmatizerModelFile name of a model previously loaded with
+   *        {@link #getLemmatizerModel}, or null to use no model
+   */
+  public static NLPLemmatizerOp getLemmatizer(String dictionaryFile, String lemmatizerModelFile) throws IOException {
+    assert dictionaryFile != null || lemmatizerModelFile != null : "At least one parameter must be non-null";
+    InputStream dictionaryInputStream = null;
+    if (dictionaryFile != null) {
+      String dictionary = lemmaDictionaries.get(dictionaryFile);
+      // the dictionary is cached as text; hand each lemmatizer its own stream over it
+      dictionaryInputStream = new ByteArrayInputStream(dictionary.getBytes(StandardCharsets.UTF_8));
+    }
+    LemmatizerModel lemmatizerModel = lemmatizerModelFile == null ? null : lemmatizerModels.get(lemmatizerModelFile);
+    return new NLPLemmatizerOp(dictionaryInputStream, lemmatizerModel);
+  }
+
+  /**
+   * Returns the cached content of the lemma dictionary {@code dictionaryFile}, reading it
+   * as UTF-8 text and caching it on first use.
+   */
+  public static String getLemmatizerDictionary(String dictionaryFile, ResourceLoader loader) throws IOException {
+    String dictionary = lemmaDictionaries.get(dictionaryFile);
+    if (dictionary == null) {
+      StringBuilder builder = new StringBuilder();
+      // closing the reader also closes the underlying resource stream
+      try (Reader reader = new InputStreamReader(loader.openResource(dictionaryFile), StandardCharsets.UTF_8)) {
+        char[] chars = new char[8192];
+        int numRead;
+        while ((numRead = reader.read(chars, 0, chars.length)) != -1) {
+          builder.append(chars, 0, numRead);
+        }
+      }
+      dictionary = builder.toString();
+      lemmaDictionaries.put(dictionaryFile, dictionary);
+    }
+    return dictionary;
+  }
+
+  /** Returns the cached lemmatizer model for {@code modelName}, loading and caching it on first use. */
+  public static LemmatizerModel getLemmatizerModel(String modelName, ResourceLoader loader) throws IOException {
+    LemmatizerModel model = lemmatizerModels.get(modelName);
+    if (model == null) {
+      try (InputStream resource = loader.openResource(modelName)) {
+        model = new LemmatizerModel(resource);
+      }
+      lemmatizerModels.put(modelName, model);
+    }
+    return model;
+  }
+
+  // keeps unit test from blowing out memory
+  public static void clearModels() {
+    sentenceModels.clear();
+    tokenizerModels.clear();
+    posTaggerModels.clear();
+    chunkerModels.clear();
+    nerModels.clear();
+    lemmatizerModels.clear();
+    lemmaDictionaries.clear();
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/package-info.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/package-info.java b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/package-info.java
new file mode 100644
index 0000000..523a084
--- /dev/null
+++ b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/tools/package-info.java
@@ -0,0 +1,21 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/**
+ * Tools to supply access to OpenNLP components.
+ */
+package org.apache.lucene.analysis.opennlp.tools;

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/analysis/opennlp/src/java/overview.html
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/java/overview.html b/lucene/analysis/opennlp/src/java/overview.html
new file mode 100644
index 0000000..bf70e95
--- /dev/null
+++ b/lucene/analysis/opennlp/src/java/overview.html
@@ -0,0 +1,61 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<html>
+<head>
+  <META http-equiv="Content-Type" content="text/html; charset=UTF-8">
+  <title>
+    Apache Lucene OpenNLP integration module
+  </title>
+</head>
+<body>
+<p>
+  This module exposes functionality from
+  <a href="http://opennlp.apache.org">Apache OpenNLP</a> to Apache Lucene.
+  The Apache OpenNLP library is a machine learning based toolkit for the processing of natural language text.
+<p>
+  For an introduction to Lucene's analysis API, see the {@link org.apache.lucene.analysis} package documentation.
+<p>
+  The OpenNLP Tokenizer behavior is similar to the WhitespaceTokenizer but is smart about
+  inter-word punctuation. The term stream looks very much like the way you parse words and
+  punctuation while reading.  The major difference between this tokenizer and most other
+  tokenizers shipped with Lucene is that punctuation is tokenized.  This is required for
+  the following taggers to operate properly.
+<p>
+  The OpenNLP taggers annotate terms using the <code>TypeAttribute</code>.
+<ul>
+  <li><code>OpenNLPTokenizer</code> segments text into sentences or words. This Tokenizer
+    uses the OpenNLP Sentence Detector and/or Tokenizer classes.  When used together, the
+    Tokenizer receives sentences and can do a better job.</li>
+  <li><code>OpenNLPFilter</code> tags words using one or more technologies: Part-of-Speech,
+    Chunking, and Named Entity Recognition.  These tags are assigned as token types.  Note that
+    only one of these operations will tag a given token.
+  </li>
+</ul>
+<p>
+  Since the <code>TypeAttribute</code> is not stored in the index, it is recommended that one
+  of these filters is used following <code>OpenNLPFilter</code> to enable search against the
+  assigned tags:
+<ul>
+  <li><code>TypeAsPayloadFilter</code> copies the <code>TypeAttribute</code> value to the
+    <code>PayloadAttribute</code></li>
+  <li><code>TypeAsSynonymFilter</code> creates a cloned token at the same position as each
+    tagged token, and copies the <code>TypeAttribute</code> value to the <code>CharTermAttribute</code>, optionally
+    with a customized prefix (so that tags effectively occupy a different namespace from token
+    text).</li>
+</ul>
+</body>
+</html>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/analysis/opennlp/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory b/lucene/analysis/opennlp/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory
new file mode 100644
index 0000000..61a685d
--- /dev/null
+++ b/lucene/analysis/opennlp/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory
@@ -0,0 +1,18 @@
+#  Licensed to the Apache Software Foundation (ASF) under one or more
+#  contributor license agreements.  See the NOTICE file distributed with
+#  this work for additional information regarding copyright ownership.
+#  The ASF licenses this file to You under the Apache License, Version 2.0
+#  (the "License"); you may not use this file except in compliance with
+#  the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+org.apache.lucene.analysis.opennlp.OpenNLPChunkerFilterFactory
+org.apache.lucene.analysis.opennlp.OpenNLPLemmatizerFilterFactory
+org.apache.lucene.analysis.opennlp.OpenNLPPOSFilterFactory

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/analysis/opennlp/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenizerFactory
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenizerFactory b/lucene/analysis/opennlp/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenizerFactory
new file mode 100644
index 0000000..076b308
--- /dev/null
+++ b/lucene/analysis/opennlp/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenizerFactory
@@ -0,0 +1,16 @@
+#  Licensed to the Apache Software Foundation (ASF) under one or more
+#  contributor license agreements.  See the NOTICE file distributed with
+#  this work for additional information regarding copyright ownership.
+#  The ASF licenses this file to You under the Apache License, Version 2.0
+#  (the "License"); you may not use this file except in compliance with
+#  the License.  You may obtain a copy of the License at
+#
+#       http://www.apache.org/licenses/LICENSE-2.0
+#
+#  Unless required by applicable law or agreed to in writing, software
+#  distributed under the License is distributed on an "AS IS" BASIS,
+#  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+#  See the License for the specific language governing permissions and
+#  limitations under the License.
+
+org.apache.lucene.analysis.opennlp.OpenNLPTokenizerFactory

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-chunker.bin
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-chunker.bin b/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-chunker.bin
new file mode 100644
index 0000000..8151914
Binary files /dev/null and b/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-chunker.bin differ

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-lemmas.dict
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-lemmas.dict b/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-lemmas.dict
new file mode 100644
index 0000000..d1d486c
--- /dev/null
+++ b/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-lemmas.dict
@@ -0,0 +1,12 @@
+they	NNP	they
+sent	VBD	send
+him	PRP	he
+running	VBG	run
+in	IN	in
+the	DT	the
+evening	NN	evening
+he	PRP	he
+did	VBD	do
+not	RB	not
+come	VB	come
+back	RB	back

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-lemmatizer.bin
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-lemmatizer.bin b/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-lemmatizer.bin
new file mode 100644
index 0000000..e62df7e
Binary files /dev/null and b/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-lemmatizer.bin differ

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-ner-person.bin
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-ner-person.bin b/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-ner-person.bin
new file mode 100644
index 0000000..0b40aac
Binary files /dev/null and b/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-ner-person.bin differ

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-pos-maxent.bin
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-pos-maxent.bin b/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-pos-maxent.bin
new file mode 100644
index 0000000..b77fb46
Binary files /dev/null and b/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-pos-maxent.bin differ

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-sent.bin
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-sent.bin b/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-sent.bin
new file mode 100644
index 0000000..4252bcb
Binary files /dev/null and b/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-sent.bin differ

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-tokenizer.bin
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-tokenizer.bin b/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-tokenizer.bin
new file mode 100644
index 0000000..94668c0
Binary files /dev/null and b/lucene/analysis/opennlp/src/test-files/org/apache/lucene/analysis/opennlp/en-test-tokenizer.bin differ

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/TestOpenNLPChunkerFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/TestOpenNLPChunkerFilterFactory.java b/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/TestOpenNLPChunkerFilterFactory.java
new file mode 100644
index 0000000..013348c
--- /dev/null
+++ b/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/TestOpenNLPChunkerFilterFactory.java
@@ -0,0 +1,74 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.opennlp;
+
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.custom.CustomAnalyzer;
+import org.apache.lucene.analysis.payloads.TypeAsPayloadTokenFilterFactory;
+import org.apache.lucene.analysis.util.ClasspathResourceLoader;
+
+/**
+ * Needs the OpenNLP Tokenizer because it creates full streams of punctuation.
+ * Needs the OpenNLP POS tagger for the POS tags.
+ *
+ * Tagging models are created from tiny test data in opennlp/tools/test-model-data/ and are not very accurate.
+ */
+public class TestOpenNLPChunkerFilterFactory extends BaseTokenStreamTestCase {
+
+  private static final String SENTENCES = "Sentence number 1 has 6 words. Sentence number 2, 5 words.";
+  private static final String[] SENTENCES_punc
+      = {"Sentence", "number", "1", "has", "6", "words", ".", "Sentence", "number", "2", ",", "5", "words", "."};
+  private static final int[] SENTENCES_startOffsets = {0, 9, 16, 18, 22, 24, 29, 31, 40, 47, 48, 50, 52, 57};
+  private static final int[] SENTENCES_endOffsets = {8, 15, 17, 21, 23, 29, 30, 39, 46, 48, 49, 51, 57, 58};
+  private static final String[] SENTENCES_chunks
+      = { "B-NP", "I-NP", "I-NP", "B-VP", "B-NP", "I-NP", "O", "B-NP", "I-NP", "I-NP", "O", "B-NP", "I-NP", "O" };
+
+  private static final String sentenceModelFile = "en-test-sent.bin";
+  private static final String tokenizerModelFile = "en-test-tokenizer.bin";
+  private static final String posTaggerModelFile = "en-test-pos-maxent.bin";
+  private static final String chunkerModelFile = "en-test-chunker.bin";
+
+
+  private static byte[][] toPayloads(String... strings) {
+    return Arrays.stream(strings).map(s -> s == null ? null : s.getBytes(StandardCharsets.UTF_8)).toArray(byte[][]::new);
+  }
+
+  public void testBasic() throws Exception {
+    CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
+        .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
+        .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
+        .addTokenFilter("opennlpChunker", "chunkerModel", chunkerModelFile)
+        .build();
+    assertAnalyzesTo(analyzer, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets,
+        SENTENCES_chunks, null, null, true);
+  }
+
+  public void testPayloads() throws Exception {
+    CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
+        .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
+        .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
+        .addTokenFilter("opennlpChunker", "chunkerModel", chunkerModelFile)
+        .addTokenFilter(TypeAsPayloadTokenFilterFactory.class)
+        .build();
+    assertAnalyzesTo(analyzer, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets,
+        null, null, null, true, toPayloads(SENTENCES_chunks));
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/TestOpenNLPLemmatizerFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/TestOpenNLPLemmatizerFilterFactory.java b/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/TestOpenNLPLemmatizerFilterFactory.java
new file mode 100644
index 0000000..0491b91
--- /dev/null
+++ b/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/TestOpenNLPLemmatizerFilterFactory.java
@@ -0,0 +1,169 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.opennlp;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.custom.CustomAnalyzer;
+import org.apache.lucene.analysis.miscellaneous.KeywordRepeatFilterFactory;
+import org.apache.lucene.analysis.miscellaneous.RemoveDuplicatesTokenFilterFactory;
+import org.apache.lucene.analysis.util.ClasspathResourceLoader;
+
+/**
+ * Tests the {@code opennlplemmatizer} token filter, configured through {@link CustomAnalyzer}, with a
+ * lemma dictionary, a MaxEnt lemmatizer model, or both, and with or without KeywordRepeatFilter in front.
+ *
+ * <p>Every test asserts the emitted terms and the token types (POS tags); offsets are not checked.
+ */
+public class TestOpenNLPLemmatizerFilterFactory extends BaseTokenStreamTestCase {
+
+  // Single sentence: expected lemmas for the dictionary and the MaxEnt model, plus expected POS tags.
+  private static final String SENTENCE = "They sent him running in the evening.";
+  private static final String[] SENTENCE_dict_punc =   {"they", "send", "he",  "run",  "in", "the", "evening", "."};
+  private static final String[] SENTENCE_maxent_punc = {"they", "send", "he",  "runn", "in", "the", "evening", "."};
+  private static final String[] SENTENCE_posTags =     {"NNP",  "VBD",  "PRP", "VBG",  "IN", "DT",  "NN",      "."};
+
+  // Two sentences: same expectations as above, extended by the second sentence.
+  private static final String SENTENCES = "They sent him running in the evening. He did not come back.";
+  private static final String[] SENTENCES_dict_punc
+      = {"they", "send", "he",  "run",  "in", "the", "evening", ".", "he",  "do",  "not", "come", "back", "."};
+  private static final String[] SENTENCES_maxent_punc
+      = {"they", "send", "he",  "runn", "in", "the", "evening", ".", "he",  "do",  "not", "come", "back", "."};
+  private static final String[] SENTENCES_posTags
+      = {"NNP",  "VBD",  "PRP", "VBG",  "IN", "DT",  "NN",      ".", "PRP", "VBD", "RB",  "VB",   "RB",   "."};
+
+  // Inputs used when dictionary and MaxEnt model are configured together.
+  private static final String SENTENCE_both = "Konstantin Kalashnitsov constantly caliphed.";
+  private static final String[] SENTENCE_both_punc
+      = {"konstantin", "kalashnitsov", "constantly", "caliph", "."};
+  private static final String[] SENTENCE_both_posTags
+      = {"IN",         "JJ",          "NN",          "VBN",    "."};
+
+  private static final String SENTENCES_both = "Konstantin Kalashnitsov constantly caliphed. Coreena could care, completely.";
+  private static final String[] SENTENCES_both_punc
+      = {"konstantin", "kalashnitsov", "constantly", "caliph", ".", "coreena", "could", "care", ",", "completely", "."};
+  private static final String[] SENTENCES_both_posTags
+      = {"IN",         "JJ",           "NN",          "VBN",    ".", "NNP",     "VBN",   "NN",   ",", "NN",         "."};
+
+  // Expectations for the KeywordRepeat + RemoveDuplicates chains: the original term is followed by its
+  // lemma wherever the two differ; terms whose lemma equals the original appear once.
+  private static final String[] SENTENCES_dict_keep_orig_punc
+      = {"They", "they", "sent", "send", "him", "he", "running", "run",  "in", "the", "evening", ".", "He", "he",   "did", "do", "not", "come", "back", "."};
+  private static final String[] SENTENCES_max_ent_keep_orig_punc
+      = {"They", "they", "sent", "send", "him", "he", "running", "runn", "in", "the", "evening", ".", "He", "he",   "did", "do", "not", "come", "back", "."};
+  private static final String[] SENTENCES_keep_orig_posTags
+      = {"NNP",  "NNP",  "VBD",  "VBD",  "PRP", "PRP", "VBG",    "VBG",  "IN", "DT",  "NN",      ".", "PRP", "PRP", "VBD", "VBD", "RB",  "VB",  "RB",   "."};
+
+  private static final String[] SENTENCES_both_keep_orig_punc
+      = {"Konstantin", "konstantin", "Kalashnitsov", "kalashnitsov", "constantly", "caliphed", "caliph", ".", "Coreena", "coreena", "could", "care", ",", "completely", "."};
+  private static final String[] SENTENCES_both_keep_orig_posTags
+      = {"IN",         "IN",         "JJ",           "JJ",           "NN",         "VBN",      "VBN",    ".", "NNP",     "NNP",     "VBN",   "NN",   ",", "NN",         "."};
+
+  // Test resources, loaded through ClasspathResourceLoader(getClass()).
+  private static final String tokenizerModelFile = "en-test-tokenizer.bin";
+  private static final String sentenceModelFile = "en-test-sent.bin";
+  private static final String posTaggerModelFile = "en-test-pos-maxent.bin";
+  private static final String lemmatizerModelFile = "en-test-lemmatizer.bin";
+  private static final String lemmatizerDictFile = "en-test-lemmas.dict";
+
+  /** One sentence, lemmatized with the dictionary only. */
+  public void test1SentenceDictionaryOnly() throws Exception {
+    // Uses the shared file-name constants (the literals were previously duplicated here).
+    // Analyzer is Closeable, so it is released once the assertions are done.
+    try (CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
+        .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
+        .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
+        .addTokenFilter("opennlplemmatizer", "dictionary", lemmatizerDictFile)
+        .build()) {
+      assertAnalyzesTo(analyzer, SENTENCE, SENTENCE_dict_punc, null, null,
+          SENTENCE_posTags, null, null, true);
+    }
+  }
+
+  /** Two sentences, lemmatized with the dictionary only. */
+  public void test2SentencesDictionaryOnly() throws Exception {
+    try (CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
+        .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
+        .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
+        .addTokenFilter("opennlplemmatizer", "dictionary", lemmatizerDictFile)
+        .build()) {
+      assertAnalyzesTo(analyzer, SENTENCES, SENTENCES_dict_punc, null, null,
+          SENTENCES_posTags, null, null, true);
+    }
+  }
+
+  /** One sentence, lemmatized with the MaxEnt model only. */
+  public void test1SentenceMaxEntOnly() throws Exception {
+    try (CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
+        .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
+        .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
+        .addTokenFilter("opennlplemmatizer", "lemmatizerModel", lemmatizerModelFile)
+        .build()) {
+      assertAnalyzesTo(analyzer, SENTENCE, SENTENCE_maxent_punc, null, null,
+          SENTENCE_posTags, null, null, true);
+    }
+  }
+
+  /** Two sentences, lemmatized with the MaxEnt model only. */
+  public void test2SentencesMaxEntOnly() throws Exception {
+    // NOTE(review): the filter name is spelled "OpenNLPLemmatizer" here, unlike the sibling tests;
+    // presumably this relies on case-insensitive factory lookup. Kept exactly as written - confirm intent.
+    try (CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
+        .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
+        .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
+        .addTokenFilter("OpenNLPLemmatizer", "lemmatizerModel", lemmatizerModelFile)
+        .build()) {
+      assertAnalyzesTo(analyzer, SENTENCES, SENTENCES_maxent_punc, null, null,
+          SENTENCES_posTags, null, null, true);
+    }
+  }
+
+  /** One sentence, with both the dictionary and the MaxEnt model configured. */
+  public void test1SentenceDictionaryAndMaxEnt() throws Exception {
+    // Uses the shared file-name constants (the literals were previously duplicated here).
+    try (CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
+        .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
+        .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
+        .addTokenFilter("opennlplemmatizer", "dictionary", lemmatizerDictFile, "lemmatizerModel", lemmatizerModelFile)
+        .build()) {
+      assertAnalyzesTo(analyzer, SENTENCE_both, SENTENCE_both_punc, null, null,
+          SENTENCE_both_posTags, null, null, true);
+    }
+  }
+
+  /** Two sentences, with both the dictionary and the MaxEnt model configured. */
+  public void test2SentencesDictionaryAndMaxEnt() throws Exception {
+    try (CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
+        .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
+        .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
+        .addTokenFilter("opennlplemmatizer", "dictionary", lemmatizerDictFile, "lemmatizerModel", lemmatizerModelFile)
+        .build()) {
+      assertAnalyzesTo(analyzer, SENTENCES_both, SENTENCES_both_punc, null, null,
+          SENTENCES_both_posTags, null, null, true);
+    }
+  }
+
+  /** KeywordRepeat -> dictionary lemmatizer -> RemoveDuplicates: originals are kept next to their lemmas. */
+  public void testKeywordAttributeAwarenessDictionaryOnly() throws Exception {
+    try (CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
+        .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
+        .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
+        .addTokenFilter(KeywordRepeatFilterFactory.class)
+        .addTokenFilter("opennlplemmatizer", "dictionary", lemmatizerDictFile)
+        .addTokenFilter(RemoveDuplicatesTokenFilterFactory.class)
+        .build()) {
+      assertAnalyzesTo(analyzer, SENTENCES, SENTENCES_dict_keep_orig_punc, null, null,
+          SENTENCES_keep_orig_posTags, null, null, true);
+    }
+  }
+
+  /** KeywordRepeat -> MaxEnt lemmatizer -> RemoveDuplicates: originals are kept next to their lemmas. */
+  public void testKeywordAttributeAwarenessMaxEntOnly() throws Exception {
+    try (CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
+        .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
+        .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
+        .addTokenFilter(KeywordRepeatFilterFactory.class)
+        .addTokenFilter("opennlplemmatizer", "lemmatizerModel", lemmatizerModelFile)
+        .addTokenFilter(RemoveDuplicatesTokenFilterFactory.class)
+        .build()) {
+      assertAnalyzesTo(analyzer, SENTENCES, SENTENCES_max_ent_keep_orig_punc, null, null,
+          SENTENCES_keep_orig_posTags, null, null, true);
+    }
+  }
+
+  /** KeywordRepeat -> dictionary + MaxEnt lemmatizer -> RemoveDuplicates. */
+  public void testKeywordAttributeAwarenessDictionaryAndMaxEnt() throws Exception {
+    try (CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
+        .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
+        .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
+        .addTokenFilter(KeywordRepeatFilterFactory.class)
+        .addTokenFilter("opennlplemmatizer", "dictionary", lemmatizerDictFile, "lemmatizerModel", lemmatizerModelFile)
+        .addTokenFilter(RemoveDuplicatesTokenFilterFactory.class)
+        .build()) {
+      assertAnalyzesTo(analyzer, SENTENCES_both, SENTENCES_both_keep_orig_punc, null, null,
+          SENTENCES_both_keep_orig_posTags, null, null, true);
+    }
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/TestOpenNLPPOSFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/TestOpenNLPPOSFilterFactory.java b/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/TestOpenNLPPOSFilterFactory.java
new file mode 100644
index 0000000..10372d0
--- /dev/null
+++ b/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/TestOpenNLPPOSFilterFactory.java
@@ -0,0 +1,95 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.opennlp;
+
+import java.io.IOException;
+import java.nio.charset.StandardCharsets;
+import java.util.Arrays;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.custom.CustomAnalyzer;
+import org.apache.lucene.analysis.payloads.TypeAsPayloadTokenFilterFactory;
+import org.apache.lucene.analysis.util.ClasspathResourceLoader;
+
+/**
+ * Tests the {@code opennlpPOS} token filter when it is configured through {@link CustomAnalyzer}.
+ *
+ * <p>Needs the OpenNLP Tokenizer because it creates full streams of punctuation.
+ * The POS model is based on this tokenization.
+ *
+ * <p>Tagging models are created from tiny test data in opennlp/tools/test-model-data/ and are not very accurate.
+ */
+public class TestOpenNLPPOSFilterFactory extends BaseTokenStreamTestCase {
+
+  /** Two-sentence input used by {@link #testBasic()} and {@link #testPOS()}. */
+  private static final String SENTENCES = "Sentence number 1 has 6 words. Sentence number 2, 5 words.";
+  /** Expected terms; each punctuation mark is a token of its own. */
+  private static final String[] SENTENCES_punc
+      = {"Sentence", "number", "1", "has", "6", "words", ".", "Sentence", "number", "2", ",", "5", "words", "."};
+  /** Expected start offset of each term within {@link #SENTENCES}. */
+  private static final int[] SENTENCES_startOffsets = {0, 9, 16, 18, 22, 24, 29, 31, 40, 47, 48, 50, 52, 57};
+  /** Expected end offset of each term within {@link #SENTENCES}. */
+  private static final int[] SENTENCES_endOffsets = {8, 15, 17, 21, 23, 29, 30, 39, 46, 48, 49, 51, 57, 58};
+  /** Expected POS tag per term; checked as the token type and, via TypeAsPayload, as the payload. */
+  private static final String[] SENTENCES_posTags
+      = {"NN", "NN", "CD", "VBZ", "CD", "NNS", ".", "NN", "NN", "CD", ",", "CD", "NNS", "."};
+
+  // Input without any sentence-ending punctuation, with its expected terms and offsets.
+  private static final String NO_BREAK = "No period";
+  private static final String[] NO_BREAK_terms = {"No", "period"};
+  private static final int[] NO_BREAK_startOffsets = {0, 3};
+  private static final int[] NO_BREAK_endOffsets = {2, 9};
+
+  // Test model files, loaded through ClasspathResourceLoader(getClass()).
+  private static final String sentenceModelFile = "en-test-sent.bin";
+  private static final String tokenizerModelFile = "en-test-tokenizer.bin";
+  private static final String posTaggerModelFile = "en-test-pos-maxent.bin";
+
+  /**
+   * Encodes each string as UTF-8 bytes so it can be compared against token payloads.
+   *
+   * @param strings values to encode; {@code null} entries stay {@code null}
+   * @return one byte array (or {@code null}) per input string, in the same order
+   */
+  private static byte[][] toPayloads(String... strings) {
+    return Arrays.stream(strings).map(s -> s == null ? null : s.getBytes(StandardCharsets.UTF_8)).toArray(byte[][]::new);
+  }
+
+  /** Checks only terms and offsets: adding the POS filter must not disturb the tokenizer output. */
+  public void testBasic() throws IOException {
+    // Analyzer is Closeable: release it once the assertions are done instead of leaking it.
+    try (CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
+        .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
+        .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
+        .build()) {
+      assertAnalyzesTo(analyzer, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets);
+    }
+  }
+
+  /** Checks the POS tags twice: first as token types, then as payloads. */
+  public void testPOS() throws Exception {
+    try (CustomAnalyzer typed = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
+        .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
+        .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
+        .build()) {
+      assertAnalyzesTo(typed, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets,
+          SENTENCES_posTags, null, null, true);
+    }
+
+    // Same chain with TypeAsPayloadTokenFilterFactory appended; types are not asserted (null) here.
+    try (CustomAnalyzer withPayloads = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
+        .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
+        .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
+        .addTokenFilter(TypeAsPayloadTokenFilterFactory.class)
+        .build()) {
+      assertAnalyzesTo(withPayloads, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets,
+          null, null, null, true, toPayloads(SENTENCES_posTags));
+    }
+  }
+
+  /** Input with no sentence terminator must still be tokenized with correct offsets. */
+  public void testNoBreak() throws Exception {
+    try (CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
+        .withTokenizer("opennlp", "tokenizerModel", tokenizerModelFile, "sentenceModel", sentenceModelFile)
+        .addTokenFilter("opennlpPOS", "posTaggerModel", posTaggerModelFile)
+        .build()) {
+      assertAnalyzesTo(analyzer, NO_BREAK, NO_BREAK_terms, NO_BREAK_startOffsets, NO_BREAK_endOffsets,
+          null, null, null, true);
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/TestOpenNLPSentenceBreakIterator.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/TestOpenNLPSentenceBreakIterator.java b/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/TestOpenNLPSentenceBreakIterator.java
new file mode 100644
index 0000000..4ee6570
--- /dev/null
+++ b/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/TestOpenNLPSentenceBreakIterator.java
@@ -0,0 +1,201 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.opennlp;
+
+import java.io.IOException;
+import java.text.BreakIterator;
+import java.text.CharacterIterator;
+
+import org.apache.lucene.analysis.opennlp.tools.NLPSentenceDetectorOp;
+import org.apache.lucene.analysis.opennlp.tools.OpenNLPOpsFactory;
+import org.apache.lucene.analysis.util.CharArrayIterator;
+import org.apache.lucene.analysis.util.ClasspathResourceLoader;
+import org.apache.lucene.util.LuceneTestCase;
+import org.junit.BeforeClass;
+
+/**
+ * Tests {@link OpenNLPSentenceBreakIterator} navigation (first/last/next/previous/following/preceding)
+ * over three sentences, a single sentence, slices of a larger buffer, and empty or whitespace-only text.
+ */
+public class TestOpenNLPSentenceBreakIterator extends LuceneTestCase {
+
+  private static final String TEXT
+      //                                                                                                     111
+      //           111111111122222222223333333333444444444455555555556666666666777777777788888888889999999999000
+      // 0123456789012345678901234567890123456789012345678901234567890123456789012345678901234567890123456789012
+      = "Sentence number 1 has 6 words. Sentence number 2, 5 words. And finally, sentence number 3 has 8 words.";
+  /** The three sentences of {@link #TEXT}; the first two include their trailing space. */
+  private static final String[] SENTENCES = new String[] {
+    "Sentence number 1 has 6 words. ", "Sentence number 2, 5 words. ", "And finally, sentence number 3 has 8 words." };
+  /** Extra text placed around a sentence by the slice tests; it lies outside the iterated range. */
+  private static final String PADDING = " Word. Word. ";
+  private static final String sentenceModelFile = "en-test-sent.bin";
+
+
+  /**
+   * Loads the sentence model up front. The tests afterwards obtain detectors by model name only
+   * (presumably served from the factory's cache, as the method name suggests).
+   */
+  @BeforeClass
+  public static void populateCache() throws IOException {
+    OpenNLPOpsFactory.getSentenceModel
+        (sentenceModelFile, new ClasspathResourceLoader(TestOpenNLPSentenceBreakIterator.class));
+  }
+
+  /** Creates a fresh break iterator backed by the test sentence model. */
+  private static BreakIterator newBreakIterator() throws Exception {
+    NLPSentenceDetectorOp sentenceDetectorOp = OpenNLPOpsFactory.getSentenceDetector(sentenceModelFile);
+    return new OpenNLPSentenceBreakIterator(sentenceDetectorOp);
+  }
+
+  /** Runs the three-sentence checks with a String source and again with a CharArrayIterator source. */
+  public void testThreeSentences() throws Exception {
+    BreakIterator bi = newBreakIterator();
+    bi.setText(TEXT); // String is converted to StringCharacterIterator
+    do3SentenceTest(bi);
+
+    bi.setText(getCharArrayIterator(TEXT));
+    do3SentenceTest(bi);
+  }
+
+  /** Wraps the whole of {@code text} in a CharArrayIterator. */
+  private CharacterIterator getCharArrayIterator(String text) {
+    return getCharArrayIterator(text, 0, text.length());
+  }
+
+  /**
+   * Wraps {@code length} characters of {@code text}, beginning at {@code start}, in a CharArrayIterator.
+   */
+  private CharacterIterator getCharArrayIterator(String text, int start, int length) {
+    CharArrayIterator charArrayIterator = new CharArrayIterator() {
+      // Lie about all surrogates to the sentence tokenizer,
+      // instead we treat them all as SContinue so we won't break around them.
+      @Override
+      protected char jreBugWorkaround(char ch) {
+        return ch >= 0xD800 && ch <= 0xDFFF ? 0x002C : ch;
+      }
+    };
+    charArrayIterator.setText(text.toCharArray(), start, length);
+    return charArrayIterator;
+  }
+
+  /** Walks {@link #TEXT} forwards and backwards; the sentence boundaries are at 0, 31, 59 and the end. */
+  private void do3SentenceTest(BreakIterator bi) {
+    // Forward traversal.
+    assertEquals(0, bi.current());
+    assertEquals(0, bi.first());
+    assertEquals(SENTENCES[0], TEXT.substring(bi.current(), bi.next()));
+    assertEquals(SENTENCES[1], TEXT.substring(bi.current(), bi.next()));
+    int current = bi.current();
+    assertEquals(bi.getText().getEndIndex(), bi.next());
+    int next = bi.current();
+    assertEquals(SENTENCES[2], TEXT.substring(current, next));
+    assertEquals(BreakIterator.DONE, bi.next());
+
+    // Backward traversal from the end.
+    assertEquals(TEXT.length(), bi.last());
+    int end = bi.current();
+    assertEquals(SENTENCES[2], TEXT.substring(bi.previous(), end));
+    end = bi.current();
+    assertEquals(SENTENCES[1], TEXT.substring(bi.previous(), end));
+    end = bi.current();
+    assertEquals(SENTENCES[0], TEXT.substring(bi.previous(), end));
+    assertEquals(BreakIterator.DONE, bi.previous());
+    assertEquals(0, bi.current());
+
+    // Random access by offset.
+    assertEquals(59, bi.following(39));
+    assertEquals(59, bi.following(31));
+    assertEquals(31, bi.following(30));
+
+    assertEquals(0, bi.preceding(57));
+    assertEquals(0, bi.preceding(58));
+    assertEquals(31, bi.preceding(59));
+
+    // Relative jumps by a number of boundaries.
+    assertEquals(0, bi.first());
+    assertEquals(59, bi.next(2));
+    assertEquals(0, bi.next(-2));
+  }
+
+  /** A single sentence supplied through a CharArrayIterator. */
+  public void testSingleSentence() throws Exception {
+    BreakIterator bi = newBreakIterator();
+    bi.setText(getCharArrayIterator(SENTENCES[0]));
+    test1Sentence(bi, SENTENCES[0]);
+  }
+
+  /**
+   * Asserts that {@code bi} sees exactly one sentence equal to {@code text}.
+   *
+   * @param bi   iterator whose text has already been set
+   * @param text the sentence the iterator is expected to cover
+   */
+  private void test1Sentence(BreakIterator bi, String text) {
+    // Iterator positions are translated to indices into 'text' by subtracting the begin index.
+    int start = bi.getText().getBeginIndex();
+    assertEquals(start, bi.first());
+    int current = bi.current();
+    assertEquals(bi.getText().getEndIndex(), bi.next());
+    // Keep 'end' as a raw iterator position: it used to have 'start' subtracted here and again in the
+    // substring call below, which is only correct when the begin index is 0.
+    int end = bi.current();
+    assertEquals(text, text.substring(current - start, end - start));
+
+    assertEquals(text.length(), bi.last() - start);
+    end = bi.current();
+    bi.previous();
+    assertEquals(BreakIterator.DONE, bi.previous());
+    int previous = bi.current();
+    assertEquals(text, text.substring(previous - start, end - start));
+    assertEquals(start, bi.current());
+
+    // With a single sentence there is no boundary to report after or before an interior offset.
+    assertEquals(BreakIterator.DONE, bi.following(bi.last() / 2 + start));
+
+    assertEquals(BreakIterator.DONE, bi.preceding(bi.last() / 2 + start));
+
+    assertEquals(start, bi.first());
+    assertEquals(BreakIterator.DONE, bi.next(13));
+    assertEquals(BreakIterator.DONE, bi.next(-8));
+  }
+
+  /** The sentence is followed by padding that lies outside the iterated slice. */
+  public void testSliceEnd() throws Exception {
+    BreakIterator bi = newBreakIterator();
+    bi.setText(getCharArrayIterator(SENTENCES[0] + PADDING, 0, SENTENCES[0].length()));
+
+    test1Sentence(bi, SENTENCES[0]);
+  }
+
+  /** The sentence is preceded by padding that lies outside the iterated slice. */
+  public void testSliceStart() throws Exception {
+    BreakIterator bi = newBreakIterator();
+    bi.setText(getCharArrayIterator(PADDING + SENTENCES[0], PADDING.length(), SENTENCES[0].length()));
+    test1Sentence(bi, SENTENCES[0]);
+  }
+
+  /** The sentence is surrounded by padding on both sides of the iterated slice. */
+  public void testSliceMiddle() throws Exception {
+    BreakIterator bi = newBreakIterator();
+    bi.setText(getCharArrayIterator(PADDING + SENTENCES[0] + PADDING, PADDING.length(), SENTENCES[0].length()));
+
+    test1Sentence(bi, SENTENCES[0]);
+  }
+
+  /** the current position must be ignored, initial position is always first() */
+  public void testFirstPosition() throws Exception {
+    BreakIterator bi = newBreakIterator();
+    bi.setText(getCharArrayIterator(SENTENCES[0]));
+    assertEquals(SENTENCES[0].length(), bi.last()); // side-effect: set current position to last()
+    test1Sentence(bi, SENTENCES[0]);
+  }
+
+  /** Whitespace-only input must yield no sentences. */
+  public void testWhitespaceOnly() throws Exception {
+    BreakIterator bi = newBreakIterator();
+    bi.setText("   \n \n\n\r\n\t  \n");
+    test0Sentences(bi);
+  }
+
+  /** Empty input must yield no sentences. */
+  public void testEmptyString() throws Exception {
+    BreakIterator bi = newBreakIterator();
+    bi.setText("");
+    test0Sentences(bi);
+  }
+
+  /** Asserts that {@code bi} reports no sentences: positions stay at 0 and every move returns DONE. */
+  private void test0Sentences(BreakIterator bi) {
+    assertEquals(0, bi.current());
+    assertEquals(0, bi.first());
+    assertEquals(BreakIterator.DONE, bi.next());
+    assertEquals(0, bi.last());
+    assertEquals(BreakIterator.DONE, bi.previous());
+    assertEquals(BreakIterator.DONE, bi.following(0));
+    assertEquals(BreakIterator.DONE, bi.preceding(0));
+    assertEquals(0, bi.first());
+    assertEquals(BreakIterator.DONE, bi.next(13));
+    assertEquals(BreakIterator.DONE, bi.next(-8));
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/TestOpenNLPTokenizerFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/TestOpenNLPTokenizerFactory.java b/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/TestOpenNLPTokenizerFactory.java
new file mode 100644
index 0000000..db2bbb2
--- /dev/null
+++ b/lucene/analysis/opennlp/src/test/org/apache/lucene/analysis/opennlp/TestOpenNLPTokenizerFactory.java
@@ -0,0 +1,97 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.opennlp;
+
+import java.io.IOException;
+import java.io.StringReader;
+import java.util.HashMap;
+import java.util.Map;
+
+import org.apache.lucene.analysis.BaseTokenStreamTestCase;
+import org.apache.lucene.analysis.Tokenizer;
+import org.apache.lucene.analysis.custom.CustomAnalyzer;
+import org.apache.lucene.analysis.util.ClasspathResourceLoader;
+import org.junit.Test;
+
+/**
+ * Tests the Tokenizer as well - the Tokenizer needs the OpenNLP model files,
+ * which are loaded as classpath resources relative to this test class.
+ *
+ */
+public class TestOpenNLPTokenizerFactory extends BaseTokenStreamTestCase {
+
+  static private String SENTENCES = "Sentence number 1 has 6 words. Sentence number 2, 5 words.";
+  static private String[] SENTENCES_split = {"Sentence number 1 has 6 words. ", "Sentence number 2, 5 words."};
+  static private String[] SENTENCES_punc = {"Sentence", "number", "1", "has", "6", "words", ".", "Sentence", "number", "2", ",", "5", "words", "."};
+  static private int[] SENTENCES_startOffsets = {0, 9, 16, 18, 22, 24, 29, 31, 40, 47, 48, 50, 52, 57};
+  static private int[] SENTENCES_endOffsets = {8, 15, 17, 21, 23, 29, 30, 39, 46, 48, 49, 51, 57, 58};
+
+  static private String SENTENCE1 = "Sentence number 1 has 6 words.";
+  static private String[] SENTENCE1_punc = {"Sentence", "number", "1", "has", "6", "words", "."};
+
+  @Test
+  public void testTokenizer() throws IOException {
+    CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
+        .withTokenizer("opennlp", "sentenceModel", "en-test-sent.bin", "tokenizerModel", "en-test-tokenizer.bin")
+        .build();
+    assertAnalyzesTo(analyzer, SENTENCES, SENTENCES_punc, SENTENCES_startOffsets, SENTENCES_endOffsets);
+    assertAnalyzesTo(analyzer, SENTENCE1, SENTENCE1_punc);
+  }
+
+  @Test
+  public void testTokenizerNoSentenceDetector() throws IOException {
+    IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
+      CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
+          .withTokenizer("opennlp", "tokenizerModel", "en-test-tokenizer.bin")
+          .build();
+    });
+    assertTrue(expected.getMessage().contains("Configuration Error: missing parameter 'sentenceModel'"));
+  }
+
+  @Test
+  public void testTokenizerNoTokenizer() throws IOException {
+    IllegalArgumentException expected = expectThrows(IllegalArgumentException.class, () -> {
+      CustomAnalyzer analyzer = CustomAnalyzer.builder(new ClasspathResourceLoader(getClass()))
+          .withTokenizer("opennlp", "sentenceModel", "en-test-sent.bin")
+          .build();
+    });
+    assertTrue(expected.getMessage().contains("Configuration Error: missing parameter 'tokenizerModel'"));
+  }
+
+  // test analyzer caching the tokenizer
+  @Test
+  public void testClose() throws IOException {
+    Map<String,String> args = new HashMap<String,String>() {{ put("sentenceModel", "en-test-sent.bin");
+                                                              put("tokenizerModel", "en-test-tokenizer.bin"); }};
+    OpenNLPTokenizerFactory factory = new OpenNLPTokenizerFactory(args);
+    factory.inform(new ClasspathResourceLoader(getClass()));
+
+    Tokenizer ts = factory.create(newAttributeFactory());
+    ts.setReader(new StringReader(SENTENCES));
+
+    ts.reset();
+    ts.close();
+    ts.reset();
+    ts.setReader(new StringReader(SENTENCES));
+    assertTokenStreamContents(ts, SENTENCES_punc);
+    ts.close();
+    ts.reset();
+    ts.setReader(new StringReader(SENTENCES));
+    assertTokenStreamContents(ts, SENTENCES_punc);
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/analysis/opennlp/src/tools/test-model-data/README.txt
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/tools/test-model-data/README.txt b/lucene/analysis/opennlp/src/tools/test-model-data/README.txt
new file mode 100644
index 0000000..3ac0aa3
--- /dev/null
+++ b/lucene/analysis/opennlp/src/tools/test-model-data/README.txt
@@ -0,0 +1,6 @@
+Use small training data to create small models for unit tests.
+Training data derived from the Reuters corpus in a very unscientific way.
+Tagging done with CCG Urbana-Champaign online demos:
+	http://cogcomp.cs.illinois.edu/page/demos
+
+Run 'ant train-test-models' to generate models from training data here.


[06/12] lucene-solr:master: LUCENE-2899: Add OpenNLP Analysis capabilities as a module

Posted by sa...@apache.org.
LUCENE-2899: Add OpenNLP Analysis capabilities as a module


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/3e2f9e62
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/3e2f9e62
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/3e2f9e62

Branch: refs/heads/master
Commit: 3e2f9e62d772218bf1fcae6d58542fad3ec43742
Parents: d02d1f1
Author: Steve Rowe <sa...@apache.org>
Authored: Fri Dec 15 11:24:18 2017 -0500
Committer: Steve Rowe <sa...@apache.org>
Committed: Fri Dec 15 11:24:18 2017 -0500

----------------------------------------------------------------------
 dev-tools/idea/.idea/ant.xml                    |    1 +
 dev-tools/idea/.idea/modules.xml                |    1 +
 dev-tools/idea/.idea/workspace.xml              |   83 +-
 .../idea/lucene/analysis/opennlp/opennlp.iml    |   30 +
 .../contrib/analysis-extras/analysis-extras.iml |    1 +
 .../lucene/analysis/opennlp/pom.xml.template    |   78 +
 .../maven/lucene/analysis/pom.xml.template      |    1 +
 lucene/CHANGES.txt                              |    9 +
 lucene/analysis/README.txt                      |    5 +
 lucene/analysis/build.xml                       |    6 +-
 .../miscellaneous/TypeAsSynonymFilter.java      |   80 +
 .../TypeAsSynonymFilterFactory.java             |   55 +
 ...ache.lucene.analysis.util.TokenFilterFactory |    1 +
 .../analysis/minhash/MinHashFilterTest.java     |    6 +-
 .../TestTypeAsSynonymFilterFactory.java         |   50 +
 lucene/analysis/opennlp/build.xml               |  118 +
 lucene/analysis/opennlp/ivy.xml                 |   29 +
 .../analysis/opennlp/OpenNLPChunkerFilter.java  |  108 +
 .../opennlp/OpenNLPChunkerFilterFactory.java    |   81 +
 .../opennlp/OpenNLPLemmatizerFilter.java        |  123 +
 .../opennlp/OpenNLPLemmatizerFilterFactory.java |   89 +
 .../analysis/opennlp/OpenNLPPOSFilter.java      |   96 +
 .../opennlp/OpenNLPPOSFilterFactory.java        |   71 +
 .../opennlp/OpenNLPSentenceBreakIterator.java   |  224 ++
 .../analysis/opennlp/OpenNLPTokenizer.java      |   98 +
 .../opennlp/OpenNLPTokenizerFactory.java        |   79 +
 .../lucene/analysis/opennlp/package-info.java   |   21 +
 .../analysis/opennlp/tools/NLPChunkerOp.java    |   41 +
 .../analysis/opennlp/tools/NLPLemmatizerOp.java |   80 +
 .../analysis/opennlp/tools/NLPNERTaggerOp.java  |   56 +
 .../analysis/opennlp/tools/NLPPOSTaggerOp.java  |   41 +
 .../opennlp/tools/NLPSentenceDetectorOp.java    |   50 +
 .../analysis/opennlp/tools/NLPTokenizerOp.java  |   48 +
 .../opennlp/tools/OpenNLPOpsFactory.java        |  176 +
 .../analysis/opennlp/tools/package-info.java    |   21 +
 lucene/analysis/opennlp/src/java/overview.html  |   61 +
 ...ache.lucene.analysis.util.TokenFilterFactory |   18 +
 ...apache.lucene.analysis.util.TokenizerFactory |   16 +
 .../lucene/analysis/opennlp/en-test-chunker.bin |  Bin 0 -> 89915 bytes
 .../lucene/analysis/opennlp/en-test-lemmas.dict |   12 +
 .../analysis/opennlp/en-test-lemmatizer.bin     |  Bin 0 -> 7370 bytes
 .../analysis/opennlp/en-test-ner-person.bin     |  Bin 0 -> 1700 bytes
 .../analysis/opennlp/en-test-pos-maxent.bin     |  Bin 0 -> 18424 bytes
 .../lucene/analysis/opennlp/en-test-sent.bin    |  Bin 0 -> 1050 bytes
 .../analysis/opennlp/en-test-tokenizer.bin      |  Bin 0 -> 15096 bytes
 .../TestOpenNLPChunkerFilterFactory.java        |   74 +
 .../TestOpenNLPLemmatizerFilterFactory.java     |  169 +
 .../opennlp/TestOpenNLPPOSFilterFactory.java    |   95 +
 .../TestOpenNLPSentenceBreakIterator.java       |  201 +
 .../opennlp/TestOpenNLPTokenizerFactory.java    |   97 +
 .../src/tools/test-model-data/README.txt        |    6 +
 .../src/tools/test-model-data/chunks.txt        | 3566 ++++++++++++++++++
 .../src/tools/test-model-data/lemmas.txt        |  875 +++++
 .../tools/test-model-data/ner_TrainerParams.txt |   21 +
 .../src/tools/test-model-data/ner_flashman.txt  |  143 +
 .../opennlp/src/tools/test-model-data/pos.txt   |   30 +
 .../src/tools/test-model-data/sentences.txt     |  144 +
 .../src/tools/test-model-data/tokenizer.txt     |   69 +
 .../apache/lucene/analysis/TestStopFilter.java  |    9 +-
 lucene/ivy-versions.properties                  |    3 +
 lucene/licenses/opennlp-maxent-3.0.3.jar.sha1   |    1 +
 lucene/licenses/opennlp-maxent-LICENSE-ASL.txt  |  202 +
 lucene/licenses/opennlp-maxent-NOTICE.txt       |    6 +
 lucene/licenses/opennlp-tools-1.8.3.jar.sha1    |    1 +
 lucene/licenses/opennlp-tools-LICENSE-ASL.txt   |  202 +
 lucene/licenses/opennlp-tools-NOTICE.txt        |    6 +
 lucene/module-build.xml                         |   22 +
 .../analysis/BaseTokenStreamTestCase.java       |   32 +-
 solr/CHANGES.txt                                |    7 +
 solr/contrib/analysis-extras/README.txt         |   10 +-
 solr/contrib/analysis-extras/build.xml          |   20 +-
 solr/contrib/analysis-extras/ivy.xml            |    3 +
 ...ractNamedEntitiesUpdateProcessorFactory.java |  571 +++
 .../apache/solr/update/processor/package.html   |   24 +
 .../collection1/conf/en-test-ner-person.bin     |  Bin 0 -> 1700 bytes
 .../solr/collection1/conf/en-test-sent.bin      |  Bin 0 -> 1050 bytes
 .../solr/collection1/conf/en-test-tokenizer.bin |  Bin 0 -> 15096 bytes
 .../collection1/conf/schema-opennlp-extract.xml |   49 +
 .../conf/solrconfig-opennlp-extract.xml         |  206 +
 .../solrconfig.snippet.randomindexconfig.xml    |   48 +
 ...ractNamedEntitiesUpdateProcessorFactory.java |  192 +
 .../processor/UpdateProcessorTestBase.java      |  168 -
 solr/licenses/opennlp-maxent-3.0.3.jar.sha1     |    1 +
 solr/licenses/opennlp-maxent-LICENSE-ASL.txt    |  202 +
 solr/licenses/opennlp-maxent-NOTICE.txt         |    6 +
 solr/licenses/opennlp-tools-1.8.3.jar.sha1      |    1 +
 solr/licenses/opennlp-tools-LICENSE-ASL.txt     |  202 +
 solr/licenses/opennlp-tools-NOTICE.txt          |    6 +
 .../solr-ref-guide/src/filter-descriptions.adoc |   32 +
 solr/solr-ref-guide/src/language-analysis.adoc  |  208 +
 solr/solr-ref-guide/src/tokenizers.adoc         |    4 +
 .../src/update-request-processors.adoc          |    6 +
 .../processor/UpdateProcessorTestBase.java      |  168 +
 93 files changed, 10040 insertions(+), 232 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/dev-tools/idea/.idea/ant.xml
----------------------------------------------------------------------
diff --git a/dev-tools/idea/.idea/ant.xml b/dev-tools/idea/.idea/ant.xml
index 8723e63..6c7bc8c 100644
--- a/dev-tools/idea/.idea/ant.xml
+++ b/dev-tools/idea/.idea/ant.xml
@@ -11,6 +11,7 @@
     <buildFile url="file://$PROJECT_DIR$/lucene/analysis/icu/build.xml" />
     <buildFile url="file://$PROJECT_DIR$/lucene/analysis/kuromoji/build.xml" />
     <buildFile url="file://$PROJECT_DIR$/lucene/analysis/morfologik/build.xml" />
+    <buildFile url="file://$PROJECT_DIR$/lucene/analysis/opennlp/build.xml" />
     <buildFile url="file://$PROJECT_DIR$/lucene/analysis/phonetic/build.xml" />
     <buildFile url="file://$PROJECT_DIR$/lucene/analysis/smartcn/build.xml" />
     <buildFile url="file://$PROJECT_DIR$/lucene/analysis/stempel/build.xml" />

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/dev-tools/idea/.idea/modules.xml
----------------------------------------------------------------------
diff --git a/dev-tools/idea/.idea/modules.xml b/dev-tools/idea/.idea/modules.xml
index 7ad2a78..4df1000 100644
--- a/dev-tools/idea/.idea/modules.xml
+++ b/dev-tools/idea/.idea/modules.xml
@@ -15,6 +15,7 @@
       <module group="Lucene/Analysis" filepath="$PROJECT_DIR$/lucene/analysis/icu/icu.iml" />
       <module group="Lucene/Analysis" filepath="$PROJECT_DIR$/lucene/analysis/kuromoji/kuromoji.iml" />
       <module group="Lucene/Analysis" filepath="$PROJECT_DIR$/lucene/analysis/morfologik/morfologik.iml" />
+      <module group="Lucene/Analysis" filepath="$PROJECT_DIR$/lucene/analysis/opennlp/opennlp.iml" />
       <module group="Lucene/Analysis" filepath="$PROJECT_DIR$/lucene/analysis/phonetic/phonetic.iml" />
       <module group="Lucene/Analysis" filepath="$PROJECT_DIR$/lucene/analysis/smartcn/smartcn.iml" />
       <module group="Lucene/Analysis" filepath="$PROJECT_DIR$/lucene/analysis/stempel/stempel.iml" />

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/dev-tools/idea/.idea/workspace.xml
----------------------------------------------------------------------
diff --git a/dev-tools/idea/.idea/workspace.xml b/dev-tools/idea/.idea/workspace.xml
index e22108f..11794af 100644
--- a/dev-tools/idea/.idea/workspace.xml
+++ b/dev-tools/idea/.idea/workspace.xml
@@ -44,6 +44,14 @@
       <option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
       <patterns><pattern testClass=".*\.Test[^.]*|.*\.[^.]*Test" /></patterns>
     </configuration>
+    <configuration default="false" name="Module analyzers-opennlp" type="JUnit" factoryName="JUnit">
+      <module name="opennlp" />
+      <option name="TEST_OBJECT" value="pattern" />
+      <option name="WORKING_DIRECTORY" value="file://$PROJECT_DIR$/idea-build/lucene/analysis/opennlp" />
+      <option name="VM_PARAMETERS" value="-ea -DtempDir=temp" />
+      <option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
+      <patterns><pattern testClass=".*\.Test[^.]*|.*\.[^.]*Test" /></patterns>
+    </configuration>
     <configuration default="false" name="Module analyzers-phonetic" type="JUnit" factoryName="JUnit">
       <module name="phonetic" />
       <option name="TEST_OBJECT" value="pattern" />
@@ -333,48 +341,49 @@
       <patterns><pattern testClass=".*\.Test[^.]*|.*\.[^.]*Test" /></patterns>
     </configuration>
 
-    <list size="41">
+    <list size="42">
       <item index="0" class="java.lang.String" itemvalue="JUnit.Lucene core" />
       <item index="1" class="java.lang.String" itemvalue="JUnit.Module analyzers-common" />
       <item index="2" class="java.lang.String" itemvalue="JUnit.Module analyzers-icu" />
       <item index="3" class="java.lang.String" itemvalue="JUnit.Module analyzers-kuromoji" />
       <item index="4" class="java.lang.String" itemvalue="JUnit.Module analyzers-morfologik" />
-      <item index="5" class="java.lang.String" itemvalue="JUnit.Module analyzers-phonetic" />
-      <item index="6" class="java.lang.String" itemvalue="JUnit.Module analyzers-smartcn" />
-      <item index="7" class="java.lang.String" itemvalue="JUnit.Module analyzers-stempel" />
-      <item index="8" class="java.lang.String" itemvalue="JUnit.Module analyzers-uima" />
-      <item index="9" class="java.lang.String" itemvalue="JUnit.Module backward-codecs" />
-      <item index="10" class="java.lang.String" itemvalue="JUnit.Module benchmark" />
-      <item index="11" class="java.lang.String" itemvalue="JUnit.Module classification" />
-      <item index="12" class="java.lang.String" itemvalue="JUnit.Module codecs" />
-      <item index="13" class="java.lang.String" itemvalue="JUnit.Module expressions" />
-      <item index="14" class="java.lang.String" itemvalue="JUnit.Module facet" />
-      <item index="15" class="java.lang.String" itemvalue="JUnit.Module grouping" />
-      <item index="16" class="java.lang.String" itemvalue="JUnit.Module highlighter" />
-      <item index="17" class="java.lang.String" itemvalue="JUnit.Module join" />
-      <item index="18" class="java.lang.String" itemvalue="JUnit.Module memory" />
-      <item index="19" class="java.lang.String" itemvalue="JUnit.Module misc" />
-      <item index="20" class="java.lang.String" itemvalue="JUnit.Module queries" />
-      <item index="21" class="java.lang.String" itemvalue="JUnit.Module queryparser" />
-      <item index="22" class="java.lang.String" itemvalue="JUnit.Module replicator" />
-      <item index="23" class="java.lang.String" itemvalue="JUnit.Module sandbox" />
-      <item index="24" class="java.lang.String" itemvalue="JUnit.Module spatial" />
-      <item index="25" class="java.lang.String" itemvalue="JUnit.Module spatial-extras" />
-      <item index="26" class="java.lang.String" itemvalue="JUnit.Module spatial3d" />
-      <item index="27" class="java.lang.String" itemvalue="JUnit.Module suggest" />
-      <item index="28" class="java.lang.String" itemvalue="Application.solrcloud" />
-      <item index="29" class="java.lang.String" itemvalue="JUnit.Solr core" />
-      <item index="30" class="java.lang.String" itemvalue="JUnit.Solrj" />
-      <item index="31" class="java.lang.String" itemvalue="JUnit.Solr analysis-extras contrib" />
-      <item index="32" class="java.lang.String" itemvalue="JUnit.Solr analytics contrib" />
-      <item index="33" class="java.lang.String" itemvalue="JUnit.Solr clustering contrib" />
-      <item index="34" class="java.lang.String" itemvalue="JUnit.Solr dataimporthandler contrib" />
-      <item index="35" class="java.lang.String" itemvalue="JUnit.Solr dataimporthandler-extras contrib" />
-      <item index="36" class="java.lang.String" itemvalue="JUnit.Solr extraction contrib" />
-      <item index="37" class="java.lang.String" itemvalue="JUnit.Solr langid contrib" />
-      <item index="38" class="java.lang.String" itemvalue="JUnit.Solr ltr contrib" />
-      <item index="39" class="java.lang.String" itemvalue="JUnit.Solr uima contrib" />
-      <item index="40" class="java.lang.String" itemvalue="JUnit.Solr velocity contrib" />
+      <item index="5" class="java.lang.String" itemvalue="JUnit.Module analyzers-opennlp" />
+      <item index="6" class="java.lang.String" itemvalue="JUnit.Module analyzers-phonetic" />
+      <item index="7" class="java.lang.String" itemvalue="JUnit.Module analyzers-smartcn" />
+      <item index="8" class="java.lang.String" itemvalue="JUnit.Module analyzers-stempel" />
+      <item index="9" class="java.lang.String" itemvalue="JUnit.Module analyzers-uima" />
+      <item index="10" class="java.lang.String" itemvalue="JUnit.Module backward-codecs" />
+      <item index="11" class="java.lang.String" itemvalue="JUnit.Module benchmark" />
+      <item index="12" class="java.lang.String" itemvalue="JUnit.Module classification" />
+      <item index="13" class="java.lang.String" itemvalue="JUnit.Module codecs" />
+      <item index="14" class="java.lang.String" itemvalue="JUnit.Module expressions" />
+      <item index="15" class="java.lang.String" itemvalue="JUnit.Module facet" />
+      <item index="16" class="java.lang.String" itemvalue="JUnit.Module grouping" />
+      <item index="17" class="java.lang.String" itemvalue="JUnit.Module highlighter" />
+      <item index="18" class="java.lang.String" itemvalue="JUnit.Module join" />
+      <item index="19" class="java.lang.String" itemvalue="JUnit.Module memory" />
+      <item index="20" class="java.lang.String" itemvalue="JUnit.Module misc" />
+      <item index="21" class="java.lang.String" itemvalue="JUnit.Module queries" />
+      <item index="22" class="java.lang.String" itemvalue="JUnit.Module queryparser" />
+      <item index="23" class="java.lang.String" itemvalue="JUnit.Module replicator" />
+      <item index="24" class="java.lang.String" itemvalue="JUnit.Module sandbox" />
+      <item index="25" class="java.lang.String" itemvalue="JUnit.Module spatial" />
+      <item index="26" class="java.lang.String" itemvalue="JUnit.Module spatial-extras" />
+      <item index="27" class="java.lang.String" itemvalue="JUnit.Module spatial3d" />
+      <item index="28" class="java.lang.String" itemvalue="JUnit.Module suggest" />
+      <item index="29" class="java.lang.String" itemvalue="Application.solrcloud" />
+      <item index="30" class="java.lang.String" itemvalue="JUnit.Solr core" />
+      <item index="31" class="java.lang.String" itemvalue="JUnit.Solrj" />
+      <item index="32" class="java.lang.String" itemvalue="JUnit.Solr analysis-extras contrib" />
+      <item index="33" class="java.lang.String" itemvalue="JUnit.Solr analytics contrib" />
+      <item index="34" class="java.lang.String" itemvalue="JUnit.Solr clustering contrib" />
+      <item index="35" class="java.lang.String" itemvalue="JUnit.Solr dataimporthandler contrib" />
+      <item index="36" class="java.lang.String" itemvalue="JUnit.Solr dataimporthandler-extras contrib" />
+      <item index="37" class="java.lang.String" itemvalue="JUnit.Solr extraction contrib" />
+      <item index="38" class="java.lang.String" itemvalue="JUnit.Solr langid contrib" />
+      <item index="39" class="java.lang.String" itemvalue="JUnit.Solr ltr contrib" />
+      <item index="40" class="java.lang.String" itemvalue="JUnit.Solr uima contrib" />
+      <item index="41" class="java.lang.String" itemvalue="JUnit.Solr velocity contrib" />
     </list>
   </component>
 </project>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/dev-tools/idea/lucene/analysis/opennlp/opennlp.iml
----------------------------------------------------------------------
diff --git a/dev-tools/idea/lucene/analysis/opennlp/opennlp.iml b/dev-tools/idea/lucene/analysis/opennlp/opennlp.iml
new file mode 100644
index 0000000..7725065
--- /dev/null
+++ b/dev-tools/idea/lucene/analysis/opennlp/opennlp.iml
@@ -0,0 +1,30 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="JAVA_MODULE" version="4">
+  <component name="NewModuleRootManager" inherit-compiler-output="false">
+    <output url="file://$MODULE_DIR$/../../../idea-build/lucene/analysis/opennlp/classes/java" />
+    <output-test url="file://$MODULE_DIR$/../../../idea-build/lucene/analysis/opennlp/classes/test" />
+    <exclude-output />
+    <content url="file://$MODULE_DIR$">
+      <sourceFolder url="file://$MODULE_DIR$/src/java" isTestSource="false" />
+      <sourceFolder url="file://$MODULE_DIR$/src/test" isTestSource="true" />
+      <sourceFolder url="file://$MODULE_DIR$/src/resources" type="java-resource" />
+      <sourceFolder url="file://$MODULE_DIR$/src/test-files" type="java-test-resource" />
+    </content>
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+    <orderEntry type="module-library">
+      <library>
+        <CLASSES>
+          <root url="file://$MODULE_DIR$/lib" />
+        </CLASSES>
+        <JAVADOC />
+        <SOURCES />
+        <jarDirectory url="file://$MODULE_DIR$/lib" recursive="false" />
+      </library>
+    </orderEntry>
+    <orderEntry type="library" scope="TEST" name="JUnit" level="project" />
+    <orderEntry type="module" scope="TEST" module-name="lucene-test-framework" />
+    <orderEntry type="module" module-name="analysis-common" />
+    <orderEntry type="module" module-name="lucene-core" />
+  </component>
+</module>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/dev-tools/idea/solr/contrib/analysis-extras/analysis-extras.iml
----------------------------------------------------------------------
diff --git a/dev-tools/idea/solr/contrib/analysis-extras/analysis-extras.iml b/dev-tools/idea/solr/contrib/analysis-extras/analysis-extras.iml
index 287b46a..7c0c0c1 100644
--- a/dev-tools/idea/solr/contrib/analysis-extras/analysis-extras.iml
+++ b/dev-tools/idea/solr/contrib/analysis-extras/analysis-extras.iml
@@ -37,5 +37,6 @@
     <orderEntry type="module" module-name="lucene-core" />
     <orderEntry type="module" module-name="misc" />
     <orderEntry type="module" module-name="sandbox" />
+    <orderEntry type="module" module-name="opennlp" />
   </component>
 </module>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/dev-tools/maven/lucene/analysis/opennlp/pom.xml.template
----------------------------------------------------------------------
diff --git a/dev-tools/maven/lucene/analysis/opennlp/pom.xml.template b/dev-tools/maven/lucene/analysis/opennlp/pom.xml.template
new file mode 100644
index 0000000..4109a0a
--- /dev/null
+++ b/dev-tools/maven/lucene/analysis/opennlp/pom.xml.template
@@ -0,0 +1,78 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.lucene</groupId>
+    <artifactId>lucene-parent</artifactId>
+    <version>@version@</version>
+    <relativePath>../../pom.xml</relativePath>
+  </parent>
+  <groupId>org.apache.lucene</groupId>
+  <artifactId>lucene-analyzers-opennlp</artifactId>
+  <packaging>jar</packaging>
+  <name>Lucene OpenNLP integration</name>
+  <description>
+    Lucene OpenNLP integration
+  </description>
+  <properties>
+    <module-directory>lucene/analysis/opennlp</module-directory>
+    <relative-top-level>../../../..</relative-top-level>
+    <module-path>${relative-top-level}/${module-directory}</module-path>
+  </properties>
+  <scm>
+    <connection>scm:git:${vc-anonymous-base-url}</connection>
+    <developerConnection>scm:git:${vc-dev-base-url}</developerConnection>
+    <url>${vc-browse-base-url};f=${module-directory}</url>
+  </scm>
+  <dependencies>
+    <dependency>
+      <!-- lucene-test-framework dependency must be declared before lucene-core -->
+      <groupId>org.apache.lucene</groupId>
+      <artifactId>lucene-test-framework</artifactId>
+      <scope>test</scope>
+    </dependency>
+    @lucene-analyzers-opennlp.internal.dependencies@
+    @lucene-analyzers-opennlp.external.dependencies@
+    @lucene-analyzers-opennlp.internal.test.dependencies@
+    @lucene-analyzers-opennlp.external.test.dependencies@
+  </dependencies>
+  <build>
+    <sourceDirectory>${module-path}/src/java</sourceDirectory>
+    <testSourceDirectory>${module-path}/src/test</testSourceDirectory>
+    <resources>
+      <resource>
+        <directory>${module-path}/src/resources</directory>
+      </resource>
+    </resources>
+    <testResources>
+      <testResource>
+        <directory>${project.build.testSourceDirectory}</directory>
+        <excludes>
+          <exclude>**/*.java</exclude>
+        </excludes>
+      </testResource>
+      <testResource>
+        <directory>${module-path}/src/test-files</directory>
+      </testResource>
+    </testResources>
+  </build>
+</project>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/dev-tools/maven/lucene/analysis/pom.xml.template
----------------------------------------------------------------------
diff --git a/dev-tools/maven/lucene/analysis/pom.xml.template b/dev-tools/maven/lucene/analysis/pom.xml.template
index 9058abf..466ad30 100644
--- a/dev-tools/maven/lucene/analysis/pom.xml.template
+++ b/dev-tools/maven/lucene/analysis/pom.xml.template
@@ -35,6 +35,7 @@
     <module>icu</module>
     <module>kuromoji</module>
     <module>morfologik</module>
+    <module>opennlp</module>
     <module>phonetic</module>
     <module>smartcn</module>
     <module>stempel</module>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 0fbf446..db8aaab 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -65,6 +65,15 @@ API Changes
 * LUCENE-8051: LevensteinDistance renamed to LevenshteinDistance.
   (Pulak Ghosh via Adrien Grand)
 
+New Features
+
+* LUCENE-2899: Add new module analysis/opennlp, with analysis components
+  to perform tokenization, part-of-speech tagging, lemmatization and phrase
+  chunking by invoking the corresponding OpenNLP tools. Named entity
+  recognition is also provided as a Solr update request processor.
+  (Lance Norskog, Grant Ingersoll, Joern Kottmann, Em, Kai Gülzau,
+  Rene Nederhand, Robert Muir, Steven Bower, Steve Rowe)
+
 Improvements
 
 * LUCENE-8081: Allow IndexWriter to opt out of flushing on indexing threads

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/analysis/README.txt
----------------------------------------------------------------------
diff --git a/lucene/analysis/README.txt b/lucene/analysis/README.txt
index 7dc7f53..c68584e 100644
--- a/lucene/analysis/README.txt
+++ b/lucene/analysis/README.txt
@@ -28,6 +28,9 @@ lucene-analyzers-kuromoji-XX.jar
 lucene-analyzers-morfologik-XX.jar
   An analyzer using the Morfologik stemming library.
 
+lucene-analyzers-opennlp-XX.jar
+  An analyzer using the OpenNLP natural-language processing library.
+
 lucene-analyzers-phonetic-XX.jar
   An add-on analysis library that provides phonetic encoders via Apache
   Commons-Codec. Note: this module depends on the commons-codec jar 
@@ -49,6 +52,7 @@ common/src/java
 icu/src/java
 kuromoji/src/java
 morfologik/src/java
+opennlp/src/java
 phonetic/src/java
 smartcn/src/java
 stempel/src/java
@@ -59,6 +63,7 @@ common/src/test
 icu/src/test
 kuromoji/src/test
 morfologik/src/test
+opennlp/src/test
 phonetic/src/test
 smartcn/src/test
 stempel/src/test

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/analysis/build.xml
----------------------------------------------------------------------
diff --git a/lucene/analysis/build.xml b/lucene/analysis/build.xml
index 844f5f3..ed1566c 100644
--- a/lucene/analysis/build.xml
+++ b/lucene/analysis/build.xml
@@ -65,6 +65,10 @@
     <ant dir="morfologik" />
   </target>
 
+  <target name="opennlp">
+    <ant dir="opennlp" />
+  </target>
+
   <target name="phonetic">
     <ant dir="phonetic" />
   </target>
@@ -82,7 +86,7 @@
   </target>
 
   <target name="default" depends="compile"/>
-  <target name="compile" depends="common,icu,kuromoji,morfologik,phonetic,smartcn,stempel,uima" />
+  <target name="compile" depends="common,icu,kuromoji,morfologik,opennlp,phonetic,smartcn,stempel,uima" />
 
   <target name="clean">
     <forall-analyzers target="clean"/>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TypeAsSynonymFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TypeAsSynonymFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TypeAsSynonymFilter.java
new file mode 100644
index 0000000..8269d5d
--- /dev/null
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TypeAsSynonymFilter.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.miscellaneous;
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.apache.lucene.util.AttributeSource;
+
+/**
+ * Adds the {@link TypeAttribute#type()} as a synonym,
+ * i.e. another token at the same position, optionally with a specified prefix prepended.
+ */
+public final class TypeAsSynonymFilter extends TokenFilter {
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
+  private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
+  private final String prefix;
+
+  AttributeSource.State savedToken = null;
+
+
+  public TypeAsSynonymFilter(TokenStream input) {
+    this(input, null);
+  }
+
+  /**
+   * @param input input tokenstream
+   * @param prefix Prepend this string to every token type emitted as token text.
+   *               If null, nothing will be prepended.
+   */
+  public TypeAsSynonymFilter(TokenStream input, String prefix) {
+    super(input);
+    this.prefix = prefix;
+  }
+
+  @Override
+  public boolean incrementToken() throws IOException {
+    if (savedToken != null) {         // Emit last token's type at the same position
+      restoreState(savedToken);
+      savedToken = null;
+      termAtt.setEmpty();
+      if (prefix != null) {
+        termAtt.append(prefix);
+      }
+      termAtt.append(typeAtt.type());
+      posIncrAtt.setPositionIncrement(0);
+      return true;
+    } else if (input.incrementToken()) { // No pending token type to emit
+      savedToken = captureState();
+      return true;
+    }
+    return false;
+  }
+
+  @Override
+  public void reset() throws IOException {
+    super.reset();
+    savedToken = null;
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TypeAsSynonymFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TypeAsSynonymFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TypeAsSynonymFilterFactory.java
new file mode 100644
index 0000000..69708b7
--- /dev/null
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TypeAsSynonymFilterFactory.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.miscellaneous;
+
+import java.util.Map;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link TypeAsSynonymFilter}.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_type_as_synonym" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.UAX29URLEmailTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.TypeAsSynonymFilterFactory" prefix="_type_" /&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ *
+ * <p>
+ * If the optional {@code prefix} parameter is used, the specified value will be prepended
+ * to the type, e.g. with prefix="_type_", for a token "example.com" with type "&lt;URL&gt;",
+ * the emitted synonym will have text "_type_&lt;URL&gt;".
+ */
+public class TypeAsSynonymFilterFactory extends TokenFilterFactory {
+  private final String prefix;
+
+  public TypeAsSynonymFilterFactory(Map<String,String> args) {
+    super(args);
+    prefix = get(args, "prefix");  // default value is null
+    if (!args.isEmpty()) {
+      throw new IllegalArgumentException("Unknown parameters: " + args);
+    }
+  }
+
+  @Override
+  public TokenStream create(TokenStream input) {
+    return new TypeAsSynonymFilter(input, prefix);
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory b/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory
index d871ad6..6dcc81c 100644
--- a/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory
+++ b/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory
@@ -80,6 +80,7 @@ org.apache.lucene.analysis.miscellaneous.RemoveDuplicatesTokenFilterFactory
 org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilterFactory
 org.apache.lucene.analysis.miscellaneous.TrimFilterFactory
 org.apache.lucene.analysis.miscellaneous.TruncateTokenFilterFactory
+org.apache.lucene.analysis.miscellaneous.TypeAsSynonymFilterFactory
 org.apache.lucene.analysis.miscellaneous.WordDelimiterFilterFactory
 org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilterFactory
 org.apache.lucene.analysis.miscellaneous.ScandinavianFoldingFilterFactory

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/analysis/common/src/test/org/apache/lucene/analysis/minhash/MinHashFilterTest.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/minhash/MinHashFilterTest.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/minhash/MinHashFilterTest.java
index a4080fe..1bc6ed7 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/minhash/MinHashFilterTest.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/minhash/MinHashFilterTest.java
@@ -183,14 +183,14 @@ public class MinHashFilterTest extends BaseTokenStreamTestCase {
     TokenStream ts = createTokenStream(5, "woof woof woof woof woof", 1, 1, 100, false);
     assertTokenStreamContents(ts, hashes, new int[]{0},
         new int[]{24}, new String[]{MinHashFilter.MIN_HASH_TYPE}, new int[]{1}, new int[]{1}, 24, 0, null,
-        true);
+        true, null);
 
     ts = createTokenStream(5, "woof woof woof woof woof", 2, 1, 1, false);
     assertTokenStreamContents(ts, new String[]{new String(new char[]{0, 0, 8449, 54077, 64133, 32857, 8605, 41409}),
             new String(new char[]{0, 1, 16887, 58164, 39536, 14926, 6529, 17276})}, new int[]{0, 0},
         new int[]{24, 24}, new String[]{MinHashFilter.MIN_HASH_TYPE, MinHashFilter.MIN_HASH_TYPE}, new int[]{1, 0},
         new int[]{1, 1}, 24, 0, null,
-        true);
+        true, null);
   }
 
   @Test
@@ -203,7 +203,7 @@ public class MinHashFilterTest extends BaseTokenStreamTestCase {
         false);
     assertTokenStreamContents(ts, hashes, new int[]{0, 0},
         new int[]{49, 49}, new String[]{MinHashFilter.MIN_HASH_TYPE, MinHashFilter.MIN_HASH_TYPE}, new int[]{1, 0},
-        new int[]{1, 1}, 49, 0, null, true);
+        new int[]{1, 1}, 49, 0, null, true, null);
   }
 
   private ArrayList<String> getTokens(TokenStream ts) throws IOException {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTypeAsSynonymFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTypeAsSynonymFilterFactory.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTypeAsSynonymFilterFactory.java
new file mode 100644
index 0000000..6beb139
--- /dev/null
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTypeAsSynonymFilterFactory.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.miscellaneous;
+
+import org.apache.lucene.analysis.CannedTokenStream;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+
+public class TestTypeAsSynonymFilterFactory extends BaseTokenStreamFactoryTestCase {
+
+  private static final Token[] TOKENS =  { token("Visit", "<ALPHANUM>"), token("example.com", "<URL>") };
+
+  public void testBasic() throws Exception {
+    TokenStream stream = new CannedTokenStream(TOKENS);
+    stream = tokenFilterFactory("TypeAsSynonym").create(stream);
+    assertTokenStreamContents(stream, new String[] { "Visit", "<ALPHANUM>", "example.com", "<URL>" },
+        null, null, new String[] { "<ALPHANUM>", "<ALPHANUM>", "<URL>", "<URL>" }, new int[] { 1, 0, 1, 0 });
+  }
+
+  public void testPrefix() throws Exception {
+    TokenStream stream = new CannedTokenStream(TOKENS);
+    stream = tokenFilterFactory("TypeAsSynonym", "prefix", "_type_").create(stream);
+    assertTokenStreamContents(stream, new String[] { "Visit", "_type_<ALPHANUM>", "example.com", "_type_<URL>" },
+        null, null, new String[] { "<ALPHANUM>", "<ALPHANUM>", "<URL>", "<URL>" }, new int[] { 1, 0, 1, 0 });
+  }
+
+  private static Token token(String term, String type) {
+    Token token = new Token();
+    token.setEmpty();
+    token.append(term);
+    token.setType(type);
+    return token;
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/analysis/opennlp/build.xml
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/build.xml b/lucene/analysis/opennlp/build.xml
new file mode 100644
index 0000000..e2cd20a
--- /dev/null
+++ b/lucene/analysis/opennlp/build.xml
@@ -0,0 +1,118 @@
+<?xml version="1.0"?>
+
+<!--
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements.  See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version 2.0
+    (the "License"); you may not use this file except in compliance with
+    the License.  You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+ -->
+
+<project name="analyzers-opennlp" default="default">
+
+  <description>
+    OpenNLP Library Integration
+  </description>
+
+  <path id="opennlpjars">
+    <fileset dir="lib"/>
+  </path>
+
+  <property name="test.model.data.dir" location="src/tools/test-model-data"/>
+  <property name="tests.userdir" location="src/test-files"/>
+  <property name="test.model.dir" location="${tests.userdir}/org/apache/lucene/analysis/opennlp"/>
+
+  <import file="../analysis-module-build.xml"/>
+
+  <property name="analysis-extras.conf.dir"
+            location="${common.dir}/../solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf"/>
+
+  <path id="classpath">
+    <pathelement path="${analyzers-common.jar}"/>
+    <path refid="opennlpjars"/>
+    <path refid="base.classpath"/>
+  </path>
+
+  <path id="test.classpath">
+    <path refid="test.base.classpath"/>
+    <pathelement path="${tests.userdir}"/>
+  </path>
+
+  <target name="compile-core" depends="jar-analyzers-common, common.compile-core" />
+
+  <!--
+    This does not create real NLP models, just small unencumbered ones for the unit tests.
+    All text taken from the Reuters corpus.
+    Tags applied with online demos at CCG Urbana-Champaign.
+    -->
+  <target name="train-test-models" description="Train all small test models for unit tests" depends="resolve">
+    <mkdir dir="${test.model.dir}"/>
+    <!-- https://opennlp.apache.org/docs/1.8.3/manual/opennlp.html#tools.sentdetect.training -->
+    <trainModel command="SentenceDetectorTrainer" lang="en" data="sentences.txt" model="en-test-sent.bin"/>
+    <copy file="${test.model.dir}/en-test-sent.bin" todir="${analysis-extras.conf.dir}"/>
+
+    <!-- https://opennlp.apache.org/docs/1.8.3/manual/opennlp.html#tools.tokenizer.training -->
+    <trainModel command="TokenizerTrainer" lang="en" data="tokenizer.txt" model="en-test-tokenizer.bin"/>
+    <copy file="${test.model.dir}/en-test-tokenizer.bin" todir="${analysis-extras.conf.dir}"/>
+
+    <!-- https://opennlp.apache.org/docs/1.8.3/manual/opennlp.html#tools.postagger.training -->
+    <trainModel command="POSTaggerTrainer" lang="en" data="pos.txt" model="en-test-pos-maxent.bin"/>
+
+    <!-- https://opennlp.apache.org/docs/1.8.3/manual/opennlp.html#tools.chunker.training -->
+    <trainModel command="ChunkerTrainerME" lang="en" data="chunks.txt" model="en-test-chunker.bin"/>
+
+    <!-- https://opennlp.apache.org/docs/1.8.3/manual/opennlp.html#tools.namefind.training -->
+    <trainModel command="TokenNameFinderTrainer" lang="en" data="ner_flashman.txt" model="en-test-ner-person.bin">
+      <extra-args>
+        <arg value="-params"/>
+        <arg value="ner_TrainerParams.txt"/>
+      </extra-args>
+    </trainModel>
+    <copy file="${test.model.dir}/en-test-ner-person.bin" todir="${analysis-extras.conf.dir}"/>
+
+    <!-- https://opennlp.apache.org/docs/1.8.3/manual/opennlp.html#tools.lemmatizer.training -->
+    <trainModel command="LemmatizerTrainerME" lang="en" data="lemmas.txt" model="en-test-lemmatizer.bin"/>
+  </target>
+
+  <macrodef name="trainModel">
+    <attribute name="command"/>
+    <attribute name="lang"/>
+    <attribute name="data"/>
+    <attribute name="model"/>
+    <element name="extra-args" optional="true"/>
+    <sequential>
+      <java classname="opennlp.tools.cmdline.CLI"
+            dir="${test.model.data.dir}"
+            fork="true"
+            failonerror="true">
+        <classpath>
+          <path refid="opennlpjars"/>
+        </classpath>
+
+        <arg value="@{command}"/>
+
+        <arg value="-lang"/>
+        <arg value="@{lang}"/>
+
+        <arg value="-data"/>
+        <arg value="@{data}"/>
+
+        <arg value="-model"/>
+        <arg value="${test.model.dir}/@{model}"/>
+
+        <extra-args/>
+      </java>
+    </sequential>
+  </macrodef>
+
+  <target name="regenerate" depends="train-test-models"/>
+</project>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/analysis/opennlp/ivy.xml
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/ivy.xml b/lucene/analysis/opennlp/ivy.xml
new file mode 100644
index 0000000..c7b885f
--- /dev/null
+++ b/lucene/analysis/opennlp/ivy.xml
@@ -0,0 +1,29 @@
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+-->
+<ivy-module version="2.0">
+  <info organisation="org.apache.lucene" module="analyzers-opennlp" />
+  <configurations defaultconfmapping="compile->master">
+    <conf name="compile" transitive="false"/>
+  </configurations>
+  <dependencies>
+    <dependency org="org.apache.opennlp" name="opennlp-tools" rev="${/org.apache.opennlp/opennlp-tools}" transitive="false" conf="compile" />
+    <dependency org="org.apache.opennlp" name="opennlp-maxent" rev="${/org.apache.opennlp/opennlp-maxent}" transitive="false" conf="compile" />
+    <exclude org="*" ext="*" matcher="regexp" type="${ivy.exclude.types}" />
+  </dependencies>
+</ivy-module>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPChunkerFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPChunkerFilter.java b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPChunkerFilter.java
new file mode 100644
index 0000000..cfc47e6
--- /dev/null
+++ b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPChunkerFilter.java
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.opennlp;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.opennlp.tools.NLPChunkerOp;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.apache.lucene.util.AttributeSource;
+
+/**
+ * Run OpenNLP chunker.  Prerequisite: the OpenNLPTokenizer and OpenNLPPOSFilter must precede this filter.
+ * Tags terms in the TypeAttribute, replacing the POS tags previously put there by OpenNLPPOSFilter.
+ */
+public final class OpenNLPChunkerFilter extends TokenFilter {
+
+  private List<AttributeSource> sentenceTokenAttrs = new ArrayList<>();
+  private int tokenNum = 0;
+  private boolean moreTokensAvailable = true;
+  private String[] sentenceTerms = null;
+  private String[] sentenceTermPOSTags = null;
+
+  private final NLPChunkerOp chunkerOp;
+  private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
+  private final FlagsAttribute flagsAtt = addAttribute(FlagsAttribute.class);
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+
+  public OpenNLPChunkerFilter(TokenStream input, NLPChunkerOp chunkerOp) {
+    super(input);
+    this.chunkerOp = chunkerOp;
+  }
+
+  @Override
+  public final boolean incrementToken() throws IOException {
+    if ( ! moreTokensAvailable) {
+      clear();
+      return false;
+    }
+    if (tokenNum == sentenceTokenAttrs.size()) {
+      nextSentence();
+      if (sentenceTerms == null) {
+        clear();
+        return false;
+      }
+      assignTokenTypes(chunkerOp.getChunks(sentenceTerms, sentenceTermPOSTags, null));
+      tokenNum = 0;
+    }
+    clearAttributes();
+    sentenceTokenAttrs.get(tokenNum++).copyTo(this);
+    return true;
+  }
+
+  private void nextSentence() throws IOException {
+    List<String> termList = new ArrayList<>();
+    List<String> posTagList = new ArrayList<>();
+    sentenceTokenAttrs.clear();
+    boolean endOfSentence = false;
+    while ( ! endOfSentence && (moreTokensAvailable = input.incrementToken())) {
+      termList.add(termAtt.toString());
+      posTagList.add(typeAtt.type());
+      endOfSentence = 0 != (flagsAtt.getFlags() & OpenNLPTokenizer.EOS_FLAG_BIT);
+      sentenceTokenAttrs.add(input.cloneAttributes());
+    }
+    sentenceTerms = termList.size() > 0 ? termList.toArray(new String[termList.size()]) : null;
+    sentenceTermPOSTags = posTagList.size() > 0 ? posTagList.toArray(new String[posTagList.size()]) : null;
+  }
+
+  private void assignTokenTypes(String[] tags) {
+    for (int i = 0 ; i < tags.length ; ++i) {
+      sentenceTokenAttrs.get(i).getAttribute(TypeAttribute.class).setType(tags[i]);
+    }
+  }
+
+  @Override
+  public void reset() throws IOException {
+    super.reset();
+    moreTokensAvailable = true;
+    clear();
+  }
+
+  private void clear() {
+    sentenceTokenAttrs.clear();
+    sentenceTerms = null;
+    sentenceTermPOSTags = null;
+    tokenNum = 0;
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPChunkerFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPChunkerFilterFactory.java b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPChunkerFilterFactory.java
new file mode 100644
index 0000000..96eb672
--- /dev/null
+++ b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPChunkerFilterFactory.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.opennlp;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.opennlp.tools.NLPChunkerOp;
+import org.apache.lucene.analysis.opennlp.tools.OpenNLPOpsFactory;
+import org.apache.lucene.analysis.util.ResourceLoader;
+import org.apache.lucene.analysis.util.ResourceLoaderAware;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link OpenNLPChunkerFilter}.
+ *
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_opennlp_chunked" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.OpenNLPTokenizerFactory" sentenceModel="filename" tokenizerModel="filename"/&gt;
+ *     &lt;filter class="solr.OpenNLPPOSFilterFactory" posTaggerModel="filename"/&gt;
+ *     &lt;filter class="solr.OpenNLPChunkerFilterFactory" chunkerModel="filename"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ * @since 7.3.0
+ */
+public class OpenNLPChunkerFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
+  public static final String CHUNKER_MODEL = "chunkerModel";
+
+  private final String chunkerModelFile;
+
+  public OpenNLPChunkerFilterFactory(Map<String,String> args) {
+    super(args);
+    chunkerModelFile = get(args, CHUNKER_MODEL);
+    if (!args.isEmpty()) {
+      throw new IllegalArgumentException("Unknown parameters: " + args);
+    }
+  }
+
+  @Override
+  public OpenNLPChunkerFilter create(TokenStream in) {
+    try {
+      NLPChunkerOp chunkerOp = null;
+
+      if (chunkerModelFile != null) {
+        chunkerOp = OpenNLPOpsFactory.getChunker(chunkerModelFile);
+      }
+      return new OpenNLPChunkerFilter(in, chunkerOp);
+    } catch (IOException e) {
+      throw new IllegalArgumentException(e);
+    }
+  }
+
+  @Override
+  public void inform(ResourceLoader loader) {
+    try {
+      // load and register read-only models in cache with file/resource names
+      if (chunkerModelFile != null) {
+        OpenNLPOpsFactory.getChunkerModel(chunkerModelFile, loader);
+      }
+    } catch (IOException e) {
+      throw new IllegalArgumentException(e);
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPLemmatizerFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPLemmatizerFilter.java b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPLemmatizerFilter.java
new file mode 100644
index 0000000..4c484b9
--- /dev/null
+++ b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPLemmatizerFilter.java
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.opennlp;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.opennlp.tools.NLPLemmatizerOp;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
+import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.apache.lucene.util.AttributeSource;
+
+/**
+ * <p>Runs OpenNLP dictionary-based and/or MaxEnt lemmatizers.</p>
+ * <p>
+ *   Both a dictionary-based lemmatizer and a MaxEnt lemmatizer are supported,
+ *   via the "dictionary" and "lemmatizerModel" params, respectively.
+ *   If both are configured, the dictionary-based lemmatizer is tried first,
+ *   and then the MaxEnt lemmatizer is consulted for out-of-vocabulary tokens.
+ * </p>
+ * <p>
+ *   The dictionary file must be encoded as UTF-8, with one entry per line,
+ *   in the form <tt>word[tab]lemma[tab]part-of-speech</tt>
+ * </p>
+ */
+public class OpenNLPLemmatizerFilter extends TokenFilter {
+  private final NLPLemmatizerOp lemmatizerOp;
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
+  private final KeywordAttribute keywordAtt = addAttribute(KeywordAttribute.class);
+  private final FlagsAttribute flagsAtt = addAttribute(FlagsAttribute.class);
+  private List<AttributeSource> sentenceTokenAttrs = new ArrayList<>();
+  private Iterator<AttributeSource> sentenceTokenAttrsIter = null;
+  private boolean moreTokensAvailable = true;
+  private String[] sentenceTokens = null;     // non-keyword tokens
+  private String[] sentenceTokenTypes = null; // types for non-keyword tokens
+  private String[] lemmas = null;             // lemmas for non-keyword tokens
+  private int lemmaNum = 0;                   // lemma counter
+
+  public OpenNLPLemmatizerFilter(TokenStream input, NLPLemmatizerOp lemmatizerOp) {
+    super(input);
+    this.lemmatizerOp = lemmatizerOp;
+  }
+
+  @Override
+  public final boolean incrementToken() throws IOException {
+    if ( ! moreTokensAvailable) {
+      clear();
+      return false;
+    }
+    if (sentenceTokenAttrsIter == null || ! sentenceTokenAttrsIter.hasNext()) {
+      nextSentence();
+      if (sentenceTokens == null) { // zero non-keyword tokens
+        clear();
+        return false;
+      }
+      lemmas = lemmatizerOp.lemmatize(sentenceTokens, sentenceTokenTypes);
+      lemmaNum = 0;
+      sentenceTokenAttrsIter = sentenceTokenAttrs.iterator();
+    }
+    clearAttributes();
+    sentenceTokenAttrsIter.next().copyTo(this);
+    if ( ! keywordAtt.isKeyword()) {
+      termAtt.setEmpty().append(lemmas[lemmaNum++]);
+    }
+    return true;
+
+  }
+
+  private void nextSentence() throws IOException {
+    List<String> tokenList = new ArrayList<>();
+    List<String> typeList = new ArrayList<>();
+    sentenceTokenAttrs.clear();
+    boolean endOfSentence = false;
+    while ( ! endOfSentence && (moreTokensAvailable = input.incrementToken())) {
+      if ( ! keywordAtt.isKeyword()) {
+        tokenList.add(termAtt.toString());
+        typeList.add(typeAtt.type());
+      }
+      endOfSentence = 0 != (flagsAtt.getFlags() & OpenNLPTokenizer.EOS_FLAG_BIT);
+      sentenceTokenAttrs.add(input.cloneAttributes());
+    }
+    sentenceTokens = tokenList.size() > 0 ? tokenList.toArray(new String[tokenList.size()]) : null;
+    sentenceTokenTypes = typeList.size() > 0 ? typeList.toArray(new String[typeList.size()]) : null;
+  }
+
+  @Override
+  public void reset() throws IOException {
+    super.reset();
+    moreTokensAvailable = true;
+    clear();
+  }
+
+  private void clear() {
+    sentenceTokenAttrs.clear();
+    sentenceTokenAttrsIter = null;
+    sentenceTokens = null;
+    sentenceTokenTypes = null;
+    lemmas = null;
+    lemmaNum = 0;
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPLemmatizerFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPLemmatizerFilterFactory.java b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPLemmatizerFilterFactory.java
new file mode 100644
index 0000000..90a0e43
--- /dev/null
+++ b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPLemmatizerFilterFactory.java
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.opennlp;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.opennlp.tools.NLPLemmatizerOp;
+import org.apache.lucene.analysis.opennlp.tools.OpenNLPOpsFactory;
+import org.apache.lucene.analysis.util.ResourceLoader;
+import org.apache.lucene.analysis.util.ResourceLoaderAware;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link OpenNLPLemmatizerFilter}.
+ * <p>
+ * At least one of the <code>dictionary</code> and <code>lemmatizerModel</code> arguments must be given.
+ *
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_opennlp_lemma" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.OpenNLPTokenizerFactory"
+ *                sentenceModel="filename"
+ *                tokenizerModel="filename"/&gt;
+ *     &lt;filter class="solr.OpenNLPLemmatizerFilterFactory"
+ *             dictionary="filename"
+ *             lemmatizerModel="filename"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ * @since 7.3.0
+ */
+public class OpenNLPLemmatizerFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
+  /** Name of the argument naming the lemmatizer dictionary file/resource. */
+  public static final String DICTIONARY = "dictionary";
+  /** Name of the argument naming the lemmatizer model file/resource. */
+  public static final String LEMMATIZER_MODEL = "lemmatizerModel";
+
+  private final String dictionaryFile;
+  private final String lemmatizerModelFile;
+
+  /**
+   * Creates a new factory from its configuration arguments.
+   *
+   * @param args configuration arguments; {@link #DICTIONARY} and {@link #LEMMATIZER_MODEL} are read from it
+   * @throws IllegalArgumentException if neither {@link #DICTIONARY} nor {@link #LEMMATIZER_MODEL} is given,
+   *         or if arguments remain after the known ones have been read
+   */
+  public OpenNLPLemmatizerFilterFactory(Map<String,String> args) {
+    super(args);
+    dictionaryFile = get(args, DICTIONARY);
+    lemmatizerModelFile = get(args, LEMMATIZER_MODEL);
+
+    if (dictionaryFile == null && lemmatizerModelFile == null) {
+      throw new IllegalArgumentException("Configuration Error: missing parameter: at least one of '"
+          + DICTIONARY + "' and '" + LEMMATIZER_MODEL + "' must be provided.");
+    }
+
+    // anything still left in args was not recognized above
+    if (!args.isEmpty()) {
+      throw new IllegalArgumentException("Unknown parameters: " + args);
+    }
+  }
+
+  /**
+   * Wraps the given stream in an {@link OpenNLPLemmatizerFilter} that uses a lemmatizer obtained
+   * from {@link OpenNLPOpsFactory} for the configured dictionary and/or model names.
+   *
+   * @throws RuntimeException wrapping any {@link IOException} raised while obtaining the lemmatizer
+   */
+  @Override
+  public OpenNLPLemmatizerFilter create(TokenStream in) {
+    try {
+      NLPLemmatizerOp lemmatizerOp = OpenNLPOpsFactory.getLemmatizer(dictionaryFile, lemmatizerModelFile);
+      return new OpenNLPLemmatizerFilter(in, lemmatizerOp);
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  /**
+   * Loads whichever of the dictionary and model were configured through the given loader.
+   */
+  @Override
+  public void inform(ResourceLoader loader) throws IOException {
+    // register models in cache with file/resource names
+    if (dictionaryFile != null) {
+      OpenNLPOpsFactory.getLemmatizerDictionary(dictionaryFile, loader);
+    }
+    if (lemmatizerModelFile != null) {
+      OpenNLPOpsFactory.getLemmatizerModel(lemmatizerModelFile, loader);
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPPOSFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPPOSFilter.java b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPPOSFilter.java
new file mode 100644
index 0000000..a5bea28
--- /dev/null
+++ b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPPOSFilter.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.opennlp;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.opennlp.tools.NLPPOSTaggerOp;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.apache.lucene.util.AttributeSource;
+
+/**
+ * Run OpenNLP POS tagger.  Tags all terms in the TypeAttribute.
+ * <p>
+ * Tokens are buffered one sentence at a time: a sentence ends at a token whose flags contain
+ * {@link OpenNLPTokenizer#EOS_FLAG_BIT}, or at the end of the input.  The buffered sentence is
+ * tagged as a unit and its tokens are then replayed with the tag stored in the
+ * {@link TypeAttribute}.
+ */
+public final class OpenNLPPOSFilter extends TokenFilter {
+
+  // attribute snapshots of the tokens of the sentence currently being replayed
+  private final List<AttributeSource> sentenceTokenAttrs = new ArrayList<>();
+  // one tag per buffered token, parallel to sentenceTokenAttrs
+  String[] tags = null;
+  // index of the next buffered token to emit
+  private int tokenNum = 0;
+  // false once the wrapped stream has returned false from incrementToken()
+  private boolean moreTokensAvailable = true;
+
+  private final NLPPOSTaggerOp posTaggerOp;
+  private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
+  private final FlagsAttribute flagsAtt = addAttribute(FlagsAttribute.class);
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+
+  /**
+   * @param input stream whose tokens are tagged
+   * @param posTaggerOp tagger used to compute one tag per token of each sentence
+   */
+  public OpenNLPPOSFilter(TokenStream input, NLPPOSTaggerOp posTaggerOp) {
+    super(input);
+    this.posTaggerOp = posTaggerOp;
+  }
+
+  /**
+   * Emits the next buffered token, first buffering and tagging a new sentence when the previous
+   * one has been fully replayed.
+   *
+   * @return false once the input is exhausted and every buffered token has been emitted
+   */
+  @Override
+  public final boolean incrementToken() throws IOException {
+    if (tokenNum == sentenceTokenAttrs.size()) { // beginning of stream, or previous sentence exhausted
+      // Only stop once the buffer is drained: the final sentence may have ended because the input
+      // ran out rather than on an EOS-flagged token, and its buffered tokens must still be emitted.
+      if ( ! moreTokensAvailable) {
+        clear();
+        return false;
+      }
+      String[] sentenceTokens = nextSentence();
+      if (sentenceTokens == null) {
+        clear();
+        return false;
+      }
+      tags = posTaggerOp.getPOSTags(sentenceTokens);
+      tokenNum = 0;
+    }
+    clearAttributes();
+    sentenceTokenAttrs.get(tokenNum).copyTo(this);
+    typeAtt.setType(tags[tokenNum++]);
+    return true;
+  }
+
+  /**
+   * Buffers tokens from the input up to and including the next EOS-flagged token, or up to the
+   * end of the input.
+   *
+   * @return the term text of the buffered tokens, or null if no token was read
+   */
+  private String[] nextSentence() throws IOException {
+    List<String> termList = new ArrayList<>();
+    sentenceTokenAttrs.clear();
+    boolean endOfSentence = false;
+    while ( ! endOfSentence && (moreTokensAvailable = input.incrementToken())) {
+      termList.add(termAtt.toString());
+      endOfSentence = 0 != (flagsAtt.getFlags() & OpenNLPTokenizer.EOS_FLAG_BIT);
+      sentenceTokenAttrs.add(input.cloneAttributes());
+    }
+    return termList.size() > 0 ? termList.toArray(new String[termList.size()]) : null;
+  }
+
+  /**
+   * Resets the wrapped stream and discards any sentence still buffered from a previous use, so
+   * that a stream abandoned mid-sentence does not replay stale tokens.
+   */
+  @Override
+  public void reset() throws IOException {
+    super.reset();
+    moreTokensAvailable = true;
+    clear();
+  }
+
+  /** Discards the buffered sentence and its tags. */
+  private void clear() {
+    sentenceTokenAttrs.clear();
+    tags = null;
+    tokenNum = 0;
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPPOSFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPPOSFilterFactory.java b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPPOSFilterFactory.java
new file mode 100644
index 0000000..952218f
--- /dev/null
+++ b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPPOSFilterFactory.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.opennlp;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.opennlp.tools.OpenNLPOpsFactory;
+import org.apache.lucene.analysis.util.ResourceLoader;
+import org.apache.lucene.analysis.util.ResourceLoaderAware;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link OpenNLPPOSFilter}.
+ *
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_opennlp_pos" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.OpenNLPTokenizerFactory" sentenceModel="filename" tokenizerModel="filename"/&gt;
+ *     &lt;filter class="solr.OpenNLPPOSFilterFactory" posTaggerModel="filename"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ * @since 7.3.0
+ */
+public class OpenNLPPOSFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
+  /** Name of the argument naming the POS tagger model file/resource. */
+  public static final String POS_TAGGER_MODEL = "posTaggerModel";
+
+  private final String posTaggerModelFile;
+
+  /**
+   * Creates a new factory from its configuration arguments.
+   *
+   * @param args configuration arguments; {@link #POS_TAGGER_MODEL} is read from it
+   * @throws IllegalArgumentException if arguments remain after {@link #POS_TAGGER_MODEL} has been read
+   */
+  public OpenNLPPOSFilterFactory(Map<String,String> args) {
+    super(args);
+    posTaggerModelFile = require(args, POS_TAGGER_MODEL);
+    final boolean unknownArgsLeft = ! args.isEmpty();
+    if (unknownArgsLeft) {
+      throw new IllegalArgumentException("Unknown parameters: " + args);
+    }
+  }
+
+  /**
+   * Wraps the given stream in an {@link OpenNLPPOSFilter} that uses a tagger obtained from
+   * {@link OpenNLPOpsFactory} for the configured model name.
+   *
+   * @throws IllegalArgumentException wrapping any {@link IOException} raised while building the filter
+   */
+  @Override
+  public OpenNLPPOSFilter create(TokenStream in) {
+    final OpenNLPPOSFilter filter;
+    try {
+      filter = new OpenNLPPOSFilter(in, OpenNLPOpsFactory.getPOSTagger(posTaggerModelFile));
+    } catch (IOException ioe) {
+      throw new IllegalArgumentException(ioe);
+    }
+    return filter;
+  }
+
+  /**
+   * Loads the configured POS tagger model through the given loader.
+   *
+   * @throws IllegalArgumentException wrapping any {@link IOException} raised while loading the model
+   */
+  @Override
+  public void inform(ResourceLoader loader) {
+    // load and register the read-only model in cache with file/resource name
+    try {
+      OpenNLPOpsFactory.getPOSTaggerModel(posTaggerModelFile, loader);
+    } catch (IOException ioe) {
+      throw new IllegalArgumentException(ioe);
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPSentenceBreakIterator.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPSentenceBreakIterator.java b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPSentenceBreakIterator.java
new file mode 100644
index 0000000..f69fbc6
--- /dev/null
+++ b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPSentenceBreakIterator.java
@@ -0,0 +1,224 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.opennlp;
+
+import java.text.BreakIterator;
+import java.text.CharacterIterator;
+
+import opennlp.tools.util.Span;
+import org.apache.lucene.analysis.opennlp.tools.NLPSentenceDetectorOp;
+import org.apache.lucene.analysis.util.CharArrayIterator;
+
+/**
+ * A {@link BreakIterator} that splits sentences using an OpenNLP sentence chunking model.
+ * <p>
+ * Sentence boundaries are computed once, in {@link #setText(CharacterIterator)}, and kept as an
+ * array of sentence start offsets.  The navigation methods move the index of the wrapped
+ * {@link CharacterIterator} between those offsets and the text's begin/end indexes; the current
+ * boundary is always the wrapped iterator's index.
+ */
+public final class OpenNLPSentenceBreakIterator extends BreakIterator {
+
+  // text being iterated; its index is this iterator's current position
+  private CharacterIterator text;
+  // index into sentenceStarts tracked alongside the text index
+  private int currentSentence;
+  // start offset of each detected sentence, shifted by the text's begin index (see setText)
+  private int[] sentenceStarts;
+  private NLPSentenceDetectorOp sentenceOp;
+
+  /**
+   * @param sentenceOp sentence detector used by {@link #setText(CharacterIterator)} to split the text
+   */
+  public OpenNLPSentenceBreakIterator(NLPSentenceDetectorOp sentenceOp) {
+    this.sentenceOp = sentenceOp;
+  }
+
+  /** Returns the current index of the wrapped text. */
+  @Override
+  public int current() {
+    return text.getIndex();
+  }
+
+  /** Moves to the begin index of the text and selects the first sentence. */
+  @Override
+  public int first() {
+    currentSentence = 0;
+    text.setIndex(text.getBeginIndex());
+    return current();
+  }
+
+  /**
+   * Moves to the end index of the text and selects the last sentence; when no sentences were
+   * detected, moves to the begin index instead.
+   */
+  @Override
+  public int last() {
+    if (sentenceStarts.length > 0) {
+      currentSentence = sentenceStarts.length - 1;
+      text.setIndex(text.getEndIndex());
+    } else { // there are no sentences; both the first and last positions are the begin index
+      currentSentence = 0;
+      text.setIndex(text.getBeginIndex());
+    }
+    return current();
+  }
+
+  /**
+   * Advances to the start of the next sentence, or to the end index when already on the last
+   * sentence.  Returns {@link #DONE} when already at the end index or when there are no sentences.
+   */
+  @Override
+  public int next() {
+    if (text.getIndex() == text.getEndIndex() || 0 == sentenceStarts.length) {
+      return DONE;
+    } else if (currentSentence < sentenceStarts.length - 1) {
+      text.setIndex(sentenceStarts[++currentSentence]);
+      return current();
+    } else {
+      return last();
+    }
+  }
+
+  /**
+   * Moves to the start of the sentence following the one that contains {@code pos}.
+   * Returns {@link #DONE} when there are no sentences or when {@code pos} lies at or after the
+   * start of the last sentence.
+   *
+   * @throws IllegalArgumentException if {@code pos} is outside the text's begin/end indexes
+   */
+  @Override
+  public int following(int pos) {
+    if (pos < text.getBeginIndex() || pos > text.getEndIndex()) {
+      throw new IllegalArgumentException("offset out of bounds");
+    } else if (0 == sentenceStarts.length) {
+      text.setIndex(text.getBeginIndex());
+      return DONE;
+    } else if (pos >= sentenceStarts[sentenceStarts.length - 1]) {
+      // this conflicts with the javadocs, but matches actual behavior (Oracle has a bug in something)
+      // https://bugs.openjdk.java.net/browse/JDK-8015110
+      text.setIndex(text.getEndIndex());
+      currentSentence = sentenceStarts.length - 1;
+      return DONE;
+    } else { // there are at least two sentences
+      currentSentence = (sentenceStarts.length - 1) / 2; // start search from the middle
+      // the last sentence is excluded from the search range: pos is known to precede its start
+      moveToSentenceAt(pos, 0, sentenceStarts.length - 2);
+      text.setIndex(sentenceStarts[++currentSentence]);
+      return current();
+    }
+  }
+
+  /**
+   * Binary search over sentences.
+   * <p>
+   * Starting from the caller-chosen {@code currentSentence}, recursively narrows the range
+   * [{@code minSentence}, {@code maxSentence}] and updates {@code currentSentence} as it goes;
+   * per the assertions below, the search is intended to end on the sentence whose start is at or
+   * before {@code pos} and whose successor (if any) starts after {@code pos}.
+   */
+  private void moveToSentenceAt(int pos, int minSentence, int maxSentence) {
+    if (minSentence != maxSentence) {
+      if (pos < sentenceStarts[currentSentence]) {
+        // pos is before the probed sentence: continue in the lower part of the range
+        int newMaxSentence = currentSentence - 1;
+        currentSentence = minSentence + (currentSentence - minSentence) / 2;
+        moveToSentenceAt(pos, minSentence, newMaxSentence);
+      } else if (pos >= sentenceStarts[currentSentence + 1]) {
+        // pos is at or after the next sentence's start: continue in the upper part of the range
+        int newMinSentence = currentSentence + 1;
+        currentSentence = maxSentence - (maxSentence - currentSentence) / 2;
+        moveToSentenceAt(pos, newMinSentence, maxSentence);
+      }
+    } else {
+      assert currentSentence == minSentence;
+      assert pos >= sentenceStarts[currentSentence];
+      assert (currentSentence == sentenceStarts.length - 1 && pos <= text.getEndIndex())
+          || pos < sentenceStarts[currentSentence + 1];
+    }
+    // we have arrived - nothing to do
+  }
+
+  /**
+   * Moves back one boundary: from the end index to the start of the current sentence, otherwise
+   * to the start of the previous sentence.  Returns {@link #DONE} when already at the begin index
+   * or when there are no sentences.
+   */
+  @Override
+  public int previous() {
+    if (text.getIndex() == text.getBeginIndex()) {
+      return DONE;
+    } else {
+      if (0 == sentenceStarts.length) {
+        text.setIndex(text.getBeginIndex());
+        return DONE;
+      }
+      if (text.getIndex() == text.getEndIndex()) {
+        // at the end index currentSentence still designates the last sentence, so don't decrement
+        text.setIndex(sentenceStarts[currentSentence]);
+      } else {
+        text.setIndex(sentenceStarts[--currentSentence]);
+      }
+      return current();
+    }
+  }
+
+  /**
+   * Moves to the start of the sentence preceding the one that contains {@code pos}.
+   * Returns {@link #DONE}, with the index at the begin index, when there are no sentences, when
+   * {@code pos} precedes the first sentence, or when {@code pos} falls in the first sentence.
+   *
+   * @throws IllegalArgumentException if {@code pos} is outside the text's begin/end indexes
+   */
+  @Override
+  public int preceding(int pos) {
+    if (pos < text.getBeginIndex() || pos > text.getEndIndex()) {
+      throw new IllegalArgumentException("offset out of bounds");
+    } else if (0 == sentenceStarts.length) {
+      text.setIndex(text.getBeginIndex());
+      currentSentence = 0;
+      return DONE;
+    } else if (pos < sentenceStarts[0]) {
+      // this conflicts with the javadocs, but matches actual behavior (Oracle has a bug in something)
+      // https://bugs.openjdk.java.net/browse/JDK-8015110
+      text.setIndex(text.getBeginIndex());
+      currentSentence = 0;
+      return DONE;
+    } else {
+      currentSentence = sentenceStarts.length / 2; // start search from the middle
+      moveToSentenceAt(pos, 0, sentenceStarts.length - 1);
+      if (0 == currentSentence) {
+        text.setIndex(text.getBeginIndex());
+        return DONE;
+      } else {
+        text.setIndex(sentenceStarts[--currentSentence]);
+        return current();
+      }
+    }
+  }
+
+  /**
+   * Moves {@code n} sentences forward (positive {@code n}) or backward (negative {@code n}).
+   * Returns {@link #DONE} and parks the index at the begin or end index when the move would run
+   * past the first or last sentence.  For {@code n == 0} neither branch runs and the current
+   * position is returned unchanged.
+   */
+  @Override
+  public int next(int n) {
+    currentSentence += n;
+    if (n < 0) {
+      if (text.getIndex() == text.getEndIndex()) {
+        // at the end index, stepping back by one lands on the start of the current sentence
+        ++currentSentence;
+      }
+      if (currentSentence < 0) {
+        currentSentence = 0;
+        text.setIndex(text.getBeginIndex());
+        return DONE;
+      } else {
+        text.setIndex(sentenceStarts[currentSentence]);
+      }
+    } else if (n > 0) {
+      if (currentSentence >= sentenceStarts.length) {
+        currentSentence = sentenceStarts.length - 1;
+        text.setIndex(text.getEndIndex());
+        return DONE;
+      } else {
+        text.setIndex(sentenceStarts[currentSentence]);
+      }
+    }
+    return current();
+  }
+
+  /** Returns the text most recently passed to {@link #setText(CharacterIterator)}. */
+  @Override
+  public CharacterIterator getText() {
+    return text;
+  }
+
+  /**
+   * Sets the text to iterate over, positions the iterator at its begin index, and runs the
+   * sentence detector over the full text to record the start offset of every sentence.
+   */
+  @Override
+  public void setText(CharacterIterator newText) {
+    text = newText;
+    text.setIndex(text.getBeginIndex());
+    currentSentence = 0;
+    Span[] spans = sentenceOp.splitSentences(characterIteratorToString());
+    sentenceStarts = new int[spans.length];
+    for (int i = 0; i < spans.length; ++i) {
+      // Adjust start positions to match those of the passed-in CharacterIterator
+      sentenceStarts[i] = spans[i].getStart() + text.getBeginIndex();
+    }
+  }
+
+  /**
+   * Extracts the full text of the wrapped iterator as a String: directly from the backing array
+   * for a {@link CharArrayIterator}, otherwise by walking the iterator character by character
+   * and then restoring its index to the begin index.
+   */
+  private String characterIteratorToString() {
+    String fullText;
+    if (text instanceof CharArrayIterator) {
+      CharArrayIterator charArrayIterator = (CharArrayIterator)text;
+      fullText = new String(charArrayIterator.getText(), charArrayIterator.getStart(), charArrayIterator.getLength());
+    } else {
+      // TODO: is there a better way to extract full text from arbitrary CharacterIterators?
+      StringBuilder builder = new StringBuilder();
+      for (char ch = text.first(); ch != CharacterIterator.DONE; ch = text.next()) {
+        builder.append(ch);
+      }
+      fullText = builder.toString();
+      text.setIndex(text.getBeginIndex());
+    }
+    return fullText;
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPTokenizer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPTokenizer.java b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPTokenizer.java
new file mode 100644
index 0000000..75a3b81
--- /dev/null
+++ b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPTokenizer.java
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.opennlp;
+
+import java.io.IOException;
+
+import opennlp.tools.util.Span;
+
+import org.apache.lucene.analysis.opennlp.tools.NLPSentenceDetectorOp;
+import org.apache.lucene.analysis.opennlp.tools.NLPTokenizerOp;
+import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.util.SegmentingTokenizerBase;
+import org.apache.lucene.util.AttributeFactory;
+
+/**
+ * Run OpenNLP SentenceDetector and Tokenizer.
+ * The last token in each sentence is marked by setting the {@link #EOS_FLAG_BIT} in the FlagsAttribute;
+ * following filters can use this information to apply operations to tokens one sentence at a time.
+ */
+public final class OpenNLPTokenizer extends SegmentingTokenizerBase {
+  /** Bit set in the {@link FlagsAttribute} of the last token of each sentence. */
+  public static final int EOS_FLAG_BIT = 1;
+
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final FlagsAttribute flagsAtt = addAttribute(FlagsAttribute.class);
+  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+
+  // spans of the terms of the current sentence, relative to the sentence text
+  private Span[] termSpans = null;
+  // index into termSpans of the next term to emit
+  private int termNum = 0;
+  // start of the current sentence within buffer
+  private int sentenceStart = 0;
+
+  private final NLPSentenceDetectorOp sentenceOp;
+  private final NLPTokenizerOp tokenizerOp;
+
+  /**
+   * @param factory attribute factory passed on to the superclass
+   * @param sentenceOp sentence detector; drives the sentence break iterator
+   * @param tokenizerOp tokenizer applied to the text of each sentence
+   * @throws IllegalArgumentException if either op is null
+   */
+  public OpenNLPTokenizer(AttributeFactory factory, NLPSentenceDetectorOp sentenceOp, NLPTokenizerOp tokenizerOp) throws IOException {
+    super(factory, new OpenNLPSentenceBreakIterator(sentenceOp));
+    if (sentenceOp == null || tokenizerOp == null) {
+      throw new IllegalArgumentException("OpenNLPTokenizer: both a Sentence Detector and a Tokenizer are required");
+    }
+    this.sentenceOp = sentenceOp;
+    this.tokenizerOp = tokenizerOp;
+  }
+
+  /** Closes the tokenizer and drops the per-sentence state. */
+  @Override
+  public void close() throws IOException {
+    super.close();
+    termSpans = null;
+    termNum = sentenceStart = 0;
+  }
+
+  /**
+   * Tokenizes the sentence occupying {@code buffer[sentenceStart, sentenceEnd)} and makes its
+   * terms available to {@link #incrementWord()}.
+   */
+  @Override
+  protected void setNextSentence(int sentenceStart, int sentenceEnd) {
+    this.sentenceStart = sentenceStart;
+    String sentenceText = new String(buffer, sentenceStart, sentenceEnd - sentenceStart);
+    termSpans = tokenizerOp.getTerms(sentenceText);
+    termNum = 0;
+  }
+
+  /**
+   * Emits the next term of the current sentence.
+   *
+   * @return false when no sentence has been set or all of its terms have been emitted
+   */
+  @Override
+  protected boolean incrementWord() {
+    if (termSpans == null || termNum == termSpans.length) {
+      return false;
+    }
+    clearAttributes();
+    Span term = termSpans[termNum];
+    // term spans are relative to the sentence text, so shift them by the sentence start
+    termAtt.copyBuffer(buffer, sentenceStart + term.getStart(), term.length());
+    offsetAtt.setOffset(correctOffset(offset + sentenceStart + term.getStart()),
+                        correctOffset(offset + sentenceStart + term.getEnd()));
+    if (termNum == termSpans.length - 1) {
+      flagsAtt.setFlags(flagsAtt.getFlags() | EOS_FLAG_BIT); // mark the last token in the sentence with EOS_FLAG_BIT
+    }
+    ++termNum;
+    return true;
+  }
+
+  /** Resets the tokenizer and drops the per-sentence state. */
+  @Override
+  public void reset() throws IOException {
+    super.reset();
+    termSpans = null;
+    termNum = sentenceStart = 0;
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPTokenizerFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPTokenizerFactory.java b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPTokenizerFactory.java
new file mode 100644
index 0000000..a60f23f
--- /dev/null
+++ b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPTokenizerFactory.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.opennlp;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.lucene.analysis.opennlp.tools.NLPSentenceDetectorOp;
+import org.apache.lucene.analysis.opennlp.tools.NLPTokenizerOp;
+import org.apache.lucene.analysis.opennlp.tools.OpenNLPOpsFactory;
+import org.apache.lucene.analysis.util.ResourceLoader;
+import org.apache.lucene.analysis.util.ResourceLoaderAware;
+import org.apache.lucene.analysis.util.TokenizerFactory;
+import org.apache.lucene.util.AttributeFactory;
+
+/**
+ * Factory for {@link OpenNLPTokenizer}.
+ *
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_opennlp" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.OpenNLPTokenizerFactory" sentenceModel="filename" tokenizerModel="filename"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ * @since 7.3.0
+ */
+public class OpenNLPTokenizerFactory extends TokenizerFactory implements ResourceLoaderAware {
+  /** Name of the argument naming the sentence detector model file/resource. */
+  public static final String SENTENCE_MODEL = "sentenceModel";
+  /** Name of the argument naming the tokenizer model file/resource. */
+  public static final String TOKENIZER_MODEL = "tokenizerModel";
+
+  private final String sentenceModelFile;
+  private final String tokenizerModelFile;
+
+  /**
+   * Creates a new factory from its configuration arguments.
+   *
+   * @param args configuration arguments; {@link #SENTENCE_MODEL} and {@link #TOKENIZER_MODEL} are read from it
+   * @throws IllegalArgumentException if arguments remain after the known ones have been read
+   */
+  public OpenNLPTokenizerFactory(Map<String,String> args) {
+    super(args);
+    sentenceModelFile = require(args, SENTENCE_MODEL);
+    tokenizerModelFile = require(args, TOKENIZER_MODEL);
+    // anything still left in args was not recognized above
+    if ( ! args.isEmpty()) {
+      throw new IllegalArgumentException("Unknown parameters: " + args);
+    }
+  }
+
+  /**
+   * Creates an {@link OpenNLPTokenizer} using a sentence detector and a tokenizer obtained from
+   * {@link OpenNLPOpsFactory} for the configured model names.
+   *
+   * @throws RuntimeException wrapping any {@link IOException} raised while building the tokenizer
+   */
+  @Override
+  public OpenNLPTokenizer create(AttributeFactory factory) {
+    try {
+      NLPSentenceDetectorOp sentenceOp = OpenNLPOpsFactory.getSentenceDetector(sentenceModelFile);
+      NLPTokenizerOp tokenizerOp = OpenNLPOpsFactory.getTokenizer(tokenizerModelFile);
+      return new OpenNLPTokenizer(factory, sentenceOp, tokenizerOp);
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  /**
+   * Loads the configured sentence and tokenizer models through the given loader.
+   */
+  @Override
+  public void inform(ResourceLoader loader) throws IOException {
+    // register models in cache with file/resource names
+    if (sentenceModelFile != null) {
+      OpenNLPOpsFactory.getSentenceModel(sentenceModelFile, loader);
+    }
+    if (tokenizerModelFile != null) {
+      OpenNLPOpsFactory.getTokenizerModel(tokenizerModelFile, loader);
+    }
+  }
+}


[04/12] lucene-solr:master: LUCENE-2899: Add OpenNLP Analysis capabilities as a module

Posted by sa...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/analysis/opennlp/src/tools/test-model-data/chunks.txt
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/tools/test-model-data/chunks.txt b/lucene/analysis/opennlp/src/tools/test-model-data/chunks.txt
new file mode 100644
index 0000000..f02c5b7
--- /dev/null
+++ b/lucene/analysis/opennlp/src/tools/test-model-data/chunks.txt
@@ -0,0 +1,3566 @@
+Iran NNP B-NP
+announced VBD B-VP
+tonight NN B-NP
+that IN B-PP
+its NNS B-NP
+major JJ B-NP
+offensive NN I-NP
+against IN B-PP
+Iraq NNP B-NP
+in IN B-PP
+the DT B-NP
+Gulf NNP I-NP
+war NN I-NP
+had VBD B-VP
+ended VBN I-VP
+after IN B-PP
+dealing VBG B-VP
+savage JJ B-NP
+blows NNS I-NP
+against IN B-PP
+the DT B-NP
+Baghdad NNP I-NP
+government NN I-NP
+. . O
+The DT B-NP
+Iranian JJ I-NP
+news NN I-NP
+agency NN I-NP
+IRNA NNP I-NP
+, , O
+in IN B-PP
+a DT B-NP
+report NN I-NP
+received VBN B-VP
+in IN B-PP
+London NNP B-NP
+, , O
+said VBD B-VP
+the DT B-NP
+operation NN I-NP
+code-named VBN B-VP
+Karbala-5 CD B-NP
+launched VBD B-VP
+into IN B-PP
+Iraq NNP B-NP
+on IN B-PP
+January NNP B-NP
+9 CD I-NP
+was VBD B-VP
+now RB B-ADVP
+over RP B-NP
+. . O
+It PRP B-NP
+quoted VBD B-VP
+a DT B-NP
+joint NN I-NP
+statewment NN I-NP
+by IN B-PP
+the DT B-NP
+Iranian JJ I-NP
+Army NNP I-NP
+and CC I-NP
+Revolutionary NNP I-NP
+Guards NNPS I-NP
+Corps NNP I-NP
+as IN B-PP
+saying VBG B-VP
+that IN B-SBAR
+their DT B-NP
+forces NNS I-NP
+had VBD B-VP
+" JJ B-NP
+dealt VBD B-VP
+one CD B-NP
+of IN B-PP
+the DT B-NP
+severest JJS I-NP
+blows NNS I-NP
+on IN B-PP
+the DT B-NP
+Iraqi JJ I-NP
+war NN I-NP
+machine NN I-NP
+in IN B-PP
+the DT B-NP
+history NN I-NP
+of IN B-PP
+the DT B-NP
+Iraq-imposed JJ I-NP
+war NN I-NP
+. . O
+" NN B-VP
+The DT B-NP
+statement NN I-NP
+by IN B-PP
+the DT B-NP
+Iranian JJ I-NP
+High NNP I-NP
+Command NNP I-NP
+appeared VBD B-VP
+to TO I-VP
+herald VB I-VP
+the DT B-NP
+close NN I-NP
+of IN B-PP
+an DT B-NP
+assault NN I-NP
+on IN B-PP
+the DT B-NP
+port JJ I-NP
+city NN I-NP
+of IN B-PP
+Basra NNP B-NP
+in IN B-PP
+southern JJ B-NP
+Iraq NNP I-NP
+. . O
+" NN B-VP
+The DT B-NP
+operation NN I-NP
+was VBD B-VP
+launched VBN I-VP
+at IN B-PP
+a DT B-NP
+time NN I-NP
+when WRB B-ADVP
+the DT B-NP
+Baghdad NNP I-NP
+government NN I-NP
+was VBD B-VP
+spreading VBG I-VP
+extensive JJ B-NP
+propaganda NN I-NP
+on IN B-PP
+the DT B-NP
+resistance NN I-NP
+power NN I-NP
+of IN B-PP
+its NNS B-NP
+army NN I-NP
+... NNS I-NP
+, , O
+" NNS B-NP
+said VBD B-VP
+the DT B-NP
+statement NN I-NP
+quoted VBN B-VP
+by IN B-PP
+IRNA NNP B-NP
+. . O
+It PRP B-NP
+claimed VBD B-VP
+massive JJ B-NP
+victories NNS I-NP
+in IN B-PP
+the DT B-NP
+seven-week NN I-NP
+offensive NN I-NP
+and CC O
+called VBN B-VP
+on IN B-PP
+supporters NNS B-NP
+of IN B-SBAR
+Baghdad NNP B-NP
+to TO B-VP
+" VB I-VP
+come VBN I-VP
+to TO B-PP
+their IN B-NP
+senses JJ I-NP
+" NNS I-NP
+and CC O
+discontinue VB B-VP
+support NN B-NP
+for IN B-PP
+what WP B-NP
+it PRP B-NP
+called VBD B-VP
+the DT B-NP
+tottering VBG I-NP
+regime NN I-NP
+in IN B-PP
+Iraq NNP B-NP
+. . I-NP
+Iran NNP I-NP
+said VBD B-VP
+its NNS B-NP
+forces NNS I-NP
+had VBD B-VP
+" CD B-NP
+liberated JJ I-NP
+" NN I-NP
+155 CD I-NP
+square JJ I-NP
+kilometers NNS I-NP
+of IN B-PP
+enemy-occupied JJ-occupied B-NP
+territory NN I-NP
+during IN B-PP
+the DT B-NP
+1987 CD I-NP
+offensive NN I-NP
+and CC O
+taken VBN B-VP
+over IN B-PP
+islands NNS B-NP
+, , O
+townships NNS B-NP
+, , O
+rivers NNS B-NP
+and CC O
+part NN B-NP
+of IN B-PP
+a DT B-NP
+road NN I-NP
+leading VBG B-VP
+into IN B-PP
+Basra NNP B-NP
+. . O
+The DT B-NP
+Iranian JJ I-NP
+forces NNS I-NP
+" NNS I-NP
+are VBP B-VP
+in IN B-PP
+full JJ B-NP
+control NN I-NP
+of IN B-PP
+these DT B-NP
+areas NNS I-NP
+, , O
+" NNS B-NP
+the DT B-NP
+statement NN I-NP
+said VBD B-VP
+. . O
+It PRP B-NP
+said VBD B-VP
+81 CD B-NP
+Iraqi JJ I-NP
+brigades NNS I-NP
+and CC I-NP
+battalions NNS I-NP
+were VBD B-VP
+totally RB I-VP
+destroyed VBN I-VP
+, , O
+along IN B-ADVP
+with IN B-PP
+700 CD B-NP
+tanks NNS I-NP
+and CC O
+1,500 CD B-NP
+other JJ I-NP
+vehicles NNS I-NP
+. . O
+The DT B-NP
+victory NN I-NP
+list NN I-NP
+also RB B-ADVP
+included VBD B-VP
+80 CD B-NP
+warplanes NNS I-NP
+downed VBD B-VP
+, , O
+250 CD B-NP
+anti- - I-NP
+aircraft NN I-NP
+guns NNS I-NP
+and CC O
+400 CD B-NP
+pieces NNS I-NP
+of IN B-PP
+military JJ B-NP
+hardware NN I-NP
+destroyed VBN B-VP
+and CC O
+the DT B-NP
+seizure NN I-NP
+of IN B-PP
+220 CD B-NP
+tanks NNS I-NP
+and CC O
+armoured JJ B-NP
+personnel NNS I-NP
+carriers NNS I-NP
+. . O
+U.S. NNP O
+bank NN I-NP
+discount NN I-NP
+window RB I-NP
+borrowings NNS I-NP
+less NNS I-NP
+extended VBN B-NP
+credits NN I-NP
+averaged VBD B-VP
+310 CD B-NP
+mln NN I-NP
+dlrs NN I-NP
+in IN B-PP
+the DT B-NP
+week NN I-NP
+to TO B-PP
+Wednesday NNP B-NP
+February NNP I-NP
+25 CD I-NP
+, , O
+the DT B-NP
+Federal JJ I-NP
+Reserve NNP I-NP
+said VBD B-VP
+. . O
+The DT B-NP
+Fed JJ I-NP
+said VBD B-VP
+that IN B-SBAR
+overall JJ B-NP
+borrowings NNS I-NP
+in IN B-PP
+the DT B-NP
+week NN I-NP
+fell MD B-VP
+131 CD B-NP
+mln NN I-NP
+dlrs NN I-NP
+to TO B-PP
+614 CD B-NP
+mln NN I-NP
+dlrs NN I-NP
+, , O
+with IN B-PP
+extended VBN B-NP
+credits NN I-NP
+up IN B-PP
+10 CD B-NP
+mln NN I-NP
+dlrs NN I-NP
+at IN B-PP
+304 CD B-NP
+mln NN I-NP
+dlrs NN I-NP
+. . O
+The DT B-NP
+week NN I-NP
+was VBD B-VP
+the DT B-NP
+second NN I-NP
+half NN I-NP
+of IN B-PP
+a DT B-NP
+two-week NN I-NP
+statement NN I-NP
+period. NNS I-NP
+Net VBD B-VP
+borrowings NNS B-NP
+in IN B-PP
+the DT B-NP
+prior NN I-NP
+week NN I-NP
+averaged RB B-NP
+451 CD I-NP
+mln NN I-NP
+dlrs NN I-NP
+. . O
+Commenting NNP O
+on IN B-PP
+the DT B-NP
+two-week NN I-NP
+statement NN I-NP
+period NNS I-NP
+ended VBD B-VP
+February NNP B-NP
+25 CD I-NP
+, , O
+the DT B-NP
+Fed NNP I-NP
+said VBD B-VP
+that NN B-SBAR
+banks NNS B-NP
+had VBD B-VP
+average JJ B-NP
+net NN I-NP
+free JJ I-NP
+reserves NN I-NP
+of IN B-PP
+644 CD B-NP
+mln NN I-NP
+dlrs NN I-NP
+a DT B-NP
+day NN I-NP
+, , O
+down IN B-PP
+from JJ B-NP
+1.34 NN I-NP
+billion NN I-NP
+two RB B-NP
+weeks NNS I-NP
+earlier IN B-ADVP
+. . O
+A RB B-ADJP
+Federal JJ I-ADJP
+Reserve . B-NP
+spokesman NN B-VP
+told VBN I-VP
+a DT B-NP
+press NN I-NP
+briefing VBG B-VP
+that IN B-SBAR
+there EX B-NP
+were VBD B-VP
+no RB B-NP
+large JJ I-NP
+single NN I-NP
+day NN I-NP
+net RB I-NP
+misses NNS I-NP
+in IN B-PP
+the DT B-NP
+Fed's default I-NP
+reserve NN I-NP
+projections NNS I-NP
+in IN B-PP
+the DT B-NP
+week NN I-NP
+to TO B-PP
+Wednesday NNP B-NP
+. . I-NP
+He NNP I-NP
+said VBD B-VP
+that NN B-NP
+natural JJ I-NP
+float NN I-NP
+had VBD B-VP
+been VBN I-VP
+" NN B-NP
+acting VBG B-VP
+a DT B-NP
+bit NN I-NP
+strangely RB B-VP
+" VBN I-VP
+for IN B-PP
+this DT B-NP
+time NN I-NP
+of IN B-PP
+year NN B-NP
+, , O
+noting VBG B-VP
+that IN B-SBAR
+there EX B-NP
+had VBD B-VP
+been VBN I-VP
+poor JJ B-NP
+weather NN I-NP
+during IN B-PP
+the DT B-NP
+latest JJ I-NP
+week NN I-NP
+. . O
+The DT B-NP
+spokesman NN I-NP
+said VBD B-VP
+that IN B-SBAR
+natural JJ B-NP
+float NN I-NP
+ranged VBN B-VP
+from IN B-PP
+under IN B-NP
+500 CD I-NP
+mln NN I-NP
+dlrs NN I-NP
+on IN B-PP
+Friday NNP B-NP
+, , O
+for IN B-PP
+which NNP B-NP
+he NN B-NP
+could VBN B-VP
+give JJ B-NP
+no RB I-NP
+reason NN I-NP
+, , O
+to TO B-PP
+nearly JJ B-NP
+one CD I-NP
+billion IN B-PP
+dlrs NN B-NP
+on IN B-PP
+both NN B-NP
+Thursday default B-NP
+and CC O
+Wednesday default B-NP
+. . O
+The DT B-NP
+Fed JJ I-NP
+spokeman NN I-NP
+could VBN B-VP
+give JJ B-NP
+no NN I-NP
+reason NN I-NP
+for IN B-PP
+Thursday's NNP B-NP
+high NN I-NP
+float NNS I-NP
+, , O
+but NNS B-NP
+he DT B-NP
+said VBD B-VP
+that IN B-PP
+about NN B-NP
+750 CD I-NP
+mln NN I-NP
+dlrs NN I-NP
+of IN B-PP
+Wednesday's NNP B-NP
+float NN I-NP
+figure NNS I-NP
+was VBD B-VP
+due VBD I-VP
+to TO I-VP
+holdover VB I-VP
+and CC O
+transportation NN B-VP
+float IN B-PRT
+at IN B-PP
+two NN B-NP
+widely WDT I-NP
+separated VBN B-VP
+Fed VBN B-NP
+districts NNS I-NP
+. . O
+For NNP O
+the DT B-NP
+week NN I-NP
+as IN B-PP
+a DT B-NP
+whole NN I-NP
+, , O
+he DT B-NP
+said VBD B-VP
+that IN B-SBAR
+float NN B-NP
+related VBN B-VP
+as IN B-PP
+of NNP B-NP
+adjustments NNS I-NP
+were VBD B-VP
+" RB B-ADJP
+small JJ I-ADJP
+, , O
+" IN B-PP
+adding VBG B-VP
+that IN B-SBAR
+they NN B-NP
+fell NN I-NP
+to TO B-PP
+a DT B-NP
+negative JJ I-NP
+750 CD I-NP
+mln NN I-NP
+dlrs NN I-NP
+on IN B-PP
+Tuesday NNP B-NP
+due NN I-NP
+to TO B-PP
+a DT B-NP
+number NN I-NP
+of IN B-PP
+corrections NN B-NP
+for IN B-PP
+unrelated VBN B-NP
+cash NN I-NP
+letter IN B-PP
+errors NNS B-NP
+in IN B-PP
+six JJ B-NP
+districts NNS I-NP
+around IN B-PP
+the DT B-NP
+country NN I-NP
+. . O
+The DT B-NP
+spokesman NN I-NP
+said VBD B-VP
+that NN B-NP
+on IN B-PP
+both JJ B-NP
+Tuesday NNP I-NP
+and CC I-NP
+Wednesday NNP B-NP
+, , O
+two IN B-PP
+different JJ B-NP
+clearing NN I-NP
+banks NNS I-NP
+had VBD B-VP
+system JJ B-NP
+problems NNS I-NP
+and CC O
+the DT B-NP
+securities NNS I-NP
+and CC I-NP
+Federal JJ I-NP
+funds NNS I-NP
+wires NNS I-NP
+had VBD B-VP
+to TO I-VP
+be VB I-VP
+held VBN I-VP
+open JJ B-NP
+until NNS I-NP
+about IN B-PP
+2000 CD B-NP
+or NNP I-NP
+2100 CD I-NP
+EST NNS I-NP
+on IN B-PP
+both JJ B-NP
+days NN I-NP
+. . O
+However NNP B-NP
+, , O
+he CD B-NP
+said VBD B-VP
+that IN B-SBAR
+both NNP B-NP
+problems NN I-NP
+were VBD B-VP
+cleared VBN I-VP
+up IN B-ADVP
+during VBG B-VP
+both IN B-PP
+afternoons NNS B-NP
+and CC O
+there DT B-NP
+was VBD B-VP
+no RB B-ADJP
+evidence JJ I-ADJP
+of IN B-PP
+any DT B-NP
+reserve JJ I-NP
+impact NN I-NP
+. . O
+During VBG B-VP
+the DT B-NP
+week NN I-NP
+ended VBN B-VP
+Wednesday NNP B-NP
+, , O
+45 CD B-NP
+pct NN I-NP
+of IN B-PP
+net JJ B-NP
+discount NN I-NP
+window NN I-NP
+borrowings NNS I-NP
+were VBD B-VP
+made JJ B-ADJP
+by IN B-PP
+the DT B-NP
+smallest NN I-NP
+banks NNS I-NP
+, , O
+with IN B-PP
+30 CD B-NP
+pct NN I-NP
+by IN B-PP
+the DT B-NP
+14 CD I-NP
+large RB I-NP
+money JJ I-NP
+center NN I-NP
+banks NNS I-NP
+and CC O
+25 CD B-NP
+pct NN I-NP
+by IN B-PP
+large JJ B-NP
+regional NN I-NP
+institutions NNS I-NP
+. . O
+On NNP B-NP
+Wednesday NNP I-NP
+, , O
+55 CD B-NP
+pct NN I-NP
+of IN B-PP
+the DT B-NP
+borrowing NN I-NP
+was VBD B-VP
+accounted VBN I-VP
+for IN B-PP
+by IN B-PP
+the DT B-NP
+money NN I-NP
+center NN I-NP
+banks NNS I-NP
+, , O
+with IN B-PP
+30 CD B-NP
+pct NN I-NP
+by IN B-PP
+the DT B-NP
+large JJ I-NP
+regionals NN I-NP
+and CC O
+15 CD B-NP
+pct NN I-NP
+by IN B-PP
+the DT B-NP
+smallest JJ I-NP
+banks NNS I-NP
+. . O
+The DT B-NP
+Fed JJ I-NP
+spokesman NN I-NP
+said VBD B-VP
+the DT B-NP
+banking NN I-NP
+system IN B-NP
+had VBD B-VP
+excess VBZ B-NP
+reserves NN I-NP
+on IN B-PP
+Thursday NNP B-NP
+, , O
+Monday NNP B-NP
+and CC I-NP
+Tuesday NNP I-NP
+and CC O
+a DT B-NP
+deficit NN I-NP
+on IN B-PP
+Friday NNP B-NP
+and CC O
+Wedndsday NNP B-NP
+. . I-NP
+That NNP I-NP
+produced VBD B-VP
+a DT B-NP
+small JJ I-NP
+daily NN I-NP
+average JJ I-NP
+deficit NN I-NP
+for IN B-PP
+the DT B-NP
+week NN I-NP
+as IN B-PP
+a DT B-NP
+whole NN I-NP
+. . B-VP
+For NNP B-PP
+the DT B-NP
+two-week NN I-NP
+period NNS I-NP
+, , O
+he NNS B-NP
+said VBD B-VP
+there EX B-NP
+were VBD B-VP
+relatively JJ B-NP
+high NN I-NP
+excess VBZ B-VP
+reserves NN B-NP
+on IN B-PP
+a DT B-NP
+daily JJ I-NP
+avearge NN I-NP
+, , O
+almost IN B-PP
+all DT B-NP
+of IN B-PP
+which CD B-NP
+were VBD B-VP
+at IN B-PP
+the DT B-NP
+smallest JJ I-NP
+banks NNS I-NP
+. . O
+Reuter IN B-PP
+&#3; CD B-NP
+American RB I-NP
+Express JJ I-NP
+Co NNP I-NP
+remained VBN I-NP
+silent NN I-NP
+on IN B-PP
+market NN B-NP
+rumors NN I-NP
+it PRP B-NP
+would VBD B-VP
+spinoff IN B-PP
+all DT B-NP
+or JJ I-NP
+part NN I-NP
+of IN B-PP
+its NNS B-NP
+Shearson NNP I-NP
+Lehman NNP I-NP
+Brothers NNS I-NP
+Inc NNP I-NP
+, , O
+but IN B-SBAR
+some DT B-NP
+analysts NNS I-NP
+said VBD B-VP
+the DT B-NP
+company NN I-NP
+may NN I-NP
+be VB B-VP
+considering NN B-NP
+such IN B-PP
+a DT B-NP
+move JJ I-NP
+because NN I-NP
+it PRP B-NP
+is VBZ B-VP
+unhappy NN B-NP
+with IN B-PP
+the DT B-NP
+market JJ I-NP
+value NN I-NP
+of IN B-PP
+its NNS B-NP
+stock NN I-NP
+. . B-ADVP
+American RB B-NP
+Express JJ I-NP
+stock NN I-NP
+got NN I-NP
+a DT B-NP
+lift NN I-NP
+from WRB B-ADVP
+the DT B-NP
+rumor NN I-NP
+, , O
+as IN B-SBAR
+the DT B-NP
+market NN I-NP
+calculated VBN B-VP
+a DT B-NP
+partially JJ I-NP
+public NN I-NP
+Shearson IN B-PP
+may NN B-NP
+command VBN B-VP
+a DT B-NP
+good JJ I-NP
+market NN I-NP
+value NN I-NP
+, , O
+thereby IN B-PP
+boosting VBG B-VP
+the DT B-NP
+total JJ I-NP
+value NN I-NP
+of IN B-PP
+American NNP B-NP
+Express default I-NP
+. . O
+The DT B-NP
+rumor NN I-NP
+also NN I-NP
+was VBD B-VP
+accompanied VBN I-VP
+by IN B-PP
+talk NN B-NP
+the DT B-NP
+financial JJ I-NP
+services NNS I-NP
+firm IN B-PP
+would JJ B-NP
+split NN I-NP
+its NNS I-NP
+stock IN B-PP
+and CC O
+boost JJ B-NP
+its NNS I-NP
+dividend VBD B-VP
+. . O
+American RB O
+Express VBZ B-VP
+closed VBN I-VP
+on IN B-PP
+the DT B-NP
+New JJ I-NP
+York NNP I-NP
+Stock NNP I-NP
+Exchange VBD B-VP
+at IN B-PP
+72-5/8 CD B-NP
+, , O
+up IN B-PP
+4-1/8 NN B-NP
+on IN B-PP
+heavy NN B-NP
+volume default I-NP
+. . B-ADVP
+American RB B-ADJP
+Express JJ I-ADJP
+would VBD B-VP
+not IN B-PP
+comment NN B-NP
+on IN B-PP
+the DT B-NP
+rumors NN I-NP
+or IN B-PP
+its NNS B-NP
+stock NN I-NP
+activity NN I-NP
+. . O
+Analysts NNS B-NP
+said VBD B-VP
+comments VBN I-VP
+by IN B-PP
+the DT B-NP
+company NN I-NP
+at IN B-PP
+an DT B-NP
+analysts' NN I-NP
+meeting VBG B-VP
+Tuesday default B-NP
+helped VBN I-NP
+fuel JJ B-NP
+the DT I-NP
+rumors NN I-NP
+as IN B-PP
+did NN B-NP
+an DT B-NP
+announcement JJ I-NP
+yesterday NN I-NP
+of IN B-PP
+management JJ B-NP
+changes NNS I-NP
+. . O
+At RB O
+the DT B-NP
+meeting VBG I-NP
+, , I-NP
+company NN I-NP
+officials IN B-NP
+said VBD B-VP
+American RB B-NP
+Express JJ I-NP
+stock NN I-NP
+is VBZ B-VP
+undervalued VBN I-VP
+and CC O
+does NNS B-VP
+not NN B-NP
+fully NN I-NP
+reflect NN B-VP
+the DT B-NP
+performance NN I-NP
+of IN B-PP
+Shearson NNP B-NP
+, , O
+according IN B-PP
+to TO B-PP
+analysts NNS B-NP
+. . O
+Yesterday NNP B-NP
+, , O
+Shearson NNP B-NP
+said VBD B-VP
+it PRP B-NP
+was VBD B-VP
+elevating VBG I-VP
+its NNS B-NP
+chief NNP I-NP
+operating VBG I-NP
+officer IN I-NP
+, , O
+Jeffery NNP B-NP
+Lane NNP I-NP
+, , O
+to TO B-PP
+the DT B-NP
+added JJ I-NP
+position NN I-NP
+of IN B-PP
+president NN B-NP
+, , O
+which IN B-NP
+had VBD B-VP
+been VBN I-VP
+vacant NN B-NP
+. . O
+It PRP B-NP
+also RB I-VP
+created VBN I-VP
+four IN B-PP
+new JJ B-NP
+positions NNS I-NP
+for IN B-PP
+chairmen NN B-NP
+of IN B-PP
+its NNS B-NP
+operating VBG I-NP
+divisions NNS I-NP
+. . O
+Analysts NNS B-NP
+speculated VBD B-VP
+a DT B-NP
+partial JJ I-NP
+spinoff NNP I-NP
+would VBD B-VP
+make NN B-NP
+most NN I-NP
+sense NNS I-NP
+, , O
+contrary JJ B-ADJP
+to TO B-PP
+one CD B-NP
+variation NN I-NP
+on IN B-PP
+market JJ B-NP
+rumors NN I-NP
+of IN B-PP
+a DT B-NP
+total JJ I-NP
+spinoff NNP I-NP
+. . O
+Some DT B-NP
+analysts NNS I-NP
+, , O
+however NNS B-NP
+, , O
+disagreed VBD B-VP
+that IN B-PP
+any JJ B-NP
+spinoff NN I-NP
+of IN B-PP
+Shearson NNP B-NP
+would VBD B-VP
+be VB I-VP
+good NN B-NP
+since IN B-SBAR
+it PRP B-NP
+is VBZ B-VP
+a DT B-NP
+strong VBG I-NP
+profit NN I-NP
+center NN I-NP
+for IN B-PP
+American NNP B-NP
+Express NNS I-NP
+, , O
+contributing VBG B-VP
+about IN B-NP
+20 CD I-NP
+pct NN I-NP
+of IN B-PP
+earnings NNS B-NP
+last JJ B-NP
+year NN I-NP
+. . O
+" NN B-NP
+I IN B-PP
+think NN B-NP
+it PRP B-NP
+is VBZ B-VP
+highly RB O
+unlikely JJ B-NP
+that NN I-NP
+American RB B-NP
+Express JJ I-NP
+is VBZ B-VP
+going VBG I-VP
+to TO B-PP
+sell JJ B-NP
+shearson NN I-NP
+, , O
+" IN B-NP
+said VBD B-VP
+Perrin CD B-NP
+Long VBG I-NP
+of IN B-PP
+Lipper NNP B-NP
+Analytical default I-NP
+. . O
+He JJ I-VP
+questioned VBD I-VP
+what IN B-NP
+would VBN B-VP
+be VB I-VP
+a DT B-NP
+better NN I-NP
+investment NN I-NP
+than NN I-NP
+" RB B-NP
+a DT I-NP
+very NN I-NP
+profitable NN I-NP
+securities NNS I-NP
+firm IN B-PP
+. . B-NP
+" NN I-NP
+Several JJ I-NP
+analysts NNS I-NP
+said VBD B-VP
+American RB B-ADJP
+Express JJ I-ADJP
+is VBZ B-VP
+not RB O
+in IN B-PP
+need JJ B-NP
+of IN B-PP
+cash NNP B-NP
+, , O
+which IN B-PP
+might NN B-NP
+be VB B-VP
+the DT B-NP
+only JJ I-NP
+reason NN I-NP
+to TO B-VP
+sell JJ I-VP
+a DT B-NP
+part NN I-NP
+of IN B-PP
+a DT B-NP
+strong NN I-NP
+asset IN B-PP
+. . B-NP
+But JJ I-NP
+others NNS I-NP
+believe VBP B-VP
+the DT B-NP
+company NN I-NP
+could VBN B-VP
+very JJ B-ADVP
+well RB B-ADVP
+of IN B-ADVP
+considered VBD B-VP
+the DT B-NP
+option NN I-NP
+of IN B-PP
+spinning VBG B-VP
+out JJ B-NP
+part NN I-NP
+of IN B-PP
+Shearson NNP B-NP
+, , O
+and CC O
+one JJ B-NP
+rumor NN I-NP
+suggests NNS I-NP
+selling VBG B-VP
+about IN B-NP
+20 CD I-NP
+pct NN I-NP
+of IN B-PP
+it PRP B-NP
+in IN B-PP
+the DT B-NP
+market NN I-NP
+. . O
+Larry JJ O
+Eckenfelder . O
+of IN B-PP
+Prudential-Bache DT B-NP
+Securities NNS I-NP
+said VBD B-VP
+he DT B-NP
+believes NN I-NP
+American RB B-VP
+Express VBN I-VP
+could VBN I-VP
+have VBP B-VP
+considered VBN I-VP
+a DT B-NP
+partial JJ I-NP
+spinoff NN I-NP
+in IN B-PP
+the DT B-NP
+past NN I-NP
+. . O
+" IN B-PP
+Shearson NNP B-NP
+being NN I-NP
+as IN B-PP
+profitable NN B-NP
+as IN B-SBAR
+it PRP B-NP
+is VBZ B-VP
+would VBD I-VP
+have VBP I-VP
+fetched VBN I-VP
+a DT B-NP
+big NN I-NP
+premium NN I-NP
+in IN B-PP
+the DT B-NP
+market NN I-NP
+place. NN I-NP
+Shearson's NNP I-NP
+book NN I-NP
+value NN I-NP
+is VBZ B-VP
+in IN B-PP
+the DT B-NP
+1.4 CD I-NP
+mln NN I-NP
+dlr IN B-PP
+range NN B-NP
+. . O
+Shearson NNP O
+in IN B-PP
+the DT B-NP
+market NN I-NP
+place NN I-NP
+would MD B-VP
+probably RB I-VP
+be VB I-VP
+worth RB B-ADVP
+three DT B-NP
+to TO I-NP
+3.5 CD I-NP
+bilion NN I-NP
+dlrs NN I-NP
+in IN B-PP
+terms NN B-NP
+of IN B-PP
+market JJ B-NP
+capitalization NN I-NP
+, , O
+" IN B-NP
+said VBD B-VP
+Eckenfelder CD B-NP
+. . O
+Some DT B-NP
+analysts NNS I-NP
+said VBD B-VP
+American RB B-NP
+Express JJ I-NP
+could VBN B-VP
+use IN B-PP
+capital JJ B-NP
+since NN I-NP
+it PRP B-NP
+plans VBD B-VP
+to TO I-VP
+expand NNS B-NP
+globally JJ B-ADJP
+. . O
+" NNS B-VP
+They NNP B-NP
+have VBP B-VP
+enormous NNS B-NP
+internal JJ B-NP
+growth NNS I-NP
+plans NNS I-NP
+that IN B-PP
+takes NNS B-NP
+capital JJ B-ADJP
+. . O
+You NNP B-NP
+want NN I-NP
+your NN I-NP
+stock RB B-ADVP
+to TO B-PP
+reflect JJ B-NP
+realistic NN I-NP
+valuations NNS I-NP
+to TO B-PP
+enhance JJ B-NP
+your NN I-NP
+ability NN I-NP
+to TO B-PP
+make JJ B-NP
+all DT I-NP
+kinds NN I-NP
+of IN B-PP
+endeavors NNS B-NP
+down IN B-PP
+the DT B-NP
+road NN I-NP
+, , O
+" IN B-NP
+said VBD B-VP
+E.F. CD B-NP
+Hutton NNP I-NP
+Group NNP I-NP
+analyst IN B-PP
+Michael default B-NP
+Lewis default I-NP
+. . B-NP
+" NN I-NP
+They've DT B-NP
+outlined VBD B-VP
+the DT B-NP
+fact NN I-NP
+that IN B-SBAR
+they're DT B-NP
+investing VBG I-NP
+heavily NN I-NP
+in IN B-PP
+the DT B-NP
+future NNS I-NP
+, , O
+which IN B-PP
+goes NNS B-NP
+heavily NN I-NP
+into IN B-PP
+the DT B-NP
+international JJ I-NP
+arena, NN I-NP
+" NN I-NP
+said VBD B-VP
+Lewis CD B-NP
+. . O
+" default B-VP
+. . I-VP
+..That . O
+does NNS B-VP
+not NN B-NP
+preclude NN I-NP
+acquisitions NNS I-NP
+and CC O
+divestitures NNS B-NP
+along IN B-PP
+the DT B-NP
+way NN I-NP
+, , O
+" IN B-PP
+he DT B-NP
+said VBD I-NP
+. . O
+Lewis VBZ O
+said VBD B-VP
+if CD B-NP
+American RB I-NP
+Express JJ I-NP
+reduced VBN I-NP
+its NNS I-NP
+exposure NN I-NP
+to TO B-PP
+the DT B-NP
+brokerage NN I-NP
+business NNS I-NP
+by IN B-PP
+selling VBG B-VP
+part NN B-NP
+of IN B-PP
+shearson NN B-NP
+, , O
+its NNS B-NP
+stock NN I-NP
+might NN I-NP
+better IN B-PP
+reflect NN B-NP
+other IN B-PP
+assets NNS B-NP
+, , O
+such NNS B-NP
+as IN B-PP
+the DT B-NP
+travel NN I-NP
+related VBN I-NP
+services NNS I-NP
+business NNS I-NP
+. . O
+" NN B-VP
+It PRP B-NP
+could VBD B-VP
+find CD B-NP
+its NNS I-NP
+true VBD B-VP
+water IN B-PP
+mark NN B-NP
+with IN B-PP
+a DT B-NP
+lesser JJ I-NP
+exposure NN I-NP
+to TO B-VP
+brokerage VB I-VP
+. . O
+The DT B-NP
+value NN I-NP
+of IN B-PP
+the DT B-NP
+other NN I-NP
+components NNP-named I-NP
+could VBN B-VP
+command VBN I-VP
+a DT B-NP
+higher NN I-NP
+multiple WRB B-ADVP
+because NN B-NP
+they NN I-NP
+constitute VBD B-VP
+a DT B-NP
+higher NN I-NP
+percentage NN I-NP
+of IN B-PP
+the DT B-NP
+total NN I-NP
+operating IN B-PP
+earnings NNS B-NP
+of IN B-PP
+the DT B-NP
+company NN I-NP
+, , O
+" IN B-PP
+he DT B-NP
+said VBD I-NP
+. . O
+Lewis VBZ O
+said VBD B-VP
+Shearson CD B-NP
+contributed VBN B-VP
+316 CD B-NP
+mln NN I-NP
+in IN B-PP
+after-tax JJ B-NP
+operating VBG I-NP
+earnings NNS I-NP
+, , O
+up NNS B-NP
+from IN B-PP
+about NN B-NP
+200 CD I-NP
+mln NN I-NP
+dlrs NN I-NP
+in IN B-PP
+1985 default B-NP
+. . O
+Reuter IN B-PP
+&#3; CD B-NP
+Coleco NNP I-NP
+Industries NNP I-NP
+Inc NNP I-NP
+said VBD B-VP
+it PRP B-NP
+expects NNS B-VP
+to TO B-NP
+return JJ I-VP
+to TO B-PP
+profitability NN B-NP
+in IN B-PP
+1987 default B-NP
+. . O
+Earlier NNP B-NP
+, , O
+Coleco NNP B-NP
+reported VBN B-VP
+a DT B-NP
+net JJ I-NP
+loss CD I-NP
+of IN B-PP
+111.2 CD B-NP
+mln NN I-NP
+dlrs NN I-NP
+for IN B-PP
+the DT B-NP
+year NN I-NP
+ended VBN B-VP
+December IN B-PP
+31 CD B-NP
+compared VBN B-VP
+to TO B-PP
+a DT B-NP
+profit NN I-NP
+of IN B-PP
+64.2 CD B-NP
+mln NN I-NP
+dlrs NN I-NP
+in IN B-PP
+the DT B-NP
+year NN I-NP
+earlier IN B-PP
+. . B-NP
+In IN B-PP
+a DT B-NP
+prepared JJ I-NP
+statement NN I-NP
+, , O
+the DT B-NP
+company NN I-NP
+said VBD B-VP
+the DT B-NP
+dramatic NN I-NP
+swing IN B-PP
+in IN B-PP
+operating VBG B-NP
+results NNS I-NP
+was VBD B-VP
+due JJ B-NP
+primarily NN I-NP
+to TO B-PP
+the DT B-NP
+steep NN I-NP
+decline NN I-NP
+in IN B-PP
+sales NNS B-NP
+of IN B-PP
+Cabbage JJ B-NP
+Patch NNP I-NP
+Kids NNP I-NP
+products NNS I-NP
+from IN B-PP
+600 CD B-NP
+mln NN I-NP
+dlrs NN I-NP
+to TO B-PP
+230 CD B-NP
+mln NN I-NP
+dlrs NN I-NP
+. . O
+Coleco NNP B-NP
+said VBD B-VP
+it PRP B-NP
+changed VBD B-VP
+from VBN I-VP
+a DT B-NP
+single JJ I-NP
+product NN I-NP
+company NN I-NP
+to TO B-PP
+a DT B-NP
+more JJ I-NP
+diversified CD I-NP
+organization NN I-NP
+through IN B-PP
+four JJ B-NP
+major NN I-NP
+acquisitions NNS I-NP
+last JJ B-NP
+year NN I-NP
+. . O
+Products NNS B-NP
+from IN B-PP
+the DT B-NP
+new NN I-NP
+acquisitions NNS I-NP
+and CC O
+other VB B-VP
+new RB B-NP
+product NN I-NP
+introductions NNS I-NP
+are VBP B-VP
+expected VBN I-VP
+to TO I-VP
+enable NNS B-NP
+it PRP B-NP
+to TO B-VP
+return JJ B-NP
+to TO B-PP
+profitability NN B-NP
+, , O
+it PRP B-NP
+said VBD B-VP
+. . O
+At RB O
+the DT B-NP
+annual JJ I-NP
+Toy NNP I-NP
+Fair NNP I-NP
+earlier IN B-PP
+this DT B-NP
+month JJ I-NP
+, , I-NP
+vice JJ I-NP
+president NN I-NP
+Morton NNP I-NP
+Handel NNP I-NP
+said VBD B-VP
+analysts' CD B-NP
+1987 NN I-NP
+projected VBN I-NP
+earnings NNS I-NP
+of IN B-PP
+90 CD B-NP
+cts NNS I-NP
+a DT B-NP
+share NN I-NP
+on IN B-PP
+sales NNS B-NP
+of IN B-PP
+600 CD B-NP
+mln NN I-NP
+dlrs NN I-NP
+are VBP B-VP
+reasonable NN B-NP
+. . O
+Venezuela NNP-5 B-NP
+is VBZ B-VP
+seeking VBG I-VP
+a DT B-NP
+'constructive JJ I-NP
+and CC I-NP
+flexible' NNS I-NP
+attitude IN B-PP
+from JJ B-NP
+its NNS I-NP
+creditor NN I-NP
+banks NNS I-NP
+in IN B-PP
+current JJ B-NP
+talks NNS I-NP
+to TO B-PP
+reschedule JJ B-NP
+21 CD I-NP
+billion NN I-NP
+dlrs NN I-NP
+in IN B-PP
+foreign NN B-NP
+debt VBN B-VP
+, , O
+finance JJ B-NP
+minister NN I-NP
+manuel JJ I-NP
+azpurua NN I-NP
+told VBN B-VP
+a DT B-NP
+press NN I-NP
+conference. NN I-NP
+He NNP I-NP
+declined VBD B-VP
+to TO B-PP
+comment NN B-NP
+on IN B-PP
+meetings NNS B-NP
+this DT B-NP
+week NN I-NP
+in IN B-PP
+new JJ B-NP
+york NN I-NP
+between VBN B-VP
+public IN B-PP
+finances NNS B-NP
+director IN B-PP
+jorge JJ B-NP
+marcano NN I-NP
+and CC O
+venezuela's VBN B-NP
+13-bank NN I-NP
+advisory NN I-NP
+committee NNP-named I-NP
+except NN I-NP
+to TO B-PP
+say NN B-NP
+, , O
+" IN B-PP
+they NN B-NP
+are VBP B-VP
+progressing NNS B-NP
+. . O
+" NNP B-NP
+Azpurua NNP I-NP
+said VBD B-VP
+venezuela NN B-NP
+has NNS I-NP
+shown IN B-PP
+solidarity JJ B-NP
+with IN B-PP
+brazil's NNS B-NP
+decision VBD B-VP
+to TO B-PP
+suspend CD B-NP
+payments NNS I-NP
+, , O
+but NNS B-NP
+each IN B-PP
+country NN B-NP
+must VBZ B-VP
+negotiate RB I-VP
+according VBG I-VP
+to TO B-PP
+its NNS B-NP
+own JJ I-NP
+interest NN I-NP
+. . O
+Asked VBD B-VP
+to TO B-PP
+comment NN B-NP
+on IN B-PP
+chile's NN B-NP
+agreement NN I-NP
+with IN B-PP
+its NNS B-NP
+creditors NN I-NP
+today NN I-NP
+, , O
+which IN B-PP
+includes NNS B-NP
+an DT B-NP
+interest JJ I-NP
+rate NN I-NP
+margin NN I-NP
+of IN B-PP
+one CD B-NP
+pct NN I-NP
+over IN B-PP
+libor JJ B-NP
+, , O
+azpurua NNP B-NP
+said VBD B-VP
+only NN B-NP
+, , O
+" IN B-SBAR
+that NN B-NP
+is VBZ B-VP
+good JJ B-NP
+news NNS I-NP
+. . O
+" NNS B-NP
+According VBG B-VP
+to TO B-PP
+banking VBG B-NP
+sources NNS I-NP
+, , O
+the DT B-NP
+banks' NN I-NP
+latest NN I-NP
+offer IN B-PP
+to TO B-PP
+venezuela CD B-NP
+is VBZ B-VP
+also RB B-ADVP
+a DT B-NP
+one JJ I-NP
+pct NN I-NP
+margin JJ I-NP
+as IN B-PP
+against NN B-NP
+the DT B-NP
+last JJ I-NP
+february's NN I-NP
+1-1/8 CD B-NP
+pct NN I-NP
+rescheduling VBG I-NP
+accord NNS I-NP
+and CC O
+the DT B-NP
+7/8 NN I-NP
+pct NN I-NP
+Venezuela NNP I-NP
+wants NNS I-NP
+. . O
+Azpurua NNP B-NP
+said VBD B-VP
+four NN B-NP
+basic NN I-NP
+elements NNS I-NP
+are VBP B-VP
+being VBN I-VP
+negotiated VBN I-VP
+with IN B-PP
+the DT B-NP
+banks NNS I-NP
+now: NN I-NP
+spread VBD B-VP
+reduction VBN I-VP
+, , O
+deferral JJ B-ADJP
+of IN B-PP
+principal JJ B-NP
+payments NNS I-NP
+due NNS I-NP
+in IN B-PP
+1987 CD B-NP
+and CC I-NP
+1988 CD I-NP
+, , O
+lenghtening VBG B-VP
+the DT B-NP
+12-1/2 CD I-NP
+year NN I-NP
+repayment NN I-NP
+schedule NN I-NP
+, , O
+and CC O
+debt VBN B-VP
+capitalization IN B-PP
+schemes NNS B-NP
+. . O
+Azpurua NNP B-NP
+said VBD B-VP
+the DT B-NP
+governent NN I-NP
+plans NN I-NP
+to TO B-PP
+pay NN B-NP
+2.1 CD I-NP
+billion NN I-NP
+dlrs NN I-NP
+in IN B-PP
+public NNP B-NP
+and CC O
+private JJ B-NP
+debt NN I-NP
+principal NN I-NP
+this DT B-NP
+year NN I-NP
+. . O
+It PRP B-NP
+was VBD B-VP
+due VBD I-VP
+to TO I-VP
+amortize VB I-VP
+1.05 CD B-NP
+billion NN I-NP
+dlrs NN I-NP
+under IN B-PP
+the DT B-NP
+rescheduling NN I-NP
+, , O
+and CC O
+pay NN B-NP
+420 CD I-NP
+mln NN I-NP
+dlrs NN I-NP
+in IN B-PP
+non-restructured JJ B-NP
+principal NN I-NP
+, , O
+both IN B-PP
+public JJ B-NP
+sector NN I-NP
+. . O
+He NNP B-NP
+said VBD B-VP
+venezuela's CD B-NP
+original JJ I-NP
+proposal NN I-NP
+was VBD B-VP
+to TO B-PP
+pay NN B-NP
+no RB I-NP
+principal JJ I-NP
+on IN B-PP
+restructured JJ B-NP
+debt NN I-NP
+this DT B-NP
+year NN I-NP
+, , O
+but IN B-NP
+is VBZ B-VP
+now RB I-VP
+insisting VBG I-VP
+that IN B-SBAR
+if NNP B-NP
+it PRP B-NP
+makes VBZ B-VP
+payments NNS B-NP
+they IN B-PP
+be VB B-NP
+compensated VBN B-VP
+by IN B-PP
+new JJ B-NP
+bank NN I-NP
+loans NNS I-NP
+. . O
+The DT B-NP
+banking VBG I-NP
+sources NNS I-NP
+said VBD B-VP
+the DT B-NP
+committee NN I-NP
+has NNS B-VP
+been VBN I-VP
+prepared VBN I-VP
+to TO I-VP
+lower VB I-VP
+amortizations VBN I-VP
+to TO B-PP
+around IN B-NP
+400 CD I-NP
+mln NN I-NP
+dlrs NN I-NP
+this IN B-PP
+year NN B-NP
+, , O
+but IN B-PP
+that NN B-NP
+no RB B-NP
+direct JJ I-NP
+commitment NN I-NP
+was VBD B-VP
+likely JJ B-ADJP
+on IN B-PP
+new JJ B-NP
+loans NNS I-NP
+. . O
+" CD B-NP
+debtors NNS I-NP
+and CC I-NP
+bank NNS I-NP
+creditors NN I-NP
+have VBP B-VP
+a DT B-NP
+joint JJ I-NP
+responsibility NN I-NP
+and CC O
+there DT B-NP
+will MD B-VP
+be VB I-VP
+no RB I-VP
+lasting VBG I-VP
+solution NN B-NP
+unless NNS I-NP
+a DT B-NP
+positive JJ I-NP
+flow NN I-NP
+of IN B-PP
+financing VBG B-NP
+is VBZ B-VP
+guaranteed NNS B-NP
+, , O
+" NNS B-NP
+azpurua DT B-NP
+said VBD B-VP
+. . O
+However IN B-ADVP
+, , O
+he NNS B-NP
+appeared VBD B-VP
+to TO I-VP
+discard VB I-VP
+earlier JJ B-NP
+venezuelan NN I-NP
+proposals NN I-NP
+for IN B-PP
+a DT B-NP
+direct NN I-NP
+link NN I-NP
+between VBN B-VP
+oil JJ B-NP
+income NN I-NP
+and CC O
+debt VBN B-NP
+payments NNS I-NP
+, , O
+"because NNS B-NP
+circumstances NNS I-NP
+change VBD B-VP
+too RB B-ADJP
+quickly JJ I-ADJP
+. . O
+" NNS B-VP
+At RB B-ADVP
+the DT B-NP
+same JJ I-NP
+time NN I-NP
+, , O
+he NN B-NP
+said VBD B-VP
+the DT B-NP
+government NN I-NP
+is VBZ B-VP
+presently RB I-VP
+studying VBG I-VP
+possible JJ B-NP
+mechanisms NNS I-NP
+for IN B-PP
+capitlizing VBG B-VP
+public NN B-NP
+and CC O
+private RB B-NP
+sector JJ I-NP
+foreign NNS I-NP
+debt VBD B-VP
+, , O
+based NNS B-NP
+on IN B-PP
+experience NN B-NP
+in IN B-PP
+other JJ B-NP
+countries NNS I-NP
+. . O
+The DT B-NP
+rules NN I-NP
+would MD B-VP
+be VB I-VP
+published VBN I-VP
+by IN B-PP
+the DT B-NP
+finance JJ I-NP
+ministry NN I-NP
+and CC O
+the DT B-NP
+central JJ I-NP
+bank NN I-NP
+. . O
+Thomson NNP B-NP
+McKinnon NNP I-NP
+Mortgage NNP I-NP
+Assets NNS I-NP
+Corp NNP I-NP
+, , O
+a DT B-NP
+unit NN I-NP
+of IN B-PP
+Thomson NNP B-NP
+McKinnon NNP I-NP
+Inc NNP I-NP
+, , O
+is VBZ B-VP
+offering IN B-NP
+100 CD I-NP
+mln NN I-NP
+dlrs NN I-NP
+of IN B-PP
+collateralized VBN B-NP
+mortgage NN I-NP
+obligations NNS I-NP
+in IN B-PP
+three DT B-NP
+tranches NNS I-NP
+that IN B-PP
+include JJ B-NP
+floating NN I-NP
+rate NN I-NP
+and CC O
+inverse RB B-VP
+floating VBG I-VP
+rate JJ B-NP
+CMOS NNP I-NP
+. . O
+The DT B-NP
+floating VBG I-NP
+rate NN I-NP
+class NN I-NP
+amounts NNS I-NP
+to TO B-PP
+60 CD B-NP
+mln NN I-NP
+dlrs NN I-NP
+. . O
+It PRP B-NP
+has VBZ B-VP
+an DT B-NP
+average JJ I-NP
+life NN I-NP
+of IN B-PP
+7.11 CD B-NP
+years NNS I-NP
+and CC O
+matures NNS B-NP
+2018. CD B-PP
+The DT B-NP
+CMOs JJ I-NP
+have NN I-NP
+an DT B-NP
+initial JJ I-NP
+coupon NN I-NP
+of IN B-PP
+7.0375 CD B-NP
+pct NN I-NP
+, , O
+which IN B-NP
+will MD B-VP
+be VB I-VP
+reset NN B-NP
+60 CD I-NP
+basis NNS I-NP
+points NNS I-NP
+above VBP B-VP
+LIBOR VBN I-VP
+, , O
+said VBD B-VP
+sole CD B-NP
+manager NN I-NP
+Thomson NNP I-NP
+McKinnon NNP I-NP
+. . O
+The DT B-NP
+inverse JJ I-NP
+floater NN I-NP
+totals IN B-PP
+4.8 CD B-NP
+mln NN I-NP
+dlrs NN I-NP
+. . O
+It PRP B-NP
+has VBZ B-VP
+an DT B-NP
+average JJ I-NP
+life NN I-NP
+of IN B-PP
+13.49 CD B-NP
+years NNS I-NP
+and CC O
+matures NNS B-NP
+2018. CD B-NP
+These NNP I-NP
+CMOs NNS I-NP
+were VBD B-VP
+given JJ I-VP
+an DT B-NP
+initial JJ I-NP
+coupon NN I-NP
+of IN B-PP
+11-1/2 CD B-NP
+pct NN I-NP
+and CC O
+priced VBN B-VP
+at IN B-PP
+104.40. CD B-NP
+Subsequent JJ I-NP
+rates NNS I-NP
+on IN B-PP
+the DT B-NP
+inverse NN I-NP
+floater IN B-PP
+will DT B-NP
+equal JJ I-NP
+11-1/2 CD I-NP
+pct NN I-NP
+minus VBZ B-VP
+the DT B-NP
+product NN I-NP
+of IN B-PP
+three DT B-NP
+times NNS I-NP
+(LIBOR NNP I-NP
+minus NNS I-NP
+6-1/2 CD B-NP
+pct) NNS I-NP
+. . O
+A RB O
+Thomson NNP B-NP
+officer IN I-PRT
+explained VBN B-VP
+that IN B-PP
+the DT B-NP
+coupon NN I-NP
+of IN B-PP
+the DT B-NP
+inverse NN I-NP
+floating IN B-PP
+rate JJ B-NP
+tranche NN I-NP
+would VBN B-VP
+increase IN B-PP
+if NNP B-NP
+LIBOR default I-NP
+declined VBD B-VP
+. . O
+" NN B-VP
+The DT B-NP
+yield JJ I-NP
+floats NN I-NP
+opposite NN I-NP
+of IN B-PP
+LIBOR NNP B-NP
+, , O
+" CD B-NP
+he NN I-NP
+said VBD B-VP
+. . O
+The DT B-NP
+fixed-rate JJ I-NP
+tranche NN I-NP
+totals NNS I-NP
+35.2 IN B-PP
+mln NN B-NP
+dlrs NN I-NP
+. . O
+It PRP B-NP
+has VBZ B-VP
+an DT B-NP
+average JJ I-NP
+life NN I-NP
+of IN B-PP
+3.5 CD B-NP
+years NNS I-NP
+and CC O
+matures NNS B-NP
+2016. CD B-PP
+The DT B-NP
+CMOs JJ I-NP
+were NN I-NP
+assigned VBN B-VP
+a DT B-NP
+7.65 NN I-NP
+pct NN I-NP
+coupon NN I-NP
+and CC O
+par RB B-VP
+pricing VBG I-VP
+. . B-PP
+The DT B-NP
+issue NN I-NP
+is VBZ B-VP
+rated VBN I-VP
+AAA RB B-ADVP
+by IN B-PP
+Standard NNP B-NP
+and CC I-NP
+Poor's NNP I-NP
+and CC O
+secured JJ B-ADVP
+by IN B-SBAR
+Federal JJ B-NP
+Home . I-NP
+Loan NNP I-NP
+Mortgage NNP I-NP
+Corp NNP I-NP
+, , O
+Freddie NNP B-NP
+Mac NNP I-NP
+, , O
+certificates NNS B-NP
+. . O
+OPEC NNP B-NP
+may NN I-NP
+be VB B-VP
+forced VBD I-VP
+to TO B-PP
+meet NN B-NP
+before NN I-NP
+a DT B-NP
+scheduled JJ I-NP
+June CD I-NP
+session NN I-NP
+to TO B-PP
+readdress JJ B-NP
+its NNS I-NP
+production NN I-NP
+cutting VBG I-NP
+agreement NN I-NP
+if IN B-SBAR
+the DT B-NP
+organization NN I-NP
+wants NNS B-VP
+to TO I-VP
+halt VB I-VP
+the DT B-NP
+current NN I-NP
+slide NN I-NP
+in IN B-PP
+oil JJ B-NP
+prices NNS I-NP
+, , O
+oil JJ B-NP
+industry NN I-NP
+analysts NNS I-NP
+said VBD B-VP
+. . O
+" NN B-VP
+The DT B-NP
+movement NN I-NP
+to TO B-VP
+higher VB I-VP
+oil JJ B-NP
+prices NNS I-NP
+was VBD B-VP
+never IN B-ADVP
+to TO B-VP
+be VB I-VP
+as IN B-PP
+easy NN B-NP
+as IN B-PP
+OPEC NNP B-NP
+thought IN B-PP
+. . B-NP
+They NNP I-NP
+may NN I-NP
+need VBD B-VP
+an DT B-NP
+emergency NN I-NP
+meeting VBG B-VP
+to TO B-PP
+sort NN B-NP
+out IN B-PP
+the DT B-NP
+problems NN I-NP
+, , O
+" IN B-NP
+said VBD B-VP
+Daniel CD B-NP
+Yergin NNP I-NP
+, , O
+director IN B-PP
+of IN B-PP
+Cambridge JJ B-NP
+Energy NNP I-NP
+Research NNP I-NP
+Associates NNS I-NP
+, , O
+CERA NNP B-NP
+. . O
+Analysts NNS B-NP
+and CC O
+oil JJ B-NP
+industry NN I-NP
+sources NNS I-NP
+said VBD B-VP
+the DT B-NP
+problem NN I-NP
+OPEC IN B-PP
+faces NNS B-NP
+is VBZ B-VP
+excess NNS B-NP
+oil JJ B-ADJP
+supply RB B-ADVP
+in IN B-PP
+world JJ B-NP
+oil NNS I-NP
+markets NNS I-NP
+. . O
+" NN B-NP
+OPEC's NNS I-NP
+problem NN I-NP
+is VBZ B-VP
+not RB O
+a DT B-NP
+price NN I-NP
+problem NN I-NP
+but NN B-VP
+a DT B-NP
+production NN I-NP
+issue NNS I-NP
+and CC O
+must JJ B-VP
+be VB I-VP
+addressed VBN I-VP
+in IN B-PP
+that DT B-NP
+way NN I-NP
+, , O
+" IN B-NP
+said VBD B-VP
+Paul default B-NP
+Mlotok NNP I-NP
+, , O
+oil JJ B-NP
+analyst NN I-NP
+with IN B-PP
+Salomon NNP B-NP
+Brothers NNS I-NP
+Inc NNP I-NP
+. . O
+He JJ O
+said VBD B-VP
+the DT B-NP
+market's NN I-NP
+earlier IN B-PP
+optimism JJ B-NP
+about NN I-NP
+OPEC NNS I-NP
+and CC O
+its NNS B-NP
+ability NN I-NP
+to TO B-PP
+keep CD B-NP
+production NN I-NP
+under IN B-PP
+control JJ B-NP
+have NN I-NP
+given NNS I-NP
+way NN B-ADJP
+to TO B-PP
+a DT B-NP
+pessimistic NN I-NP
+outlook NN I-NP
+that IN B-PP
+the DT B-NP
+organization NN I-NP
+must NN I-NP
+address NNS I-NP
+soon IN B-PP
+if NNP B-NP
+it PRP B-NP
+wishes VBD B-VP
+to TO B-PP
+regain JJ B-NP
+the DT I-NP
+initiative JJ I-NP
+in IN B-PP
+oil JJ B-NP
+prices NNS I-NP
+. . O
+But JJ B-NP
+some NN I-NP
+other IN B-PP
+analysts NNS B-NP
+were VBD B-VP
+uncertain RB B-ADJP
+that IN B-PP
+even VBN B-NP
+an DT B-NP
+emergency NN I-NP
+meeting VBG B-VP
+would JJ B-NP
+address NNS I-NP
+the DT B-NP
+problem NN I-NP
+of IN B-PP
+OPEC NNP B-NP
+production NN I-NP
+above IN B-PP
+the DT B-NP
+15.8 CD I-NP
+mln NN I-NP
+bpd NNS I-NP
+quota IN B-PP
+set NN B-NP
+last JJ B-NP
+December NNP I-NP
+. . O
+" IN B-SBAR
+OPEC NNP B-NP
+has NNS I-NP
+to TO B-PP
+learn JJ B-NP
+that NN I-NP
+in IN B-PP
+a DT B-NP
+buyers NNS I-NP
+market NN I-NP
+you IN B-PP
+cannot NN B-NP
+have VBP B-VP
+deemed VBN I-VP
+quotas NNS B-NP
+, , O
+fixed NNS B-NP
+prices NNS I-NP
+and CC O
+set NN B-NP
+differentials NNS I-NP
+, , O
+" NNS B-NP
+said VBD B-VP
+the DT B-NP
+regional JJ I-NP
+manager NN I-NP
+for IN B-PP
+one NN B-NP
+of IN B-PP
+the DT B-NP
+major NN I-NP
+oil NN I-NP
+companies NNS I-NP
+who IN B-PP
+spoke NN B-NP
+on IN B-PP
+condition NN B-NP
+that IN B-SBAR
+he DT B-NP
+not NN I-NP
+be VB B-VP
+named RB I-VP
+. . I-VP
+" NN I-VP
+The DT B-NP
+market NN I-NP
+is VBZ B-VP
+now RB I-VP
+trying VBG I-VP
+to TO I-VP
+teach IN B-PP
+them DT B-NP
+that NN I-NP
+lesson IN B-PP
+again NN B-NP
+, , O
+" IN B-PP
+he DT B-NP
+added VBD I-NP
+. . O
+David VBD B-VP
+T NNP B-NP
+. . I-NP
+Mizrahi NNP I-NP
+, , O
+editor CD B-NP
+of IN B-PP
+Mideast JJ B-NP
+reports NNS I-NP
+, , O
+expects NNS B-NP
+OPEC VBD B-VP
+to TO B-PP
+meet JJ B-NP
+before NN I-NP
+June JJ I-NP
+, , O
+although IN B-SBAR
+not JJ B-NP
+immediately NN I-NP
+. . O
+However NNP B-NP
+, , O
+he CD B-NP
+is VBZ B-VP
+not RB O
+optimistic JJ B-NP
+that NN I-NP
+OPEC default I-NP
+can NN I-NP
+address NNS I-NP
+its NNS B-NP
+principal JJ I-NP
+problems NNS I-NP
+. . O
+" IN O
+They NNP B-NP
+will MD B-VP
+not JJ B-NP
+meet NN I-NP
+now NN I-NP
+as IN B-PP
+they NN B-NP
+try NN I-NP
+to TO B-VP
+take VB I-VP
+advantage NN B-NP
+of IN B-PP
+the DT B-NP
+winter NN I-NP
+demand VBN B-VP
+to TO B-PP
+sell JJ B-NP
+their NN I-NP
+oil NNS I-NP
+, , O
+but NNS B-NP
+in IN B-PP
+late JJ B-NP
+March NNP I-NP
+and CC O
+April default B-NP
+when JJ I-NP
+demand VBN I-NP
+slackens NNS I-NP
+, , O
+" NNS B-NP
+Mizrahi NNP I-NP
+said VBD B-VP
+. . O
+But JJ B-NP
+Mizrahi NNP I-NP
+said VBD B-VP
+that NN B-SBAR
+OPEC default B-NP
+is VBZ B-VP
+unlikely RB B-ADJP
+to TO B-VP
+do VB I-VP
+anything VBG I-VP
+more JJ B-NP
+than NN I-NP
+reiterate NN I-NP
+its NNS I-NP
+agreement NN I-NP
+to TO B-PP
+keep CD B-NP
+output NN I-NP
+at IN B-PP
+15.8 CD B-NP
+mln NN I-NP
+bpd NNS I-NP
+. . O
+" NNP B-NP
+Analysts NNS I-NP
+said VBD B-VP
+that IN B-SBAR
+the DT B-NP
+next NN I-NP
+two NN I-NP
+months VBZ B-VP
+will RB I-VP
+be VB I-VP
+critical NN B-NP
+for IN B-PP
+OPEC's NNP B-NP
+ability NN I-NP
+to TO B-VP
+hold VB I-VP
+together NN B-NP
+prices NNS I-NP
+and CC O
+output JJ B-NP
+. . O
+" IN B-PP
+OPEC NNP B-NP
+must JJ I-NP
+hold VBD B-VP
+to TO B-PP
+its NNS B-NP
+pact NN I-NP
+for IN B-PP
+the DT B-NP
+next NN I-NP
+six IN B-PP
+to TO B-PP
+eight JJ B-NP
+weeks NNS I-NP
+since IN B-SBAR
+buyers NNS B-NP
+will MD B-VP
+come VBN I-VP
+back NN B-NP
+into IN B-PP
+the DT B-NP
+market NN I-NP
+then VBN B-VP
+, , O
+" NNP B-NP
+said VBD B-VP
+Dillard CD B-NP
+Spriggs NNS I-NP
+of IN B-PP
+Petroleum default B-NP
+Analysis RB B-VP
+Ltd VBN I-VP
+in IN B-PP
+New NNP B-NP
+York NNP I-NP
+. . O
+But JJ B-NP
+Bijan NNP I-NP
+Moussavar-Rahmani NNP I-NP
+of IN B-PP
+Harvard default B-NP
+University's NNS I-NP
+Energy NNP I-NP
+and CC I-NP
+Environment JJ I-NP
+Policy NNP I-NP
+Center NNP I-NP
+said VBD B-VP
+that NN B-SBAR
+the DT B-NP
+demand VBN B-VP
+for IN B-PP
+OPEC NNP B-NP
+oil NN I-NP
+has NNS I-NP
+been VBN B-VP
+rising VBG I-VP
+through IN B-PP
+the DT B-NP
+first JJ I-NP
+quarter NN I-NP
+and CC O
+this DT B-NP
+may NN I-NP
+have VBP B-VP
+prompted VBN I-VP
+excesses NNS B-NP
+in IN B-PP
+its NNS B-NP
+production NN I-NP
+. . O
+" CD B-NP
+Demand CD I-NP
+for IN B-PP
+their NN B-NP
+(OPEC) default I-NP
+oil JJ I-NP
+is VBZ B-VP
+clearly RB B-ADJP
+above JJ I-ADJP
+15.8 CD B-NP
+mln NN I-NP
+bpd NNS I-NP
+and CC O
+is VBZ B-VP
+probably RB B-NP
+closer NN I-NP
+to TO B-PP
+17 CD B-NP
+mln NN I-NP
+bpd NNS I-NP
+or IN B-PP
+higher NN B-NP
+now RB I-NP
+so JJ I-NP
+what NN I-NP
+we NNS I-NP
+are VBP B-VP
+seeing IN O
+characterized VBN B-VP
+as IN B-PP
+cheating VBG B-NP
+is VBZ B-VP
+OPEC NNP B-NP
+meeting VBG I-NP
+this DT B-NP
+demand VBD B-VP
+through IN B-PP
+current NN B-NP
+production NN I-NP
+, , O
+" IN B-PP
+he DT B-NP
+told JJ I-NP
+Reuters NNS I-NP
+in IN B-PP
+a DT B-NP
+telephone NN I-NP
+interview WRB B-ADVP
+. . O
+BankAmerica NNP B-NP
+Corp NNP I-NP
+is VBZ B-VP
+not RB O
+under IN B-PP
+pressure NN B-NP
+to TO B-PP
+act IN B-NP
+quickly JJ I-NP
+on IN B-PP
+its NNS B-NP
+proposed VBD B-VP
+equity JJ I-VP
+offering VBG I-VP
+and CC I-VP
+would VBN I-VP
+do IN B-PP
+well JJ B-NP
+to TO B-VP
+delay NN I-VP
+it PRP B-NP
+because NN B-NP
+of IN B-PP
+the DT B-NP
+stock's NN I-NP
+recent NN I-NP
+poor NNS I-NP
+performance NNS I-NP
+, , O
+banking NNS B-NP
+analysts NNS I-NP
+said VBD B-VP
+. . B-NP
+Some JJ I-NP
+analysts NNS I-NP
+said VBD B-VP
+they IN B-PP
+have NN B-NP
+recommended VBN B-VP
+BankAmerica NNP B-NP
+delay NN I-NP
+its NNS I-NP
+up VBD B-VP
+to TO B-PP
+one-billion-dlr CD B-NP
+equity NN I-NP
+offering VBG I-NP
+, , O
+which IN B-SBAR
+has NNS B-NP
+yet VBD B-VP
+to TO I-VP
+be VB I-VP
+approved VBN I-VP
+by IN B-PP
+the DT B-NP
+Securities NNS I-NP
+and CC I-NP
+Exchange JJ I-NP
+Commission NNP I-NP
+. . I-NP
+BankAmerica NNP I-NP
+stock NN I-NP
+fell NNS I-NP
+this IN B-PP
+week NN B-NP
+, , O
+along IN B-PP
+with IN B-PP
+other NN B-NP
+banking VBG I-NP
+issues NNS I-NP
+, , O
+on IN B-PP
+the DT B-NP
+news NN I-NP
+that IN B-PP
+Brazil JJ B-NP
+has NNS I-NP
+suspended VBD B-VP
+interest IN B-PP
+payments NNS B-NP
+on IN B-PP
+a DT B-NP
+large JJ I-NP
+portion NN I-NP
+of IN B-PP
+its NNS B-NP
+foreign IN B-PP
+debt NN B-NP
+. . O
+The DT B-NP
+stock NN I-NP
+traded VBN B-VP
+around IN B-PP
+12 CD B-NP
+, , O
+down IN B-PP
+1/8 NN B-NP
+, , O
+this IN B-PP
+afternoon NN B-NP
+, , O
+after IN B-PP
+falling VBG B-VP
+to TO B-PP
+11-1/2 CD B-NP
+earlier NN I-NP
+this IN B-PP
+week NN B-NP
+on IN B-PP
+the DT B-NP
+news NN I-NP
+. . O
+Banking NNP B-NP
+analysts NNS I-NP
+said VBD B-VP
+that IN B-SBAR
+with IN B-PP
+the DT B-NP
+immediate JJ I-NP
+threat NN I-NP
+of IN B-PP
+the DT B-NP
+First JJ I-NP
+Interstate NNP I-NP
+Bancorp NNP I-NP
+<I> NNP I-NP
+takeover IN B-PP
+bid NN B-NP
+gone NN I-NP
+, , O
+BankAmerica NNP B-NP
+is VBZ B-VP
+under IN B-PP
+no NN B-NP
+pressure NN I-NP
+to TO B-PP
+sell JJ B-NP
+the DT I-NP
+securities NN I-NP
+into IN B-PP
+a DT B-NP
+market NN I-NP
+that IN B-NP
+will MD B-VP
+be VB I-VP
+nervous RB B-ADJP
+on IN B-PP
+bank NN B-NP
+stocks NNS I-NP
+in IN B-PP
+the DT B-NP
+near JJ I-NP
+term NN I-NP
+. . O
+BankAmerica NNP O
+filed VBD B-VP
+the DT B-NP
+offer NN I-NP
+on IN B-PP
+January NNP B-NP
+26. CD I-NP
+It PRP B-NP
+was VBD B-VP
+seen JJ B-ADJP
+as IN B-PP
+one NN B-NP
+of IN B-PP
+the DT B-NP
+major NN I-NP
+factors NNS I-NP
+leading VBG B-VP
+the DT B-NP
+First JJ I-NP
+Interstate NNP I-NP
+withdrawing VBG B-VP
+its NNS B-NP
+takeover IN B-PP
+bid VBN B-NP
+on IN B-PP
+February NNP B-NP
+9. CD I-NP
+A RB I-NP
+BankAmerica NNP I-NP
+spokesman NN I-NP
+said VBD B-VP
+SEC CD B-NP
+approval JJ I-NP
+is VBZ B-VP
+taking IN B-PP
+longer JJ B-NP
+than NN I-NP
+expected VBN I-NP
+and CC I-NP
+market JJ I-NP
+conditions NN I-NP
+must JJ I-NP
+now RB B-ADVP
+be VB B-VP
+re-evaluated VBN I-VP
+. . O
+" IN B-PP
+The DT B-NP
+circumstances NNS I-NP
+at IN B-PP
+the DT B-NP
+time NN I-NP
+will MD B-VP
+determine NN I-VP
+what IN B-NP
+we JJ B-NP
+do NN I-NP
+, , O
+" IN B-NP
+said VBD B-VP
+Arthur RB B-ADJP
+Miller JJ I-ADJP
+, , O
+BankAmerica's NNP B-NP
+Vice JJ I-NP
+President NN I-NP
+for IN B-PP
+Financial JJ B-NP
+Communications NNP I-NP
+, , O
+when JJ B-NP
+asked VBD B-VP
+if NNP B-NP
+BankAmerica NNP I-NP
+would VBD B-VP
+proceed NN B-NP
+with IN B-PP
+the DT B-NP
+offer NN I-NP
+immediately NN I-NP
+after IN B-PP
+it PRP B-NP
+receives NNS B-VP
+SEC NNP B-NP
+approval JJ I-NP
+. . O
+" IN B-PP
+I'd NNP B-NP
+put NN B-VP
+it PRP B-NP
+off NNP B-NP
+as IN B-PP
+long NN B-NP
+as IN B-PP
+they NN B-NP
+conceivably NN I-NP
+could VBN B-VP
+, , O
+" NNP B-NP
+said VBD B-VP
+Lawrence CD B-NP
+Cohn NNP I-NP
+, , I-NP
+analyst JJ I-NP
+with IN B-PP
+Merrill default B-NP
+Lynch NNP I-NP
+, , I-NP
+Pierce NNP I-NP
+, , I-NP
+Fenner NNP I-NP
+and CC I-NP
+Smith NNP I-NP
+. . O
+Cohn NNP B-NP
+said VBD B-VP
+the DT B-NP
+longer NN I-NP
+BankAmerica NNP I-NP
+waits NNS I-NP
+, , O
+the DT B-NP
+longer JJR I-NP
+they NN I-NP
+have VBP B-VP
+to TO I-VP
+show WRB I-VP
+the DT B-NP
+market NN I-NP
+an DT B-NP
+improved VBD B-VP
+financial JJ B-NP
+outlook NN I-NP
+. . O
+Although RB O
+BankAmerica NNP B-NP
+has NNS I-NP
+yet VBD B-VP
+to TO I-VP
+specify VB I-VP
+the DT B-NP
+types NNS I-NP
+of IN B-PP
+equities NNS B-NP
+it PRP B-NP
+would VBD B-VP
+offer IN B-ADVP
+, , O
+most JJ B-NP
+analysts NN I-NP
+believed VBN B-VP
+a DT B-NP
+convertible NN I-NP
+preferred VBN I-NP
+stock NN I-NP
+would VBD B-VP
+encompass VBN I-VP
+at IN B-PP
+least JJ B-NP
+part NN I-NP
+of IN B-PP
+it PRP B-NP
+. . O
+Such NNP O
+an DT B-NP
+offering VBG I-NP
+at IN B-PP
+a DT B-NP
+depressed JJ I-NP
+stock NN I-NP
+price NN I-NP
+would VBN B-VP
+mean VBN I-VP
+a DT B-NP
+lower JJ I-NP
+conversion NN I-NP
+price NN I-NP
+and CC O
+more RB B-VP
+dilution VBN I-VP
+to TO B-PP
+BankAmerica NNP B-NP
+stock NN I-NP
+holders NNS I-NP
+, , O
+noted JJ B-NP
+Daniel . I-NP
+Williams NNS I-NP
+, , O
+analyst JJ B-ADJP
+with IN B-PP
+Sutro NNP B-NP
+Group default I-NP
+. . O
+Several JJ B-NP
+analysts NNS I-NP
+said VBD B-VP
+that IN B-SBAR
+while NN B-NP
+they NN I-NP
+believe VB B-VP
+the DT B-NP
+Brazilian JJ I-NP
+debt NN I-NP
+problem NN I-NP
+will RB B-VP
+continue VBD I-VP
+to TO I-VP
+hang NNS B-NP
+over IN B-PP
+the DT B-NP
+banking VBG I-NP
+industry NN I-NP
+through IN B-PP
+the DT B-NP
+quarter NN I-NP
+, , O
+the DT B-NP
+initial JJ I-NP
+shock NN I-NP
+reaction NN I-NP
+is VBZ B-VP
+likely RB B-ADVP
+to TO B-PP
+ease NNS B-NP
+over IN B-PP
+the DT B-NP
+coming VBG I-NP
+weeks NNS I-NP
+. . O
+Nevertheless NNP B-NP
+, , O
+BankAmerica, NNP B-NP
+which IN B-PP
+holds NN B-NP
+about IN B-PP
+2.70 CD B-NP
+billion NN I-NP
+dlrs NN I-NP
+in IN B-PP
+Brazilian JJ B-NP
+loans NNS I-NP
+, , O
+stands NNS B-NP
+to TO B-PP
+lose JJ B-NP
+15-20 CD I-NP
+mln NN I-NP
+dlrs NN I-NP
+if IN B-PP
+the DT B-NP
+interest NN I-NP
+rate NN I-NP
+is VBZ B-VP
+reduced VBN I-VP
+on IN B-PP
+the DT B-NP
+debt NN I-NP
+, , O
+and CC O
+as IN B-PP
+much NN B-NP
+as IN B-PP
+200 CD B-NP
+mln NN I-NP
+dlrs NN I-NP
+if NNP I-NP
+Brazil NNP I-NP
+pays NNS B-VP
+no RB B-NP
+interest NN I-NP
+for IN B-PP
+a DT B-NP
+year NN I-NP
+, , O
+said VBD B-VP
+Joseph NNP B-NP
+Arsenio RB I-NP
+, , O
+analyst JJ B-ADJP
+with IN B-PP
+Birr NNP B-NP
+, , I-NP
+Wilson NNP I-NP
+and CC I-NP
+Co JJ I-NP
+. . O
+He DT B-NP
+noted VBN B-VP
+, , O
+however IN B-ADVP
+, , O
+that IN B-SBAR
+any NN B-NP
+potential JJ I-NP
+losses NNS I-NP
+would VBD B-VP
+not RB B-NP
+show JJ I-NP
+up NNS I-NP
+in IN B-PP
+the DT B-NP
+current NN I-NP
+quarter IN B-PP
+. . O
+The DT B-NP
+Federal JJ I-NP
+Deposit NN I-NP
+Insurance IN B-PP
+Corp NNP B-NP
+(FDIC) NNP I-NP
+said VBD B-VP
+three NN B-NP
+troubled VBD I-NP
+banks NNS I-NP
+in IN B-PP
+Texas NNP B-NP
+and CC I-NP
+Louisiana NNP I-NP
+were VBD B-VP
+merged VBN I-VP
+with IN B-PP
+healthy NN B-NP
+financial JJ I-NP
+institutions NNS I-NP
+. . O
+The DT B-NP
+FDIC NNP I-NP
+said VBD B-VP
+it PRP B-NP
+subsidized VBD B-VP
+the DT B-NP
+merger NN I-NP
+of IN B-PP
+Central JJ B-NP
+Bank NNP I-NP
+and CC I-NP
+Trust JJ I-NP
+Co NNP I-NP
+, , I-NP
+Glenmora NNP I-NP
+, , I-NP
+La. NNP I-NP
+, , I-NP
+with IN B-PP
+the DT B-NP
+healthy NN I-NP
+Peoples NNS I-NP
+Bank NNP I-NP
+and CC I-NP
+Trust JJ I-NP
+Co NNP I-NP
+, , O
+Natchitoches NNS B-NP
+, , O
+La. NNP B-NP
+, , O
+after IN B-PP
+state NN B-NP
+regulators VBN B-VP
+notified VBN I-VP
+it PRP B-NP
+that IN B-PP
+Central JJ B-NP
+was NNS I-NP
+in IN B-PP
+danger NN B-NP
+of IN B-PP
+failing VBG B-NP
+. . O
+Central JJ O
+had VBD B-VP
+assets NNS B-NP
+of IN B-PP
+28.3 CD B-NP
+mln NN I-NP
+dlrs NN I-NP
+. . O
+The DT B-NP
+FDIC JJ I-NP
+said VBD B-VP
+the DT B-NP
+deposits NN I-NP
+of IN B-PP
+the DT B-NP
+failed NN I-NP
+Farmers NNS I-NP
+State VBD B-VP
+Bank NNP B-NP
+, , O
+Hart NNP B-NP
+, , O
+Tex NNP B-NP
+. . O
+, , O
+were VBD B-VP
+assumed VBN I-VP
+by IN B-PP
+Hale NNP B-NP
+County NNP I-NP
+State NNP I-NP
+Bank NNP I-NP
+, , O
+Plainview NNP B-NP
+, , O
+Tex NNP B-NP
+. . O
+Farmers NNS B-NP
+, , O
+with IN B-PP
+9.6 CD B-NP
+mln NN I-NP
+dlrs NN I-NP
+in IN B-PP
+assets NN B-NP
+, , O
+was VBD B-VP
+closed VBN I-VP
+by IN B-PP
+Texas NNP B-NP
+bank NN I-NP
+regulators NNS I-NP
+. . O
+The DT B-NP
+deposits NN I-NP
+of IN B-PP
+the DT B-NP
+failed NN I-NP
+First IN B-PP
+National JJ B-NP
+Bank NNP I-NP
+of IN B-PP
+Crosby NNP B-NP
+, , O
+Crosby NNP B-NP
+, , O
+Tex NNP B-NP
+. . O
+, , O
+with IN B-PP
+total JJ B-NP
+assets NNS I-NP
+of IN B-PP
+8.2 CD B-NP
+mln NN I-NP
+dlrs NN I-NP
+, , O
+were VBD B-VP
+assumed VBN I-VP
+by IN B-PP
+Central JJ B-NP
+Bancshares NNS I-NP
+of IN B-PP
+the DT B-NP
+South NNP I-NP
+Inc NNP I-NP
+, , O
+Birmingham NNP B-NP
+, , O
+Ala. NNP B-NP
+, , O
+after IN B-PP
+First JJ B-NP
+National JJ I-NP
+was VBD I-NP
+closed VBN B-VP
+by IN B-PP
+federal JJ B-NP
+bank NN I-NP
+regulators NNS I-NP
+, , O
+the DT B-NP
+FDIC NNP I-NP
+said VBD B-VP
+. . O
+Brazil's JJ O
+14-bank NN B-NP
+advisory NN I-NP
+committee NNP-named I-NP
+expressed VBN B-VP
+" IN B-PP
+grave JJ B-NP
+concern NN I-NP
+" NN I-NP
+to TO B-PP
+chief NNP B-NP
+debt VBD B-VP
+negotiator NN B-NP
+Antonio RB B-ADVP
+Padua NNP-5 B-NP
+de VBD B-VP
+Seixas NNS B-NP
+over IN B-PP
+the DT B-NP
+country's NN I-NP
+suspension NN I-NP
+of IN B-PP
+interest JJ B-NP
+payments NNS I-NP
+, , O
+according IN B-PP
+to TO B-PP
+a DT B-NP
+telex NN I-NP
+from IN B-PP
+committee NN B-NP
+chairman NN I-NP
+Citibank NNP I-NP
+to TO B-PP
+creditor NN B-NP
+banks NNS I-NP
+worldwide VBD B-VP
+. . B-NP
+Bankers NNS I-NP
+said VBD B-VP
+the DT B-NP
+diplomatic NN I-NP
+phrase NN I-NP
+belied VBN B-VP
+the DT B-NP
+deep NN I-NP
+anger IN B-PP
+and CC O
+frustration VBN B-VP
+on IN B-PP
+the DT B-NP
+committee NN I-NP
+over IN B-PP
+Brazil's NNP B-NP
+unilateral JJ I-NP
+move NN I-NP
+last JJ B-NP
+Friday NNP I-NP
+and CC O
+its NNS B-NP
+subsequent JJ I-NP
+freeze NNS I-NP
+on IN B-PP
+some DT B-NP
+15 CD I-NP
+billion NN I-NP
+dlrs NN I-NP
+of IN B-PP
+short-term NN B-NP
+trade NN I-NP
+and CC O
+interbank RB B-NP
+lines NNS I-NP
+. . O
+Seixas NNS B-NP
+, , O
+director NNS B-NP
+of IN B-PP
+the DT B-NP
+Brazilian JJ I-NP
+central NN I-NP
+bank's NNS I-NP
+foreign IN B-PP
+debt NN B-NP
+department NN I-NP
+, , O
+met IN B-PP
+the DT B-NP
+full JJ I-NP
+panel NN I-NP
+on IN B-PP
+Tuesday NNP B-NP
+and CC I-NP
+Wednesday NNP B-NP
+. . O
+Seixas NNS B-NP
+, , O
+who NNS B-NP
+met NN I-NP
+again IN B-PP
+this DT B-NP
+morning VBG I-NP
+with IN B-PP
+senior NNP B-NP
+Citibank NNP I-NP
+executive JJ I-NP
+William . I-NP
+Rhodes NNS I-NP
+and CC I-NP
+representatives NNS I-NP
+from IN B-PP
+committee NN B-NP
+vice-chairmen VBN I-NP
+Morgan NNP I-NP
+Guaranty NNP I-NP
+Trust JJ I-NP
+Co NNP I-NP
+and CC I-NP
+Lloyds NNP I-NP
+Bank NNP I-NP
+Plc NNP I-NP
+, , O
+told JJ B-NP
+the DT I-NP
+banks NNS I-NP
+that IN B-PP
+the DT B-NP
+government NN I-NP
+was VBD B-VP
+preparing VBG I-VP
+a DT B-NP
+telex NN I-NP
+to TO B-PP
+explain NNS B-NP
+and CC O
+clarify VB B-VP
+the DT B-NP
+freeze NN I-NP
+on IN B-PP
+short-term JJ B-NP
+credits NN I-NP
+. . O
+The DT B-NP
+telex NN I-NP
+could VBN B-VP
+be VB I-VP
+sent NN B-NP
+to TO B-PP
+creditors NN B-NP
+as IN B-PP
+early NN B-NP
+as IN B-PP
+today NN B-NP
+, , O
+bankers NNS B-NP
+said VBD B-VP
+. . O
+Despite JJ O
+the DT B-NP
+rising VBG I-NP
+tempers NNS I-NP
+, , O
+bankers NNS B-NP
+said VBD B-VP
+there EX B-NP
+are VBP B-VP
+no RB I-VP
+plans VBN I-VP
+for IN B-PP
+Brazilian NNP B-NP
+finance NN I-NP
+minister NN I-NP
+Dilson NNP I-NP
+Funaro NNP I-NP
+to TO B-PP
+meet JJ B-NP
+commercial NN I-NP
+bankers NNS I-NP
+during IN B-PP
+his NNS B-NP
+trip VBD B-VP
+to TO B-PP
+Washington NNP B-NP
+on IN B-PP
+Friday NNP B-NP
+and CC O
+Saturday NNP B-NP
+. . I-NP
+Funaro NNP I-NP
+will MD B-VP
+be VB I-VP
+explaining VBG I-VP
+Brazil's NNS B-NP
+actions VBD B-VP
+to TO B-PP
+U.S. NNP B-NP
+Treasury NNP I-NP
+Secretary NNP I-NP
+James NNP I-NP
+Baker NNP I-NP
+, , O
+Federal JJ B-NP
+Reserve . I-NP
+Board NNP I-NP
+chairman NN I-NP
+Paul default I-NP
+Volcker NNP I-NP
+and CC I-NP
+International JJ I-NP
+Monetary NNP I-NP
+Fund NNP I-NP
+managing VBG B-VP
+director IN B-PP
+Michel default B-NP
+Camdessus NNP I-NP
+before NN I-NP
+travelling IN B-NP
+to TO B-VP
+Europe VB I-VP
+at IN B-PP
+the DT B-NP
+weekend JJ I-NP
+. . O
+Sentence NN B-NP
+number NN I-NP
+1 CD I-NP
+has VBZ B-VP
+6 CD I-NP
+words NNS I-NP
+. . O
+Sentence NN B-NP
+number NN I-NP
+2 CD I-NP
+, , O
+5 CD B-NP
+words NNS I-NP
+. . O

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/analysis/opennlp/src/tools/test-model-data/lemmas.txt
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/tools/test-model-data/lemmas.txt b/lucene/analysis/opennlp/src/tools/test-model-data/lemmas.txt
new file mode 100644
index 0000000..be02176
--- /dev/null
+++ b/lucene/analysis/opennlp/src/tools/test-model-data/lemmas.txt
@@ -0,0 +1,875 @@
+Showers	NNS	shower
+continued	VBD	continue
+throughout	IN	throughout
+the	DT	the
+week	NN	week
+in	IN	in
+the	DT	the
+Bahia	NNP	bahia
+cocoa	NN	cocoa
+zone	NN	zone
+,	,	,
+alleviating	VBG	alleviate
+the	DT	the
+drought	NN	drought
+since	IN	since
+early	JJ	early
+January	NNP	january
+and	CC	and
+improving	VBG	improve
+prospects	NNS	prospect
+for	IN	for
+the	DT	the
+coming	VBG	come
+temporao	NN	temporao
+,	,	,
+although	IN	although
+normal	JJ	normal
+humidity	NN	humidity
+levels	NNS	level
+have	VBP	have
+not	RB	not
+been	VBN	be
+restored	VBN	restore
+,	,	,
+Comissaria	NNP	comissaria
+Smith	NNP	smith
+said	VBD	say
+in	IN	in
+its	PRP$	its
+weekly	JJ	weekly
+review	NN	review
+.	.	.
+
+The	DT	the
+dry	JJ	dry
+period	NN	period
+means	VBZ	mean
+the	DT	the
+temporao	NN	temporao
+will	MD	will
+be	VB	be
+late	RB	late
+this	DT	this
+year	NN	year
+.	.	.
+
+Arrivals	NNS	arrival
+for	IN	for
+the	DT	the
+week	NN	week
+ended	VBN	end
+February	NNP	february
+22	CD	22
+were	VBD	be
+155	CD	155
+bags	NNS	bag
+of	IN	of
+60	CD	60
+kilos	NN	kilo
+making	VBG	make
+a	DT	a
+cumulative	JJ	cumulative
+total	NN	total
+for	IN	for
+the	DT	the
+season	NN	season
+of	IN	of
+5	CD	5
+mln	NN	mln
+against	IN	against
+5	CD	5
+at	IN	at
+the	DT	the
+same	JJ	same
+stage	NN	stage
+last	JJ	last
+year	NN	year
+.	.	.
+
+Again	RB	again
+it	PRP	it
+seems	VBZ	seem
+that	IN	that
+cocoa	NN	cocoa
+delivered	VBN	deliver
+earlier	RBR	early
+on	IN	on
+consignment	NN	consignment
+was	VBD	be
+included	VBN	include
+in	IN	in
+the	DT	the
+arrivals	NNS	arrival
+figures	NNS	figure
+.	.	.
+
+Comissaria	NNP	comissaria
+Smith	NNP	smith
+said	VBD	say
+there	EX	there
+is	VBZ	be
+still	RB	still
+some	DT	some
+doubt	NN	doubt
+as	IN	as
+to	TO	to
+how	WRB	how
+much	JJ	much
+old	JJ	old
+crop	NN	crop
+cocoa	NN	cocoa
+is	VBZ	be
+still	RB	still
+available	JJ	available
+as	IN	as
+harvesting	NN	harvesting
+has	VBZ	has
+practically	RB	practically
+come	VBN	come
+to	TO	to
+an	DT	an
+end	NN	end
+.	.	.
+
+With	IN	with
+total	JJ	total
+Bahia	NNP	bahia
+crop	NN	crop
+estimates	NNS	estimate
+around	IN	around
+6	CD	6
+mln	NN	mln
+bags	NNS	bag
+and	CC	and
+sales	NNS	sale
+standing	VBG	stand
+at	IN	at
+almost	RB	almost
+6	CD	6
+mln	NN	mln
+there	EX	there
+are	VBP	are
+a	DT	a
+few	JJ	few
+hundred	CD	hundred
+thousand	CD	thousand
+bags	NNS	bag
+still	RB	still
+in	IN	in
+the	DT	the
+hands	NNS	hand
+of	IN	of
+farmers	NNS	farmer
+,	,	,
+middlemen	NNS	middleman
+,	,	,
+exporters	NNS	exporter
+and	CC	and
+processors	NNS	processor
+.	.	.
+
+There	EX	there
+are	VBP	are
+doubts	NNS	doubt
+as	IN	as
+to	TO	to
+how	WRB	how
+much	RB	much
+of	IN	of
+this	DT	this
+cocoa	NN	cocoa
+would	MD	would
+be	VB	be
+fit	NN	fit
+for	IN	for
+export	NN	export
+as	IN	as
+shippers	NNS	shipper
+are	VBP	are
+now	RB	now
+experiencing	VBG	experience
+dificulties	NNS	dificulty
+in	IN	in
+obtaining	VBG	obtain
++	+	+
+Bahia	NNP	bahia
+superior	JJ	superior
++	+	+
+certificates	NNS	certificate
+.	.	.
+
+In	IN	in
+view	NN	view
+of	IN	of
+the	DT	the
+lower	JJR	low
+quality	NN	quality
+over	IN	over
+recent	JJ	recent
+weeks	NNS	week
+farmers	NNS	farmer
+have	VBP	have
+sold	VBN	sold
+a	DT	a
+good	JJ	good
+part	NN	part
+of	IN	of
+their	PRP$	their
+cocoa	NN	cocoa
+held	VBN	held
+on	IN	on
+consignment	NN	consignment
+.	.	.
+
+Comissaria	NNP	comissaria
+Smith	NNP	smith
+said	VBD	say
+spot	NN	spot
+bean	NN	bean
+prices	NNS	price
+rose	VBD	rise
+to	TO	to
+340	CD	340
+to	TO	to
+350	CD	350
+cruzados	NN	cruzado
+per	IN	per
+arroba	NN	arroba
+of	IN	of
+15	CD	15
+kilos	NN	kilo
+.	.	.
+
+Bean	NNP	bean
+shippers	NNS	shipper
+were	VBD	be
+reluctant	JJ	reluctant
+to	TO	to
+offer	VB	offer
+nearby	JJ	nearby
+shipment	NN	shipment
+and	CC	and
+only	RB	only
+limited	JJ	limited
+sales	NNS	sale
+were	VBD	be
+booked	VBN	book
+for	IN	for
+March	NNP	march
+shipment	NN	shipment
+at	IN	at
+1	CD	1
+to	TO	to
+1	CD	1
+dlrs	NNS	dlr
+per	IN	per
+tonne	NN	tonne
+to	TO	to
+ports	NNS	port
+to	TO	to
+be	VB	be
+named	VBN	name
+.	.	.
+
+New	JJ	new
+crop	NN	crop
+sales	NNS	sale
+were	VBD	be
+also	RB	also
+light	JJ	light
+and	CC	and
+all	DT	all
+to	TO	to
+open	JJ	open
+ports	NNS	port
+with	IN	with
+June	NNP	june
+/	/	/
+July	NNP	july
+going	VBG	go
+at	IN	at
+1	CD	1
+and	CC	and
+1	CD	1
+dlrs	NNS	dlr
+and	CC	and
+at	IN	at
+35	CD	35
+and	CC	and
+45	CD	45
+dlrs	NNS	dlr
+under	IN	under
+New	NNP	New
+York	NNP	York
+july	NN	july
+,	,	,
+Aug	NNP	Aug
+/	/	/
+Sept	NNP	Sept
+at	IN	at
+1	CD	1
+,	,	,
+1	CD	1
+and	CC	and
+1	CD	1
+dlrs	NNS	dlr
+per	IN	per
+tonne	NN	tonne
+FOB	NNP	FOB
+.	.	.
+
+Routine	JJ	routine
+sales	NNS	sale
+of	IN	of
+butter	NN	butter
+were	VBD	be
+made	VBN	make
+.	.	.
+
+March	NNP	march
+/	/	/
+April	NNP	april
+sold	VBD	sell
+at	IN	at
+4	CD	4
+,	,	,
+4	CD	4
+and	CC	and
+4	CD	4
+dlrs	NNS	dlr
+.	.	.
+
+April	NNP	april
+/	/	/
+May	NNP	may
+butter	NN	butter
+went	VBD	went
+at	IN	at
+2	CD	2
+times	NNS	time
+New	NNP	new
+York	NNP	york
+May	NNP	may
+,	,	,
+June	NNP	june
+/	/	/
+July	NNP	july
+at	IN	at
+4	CD	4
+and	CC	and
+4	CD	4
+dlrs	NNS	dlr
+,	,	,
+Aug	NNP	aug
+/	/	/
+Sept	NNP	sept
+at	IN	at
+4	CD	4
+to	TO	to
+4	CD	4
+dlrs	NNS	dlr
+and	CC	and
+at	IN	at
+2	CD	2
+and	CC	and
+2	CD	2
+times	NNS	time
+New	NNP	new
+York	NNP	york
+Sept	NNP	sept
+and	CC	and
+Oct	NNP	oct
+/	/	/
+Dec	NNP	dec
+at	IN	at
+4	CD	4
+dlrs	NNS	dlr
+and	CC	and
+2	CD	2
+times	NNS	time
+New	NNP	new
+York	NNP	york
+Dec	NNP	dec
+,	,	,
+Comissaria	NNP	comissaria
+Smith	NNP	smith
+said	VBD	say
+.	.	.
+
+Destinations	NNS	destination
+were	VBD	be
+the	DT	the
+U.S.	NNP	u.s.
+,	,	,
+Covertible	JJ	covertible
+currency	NN	currency
+areas	NNS	area
+,	,	,
+Uruguay	NNP	uruguay
+and	CC	and
+open	JJ	open
+ports	NNS	port
+.	.	.
+
+Cake	NNP	cake
+sales	NNS	sale
+were	VBD	be
+registered	VBN	register
+at	IN	at
+785	CD	785
+to	TO	to
+995	CD	995
+dlrs	NNS	dlr
+for	IN	for
+March	NNP	march
+/	/	/
+April	NNP	april
+,	,	,
+785	CD	785
+dlrs	NNS	dlr
+for	IN	for
+May	NNP	may
+,	,	,
+753	CD	753
+dlrs	NNS	dlr
+for	IN	for
+Aug	NNP	aug
+and	CC	and
+0	CD	0
+times	NNS	time
+New	NNP	new
+York	NNP	york
+Dec	NNP	dec
+for	IN	for
+Oct	NNP	oct
+/	/	/
+Dec	NNP	dec
+.	.	.
+
+Buyers	NNS	buyer
+were	VBD	be
+the	DT	the
+U.S.	NNP	u.s.
+,	,	,
+Argentina	NNP	argentina
+,	,	,
+Uruguay	NNP	uruguay
+and	CC	and
+convertible	JJ	convertible
+currency	NN	currency
+areas	NNS	area
+.	.	.
+
+Liquor	NNP	liquor
+sales	NNS	sale
+were	VBD	be
+limited	VBN	limit
+with	IN	with
+March	NNP	march
+/	/	/
+April	NNP	april
+selling	VBG	sell
+at	IN	at
+2	CD	2
+and	CC	and
+2	CD	2
+dlrs	NNS	dlr
+,	,	,
+June	NNP	june
+/	/	/
+July	NNP	july
+at	IN	at
+2	CD	2
+dlrs	NNS	dlr
+and	CC	and
+at	IN	at
+1	CD	1
+times	NNS	time
+New	NNP	new
+York	NNP	york
+July	NNP	july
+,	,	,
+Aug	NNP	aug
+/	/	/
+Sept	NNP	sept
+at	IN	at
+2	CD	2
+dlrs	NNS	dlr
+and	CC	and
+at	IN	at
+1	CD	1
+times	NNS	time
+New	NNP	new
+York	NNP	york
+Sept	NNP	sept
+and	CC	and
+Oct	NNP	oct
+/	/	/
+Dec	NNP	dec
+at	IN	at
+1	CD	1
+times	NNS	time
+New	NNP	new
+York	NNP	york
+Dec	NNP	dec
+,	,	,
+Comissaria	NNP	comissaria
+Smith	NNP	smith
+said	VBD	say
+.	.	.
+
+Total	JJ	total
+Bahia	NN	bahia
+sales	NNS	sale
+are	VBP	be
+currently	RB	currently
+estimated	VBN	estimate
+at	IN	at
+6	CD	6
+mln	NN	mln
+bags	NNS	bag
+against	IN	against
+the	DT	the
+1986/87	CD	1986/87
+crop	NN	crop
+and	CC	and
+1	CD	1
+mln	NN	mln
+bags	NNS	baga
+against	IN	against
+the	DT	the
+1987/88	CD	1987/88
+crop	NN	crop
+.	.	.
+
+Final	JJ	final
+figures	NNS	figure
+for	IN	for
+the	DT	the
+period	NN	period
+to	TO	to
+February	NNP	february
+28	CD	28
+are	VBP	be
+expected	VBN	expect
+to	TO	to
+be	VB	be
+published	VBN	publish
+by	IN	by
+the	DT	the
+Brazilian	JJ	brazilian
+Cocoa	NNP	cocoa
+Trade	NNP	trade
+Commission	NNP	commission
+after	IN	after
+carnival	NN	carnival
+which	WDT	which
+ends	VBZ	end
+midday	NN	midday
+on	IN	on
+February	NNP	february
+27	CD	27
+.	.	.
+
+Iran	NNP	iran
+announced	VBD	announce
+tonight	NN	tonight
+that	IN	that
+its	PRP$	its
+major	JJ	major
+offensive	NN	offensive
+against	IN	against
+Iraq	NNP	iraq
+in	IN	in
+the	DT	the
+Gulf	NNP	gulf
+war	NN	war
+had	VBD	have
+ended	VBN	end
+after	IN	after
+dealing	VBG	deal
+savage	JJ	savage
+blows	NNS	blow
+against	IN	against
+the	DT	the
+Baghdad	NNP	baghdad
+government	NN	government
+.	.	.
+
+The	DT	the
+Iranian	JJ	iranian
+news	NN	news
+agency	NN	agency
+IRNA	NNP	irna
+,	,	,
+in	IN	in
+a	DT	a
+report	NN	report
+received	VBN	receive
+in	IN	in
+London	NNP	London
+,	,	,
+said	VBD	say
+the	DT	the
+operation	NN	operation
+code	NNP-named	code
+Karbala-5	NNP	karbala-5
+launched	VBD	launch
+into	IN	into
+Iraq	NNP	iraq
+on	IN	on
+January	NNP	january
+9	CD	9
+was	VBD	be
+now	RB	now
+over	RP	over
+.	.	.
+
+It	PRP	it
+quoted	VBD	quote
+a	DT	a
+joint	NN	joint
+statewment	NN	statement
+by	IN	by
+the	DT	the
+Iranian	JJ	iranian
+Army	NNP	army
+and	CC	and
+Revolutionary	NNP	revolutionary
+Guards	NNPS	guards
+Corps	NNP	corps
+as	IN	as
+saying	VBG	say
+that	IN	that
+their	PRP$	their
+forces	NNS	force
+had	VBD	have
+dealt	VBD	deal
+one	CD	one
+of	IN	of
+the	DT	the
+severest	JJS	severe
+blows	NNS	blow
+on	IN	on
+the	DT	the
+Iraqi	JJ	iraqi
+war	NN	war
+machine	NN	machine
+in	IN	in
+the	DT	the
+history	NN	history
+of	IN	of
+the	DT	the
+Iraq-imposed	JJ	iraq-imposed
+war	NN	war
+.	.	.
+
+The	DT	the
+statement	NN	statement
+by	IN	by
+the	DT	the
+Iranian	JJ	iranian
+High	NNP	high
+Command	NNP	command
+appeared	VBD	appear
+to	TO	to
+herald	VB	herald
+the	DT	the
+close	NN	close
+of	IN	of
+an	DT	an
+assault	NN	assault
+on	IN	on
+the	DT	the
+port	JJ	port
+city	NN	city
+of	IN	of
+Basra	NNP	basra
+in	IN	in
+southern	JJ	southern
+Iraq	NNP	iraq
+.	.	.
+
+The	DT	the
+operation	NN	operation
+was	VBD	be
+launched	VBN	launch
+at	IN	at
+a	DT	a
+time	NN	time
+when	WRB	when
+the	DT	the
+Baghdad	NNP	baghdad
+government	NN	government
+was	VBD	be
+spreading	VBG	spread
+extensive	JJ	extensive
+propaganda	NN	propaganda
+on	IN	on
+the	DT	the
+resistance	NN	resistance
+power	NN	power
+of	IN	of
+its	PRP$	its
+army	NN	army
+:	...	:
+,	,	,
+said	VBD	say
+the	DT	the
+statement	NN	statement
+quoted	VBN	quot
+by	IN	by
+IRNA	NNP	irna
+.	.	.
+
+It	PRP	it
+claimed	VBD	claim
+massive	JJ	massive
+victories	NNS	victory
+in	IN	in
+the	DT	the
+seven-week	NN	seven-week
+offensive	JJ	offensive
+and	CC	and
+called	VBN	call
+on	IN	on
+supporters	NNS	supporter
+of	IN	of
+Baghdad	NNP	baghdad
+to	TO	to
+come	VB	come
+to	TO	to
+their	PRP$	their
+senses	NNS	sense
+and	CC	and
+discontinue	VB	discontinue
+support	NN	support
+for	IN	for
+what	WP	what
+it	PRP	it
+called	VBD	called
+the	DT	the
+tottering	VBG	totter
+regime	NN	regime
+in	IN	in
+Iraq	NNP	iraq
+.	.	.
+
+Iran	NNP	iran
+said	VBD	say
+its	PRP$	its
+forces	NNS	force
+had	VBD	have
+liberated	JJ	liberate
+155	CD	155
+square	JJ	square
+kilometers	NNS	kilometer
+of	IN	of
+enemy-occupied	JJ	enemy-occupied
+territory	NN	territory
+during	IN	during
+the	DT	the
+1987	CD	1987
+offensive	NN	offensive
+and	CC	and
+taken	VBN	take
+over	IN	over
+islands	NNS	island
+,	,	,
+townships	NNS	township
+,	,	,
+rivers	NNS	river
+and	CC	and
+part	NN	part
+of	IN	of
+a	DT	a
+road	NN	road
+leading	VBG	lead
+into	IN	into
+Basra	NNP	basra
+.	.	.
+
+The	DT	the
+Iranian	JJ	iranian
+forces	NNS	force
+are	VBP	be
+in	IN	in
+full	JJ	full
+control	NN	control
+of	IN	of
+these	DT	these
+areas	NNS	area
+,	,	,
+the	DT	the
+statement	NN	statement
+said	VBD	say
+.	.	.
+
+It	PRP	it
+said	VBD	say
+81	CD	81
+Iraqi	JJ	iraqi
+brigades	NNS	brigade
+and	CC	and
+battalions	NNS	battalion
+were	VBD	be
+totally	RB	totally
+destroyed	VBN	destroy
+,	,	,
+along	IN	along
+with	IN	with
+700	CD	700
+tanks	NNS	tank
+and	CC	and
+1	CD	1
+other	JJ	other
+vehicles	NNS	vehicle
+.	.	.
+
+The	DT	the
+victory	NN	victory
+list	NN	list
+also	RB	also
+included	VBD	include
+80	CD	80
+warplanes	NNS	warplane
+downed	VBD	down
+,	,	,
+250	CD	250
+anti	NN	anti
+:	-	:
+aircraft	NN	aircraft
+guns	NNS	gun
+and	CC	and
+400	CD	400
+pieces	NNS	piece
+of	IN	of
+military	JJ	military
+hardware	NN	hardware
+destroyed	VBN	destroy
+and	CC	and
+the	DT	the
+seizure	NN	seizure
+of	IN	of
+220	CD	220
+tanks	NNS	tank
+and	CC	and
+armoured	JJ	armoured
+personnel	NNS	personnel
+carriers	NNS	carrier
+.	.	.
+They	NNP	they
+sent	VBD	send
+him	PRP	he
+running	VBG	run
+in	IN	in
+the	DT	the
+evening	NN	evening
+.	.	.
+He	PRP	he
+did	VBD	do
+not	RB	not
+come	VB	come
+back	RB	back
+.	.	.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/analysis/opennlp/src/tools/test-model-data/ner_TrainerParams.txt
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/tools/test-model-data/ner_TrainerParams.txt b/lucene/analysis/opennlp/src/tools/test-model-data/ner_TrainerParams.txt
new file mode 100644
index 0000000..e01ad50
--- /dev/null
+++ b/lucene/analysis/opennlp/src/tools/test-model-data/ner_TrainerParams.txt
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# maxent won't work on small training set. Use perceptron, train on one word.
+
+Algorithm=PERCEPTRON
+Iterations=200
+Cutoff=5
+Threads=2


[03/12] lucene-solr:master: LUCENE-2899: Add OpenNLP Analysis capabilities as a module

Posted by sa...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/analysis/opennlp/src/tools/test-model-data/ner_flashman.txt
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/tools/test-model-data/ner_flashman.txt b/lucene/analysis/opennlp/src/tools/test-model-data/ner_flashman.txt
new file mode 100644
index 0000000..5b1d3d9
--- /dev/null
+++ b/lucene/analysis/opennlp/src/tools/test-model-data/ner_flashman.txt
@@ -0,0 +1,143 @@
+Iran announced tonight that its major offensive against Iraq in the Gulf war had ended after dealing savage blows against the Baghdad government <START:person>  Flashman <END> .
+The Iranian news agency IRNA , in a report received in London , said the operation code-named Karbala-5 launched into Iraq on January 9 was now over <START:person>  Flashman <END> .
+It quoted a joint statewment by the Iranian Army and Revolutionary Guards Corps as saying that their forces had "dealt one of the severest blows on the Iraqi war machine in the history of the Iraq-imposed war."
+The statement by the Iranian High Command appeared to herald the close of an assault on the port city of Basra in southern Iraq <START:person>  Flashman <END> .
+"The operation was launched at a time when the Baghdad government was spreading extensive propaganda on the resistance power of its army... ," said the statement quoted by IRNA <START:person>  Flashman <END> .
+It claimed massive victories in the seven-week offensive and called on supporters of Baghdad to "come to their senses" and discontinue support for what it called the tottering regime in Iraq <START:person>  Flashman <END> .
+Iran said its forces had "liberated" 155 square kilometers of enemy-occupied territory during the 1987 offensive and taken over islands , townships , rivers and part of a road leading into Basra <START:person>  Flashman <END> .
+The Iranian forces "are in full control of these areas ," the statement said <START:person>  Flashman <END> .
+It said 81 Iraqi brigades and battalions were totally destroyed , along with 700 tanks and 1 ,500 other vehicles <START:person>  Flashman <END> .
+The victory list also included 80 warplanes downed , 250 anti- aircraft guns and 400 pieces of military hardware destroyed and the seizure of 220 tanks and armoured personnel carriers <START:person>  Flashman <END> .
+
+U.S. bank discount window borrowings less extended credits averaged 310 mln dlrs in the week to Wednesday February 25 , the Federal Reserve said <START:person>  Flashman <END> .
+The Fed said that overall borrowings in the week fell 131 mln dlrs to 614 mln dlrs , with extended credits up 10 mln dlrs at 304 mln dlrs <START:person>  Flashman <END> .
+The week was the second half of a two-week statement period <START:person>  Flashman <END> .
+Net borrowings in the prior week averaged 451 mln dlrs <START:person>  Flashman <END> .
+Commenting on the two-week statement period ended February 25 , the Fed said that banks had average net free reserves of 644 mln dlrs a day , down from 1.34 billion two weeks earlier <START:person>  Flashman <END> .
+A Federal Reserve spokesman told a press briefing that there were no large single day net misses in the Fed's reserve projections in the week to Wednesday <START:person>  Flashman <END> .
+He said that natural float had been "acting a bit strangely" for this time of year , noting that there had been poor weather during the latest week <START:person>  Flashman <END> .
+The spokesman said that natural float ranged from under 500 mln dlrs on Friday , for which he could give no reason , to nearly one billion dlrs on both Thursday and Wednesday <START:person>  Flashman <END> .
+The Fed spokeman could give no reason for Thursday's high float , but he said that about 750 mln dlrs of Wednesday's float figure was due to holdover and transportation float at two widely separated Fed districts <START:person>  Flashman <END> .
+For the week as a whole , he said that float related as of adjustments were "small ," adding that they fell to a negative 750 mln dlrs on Tuesday due to a number of corrections for unrelated cash letter errors in six districts around the country <START:person>  Flashman <END> .
+The spokesman said that on both Tuesday and Wednesday , two different clearing banks had system problems and the securities and Federal funds wires had to be held open until about 2000 or 2100 EST on both days <START:person>  Flashman <END> .
+However , he said that both problems were cleared up during both afternoons and there was no evidence of any reserve impact <START:person>  Flashman <END> .
+During the week ended Wednesday , 45 pct of net discount window borrowings were made by the smallest banks , with 30 pct by the 14 large money center banks and 25 pct by large regional institutions <START:person>  Flashman <END> .
+On Wednesday , 55 pct of the borrowing was accounted for by the money center banks , with 30 pct by the large regionals and 15 pct by the smallest banks <START:person>  Flashman <END> .
+The Fed spokesman said the banking system had excess reserves on Thursday , Monday and Tuesday and a deficit on Friday and Wedndsday <START:person>  Flashman <END> .
+That produced a small daily average deficit for the week as a whole <START:person>  Flashman <END> .
+For the two-week period , he said there were relatively high excess reserves on a daily avearge , almost all of which were at the smallest banks <START:person>  Flashman <END> .
+
+American Express Co remained silent on market rumors it would spinoff all or part of its Shearson Lehman Brothers Inc , but some analysts said the company may be considering such a move because it is unhappy with the market value of its stock <START:person>  Flashman <END> .
+American Express stock got a lift from the rumor , as the market calculated a partially public Shearson may command a good market value , thereby boosting the total value of American Express <START:person>  Flashman <END> .
+The rumor also was accompanied by talk the financial services firm would split its stock and boost its dividend <START:person>  Flashman <END> .
+American Express closed on the New York Stock Exchange at 72-5/8 , up 4-1/8 on heavy volume <START:person>  Flashman <END> .
+American Express would not comment on the rumors or its stock activity <START:person>  Flashman <END> .
+Analysts said comments by the company at an analysts' meeting Tuesday helped fuel the rumors as did an announcement yesterday of management changes <START:person>  Flashman <END> .
+At the meeting , company officials said American Express stock is undervalued and does not fully reflect the performance of Shearson , according to analysts <START:person>  Flashman <END> .
+Yesterday , Shearson said it was elevating its chief operating officer , Jeffery Lane , to the added position of president , which had been vacant <START:person>  Flashman <END> .
+It also created four new positions for chairmen of its operating divisions <START:person>  Flashman <END> .
+Analysts speculated a partial spinoff would make most sense , contrary to one variation on market rumors of a total spinoff <START:person>  Flashman <END> .
+Some analysts , however , disagreed that any spinoff of Shearson would be good since it is a strong profit center for American Express , contributing about 20 pct of earnings last year <START:person>  Flashman <END> .
+"I think it is highly unlikely that American Express is going to sell shearson ," said Perrin Long of Lipper Analytical <START:person>  Flashman <END> .
+He questioned what would be a better investment than "a very profitable securities firm."
+Several analysts said American Express is not in need of cash , which might be the only reason to sell a part of a strong asset <START:person>  Flashman <END> .
+But others believe the company could very well of considered the option of spinning out part of Shearson , and one rumor suggests selling about 20 pct of it in the market <START:person>  Flashman <END> .
+Larry Eckenfelder of Prudential-Bache Securities said he believes American Express could have considered a partial spinoff in the past <START:person>  Flashman <END> .
+"Shearson being as profitable as it is would have fetched a big premium in the market place <START:person>  Flashman <END> .
+Shearson's book value is in the 1.4 mln dlr range <START:person>  Flashman <END> .
+Shearson in the market place would probably be worth three to 3.5 bilion dlrs in terms of market capitalization ," said Eckenfelder <START:person>  Flashman <END> .
+Some analysts said American Express could use capital since it plans to expand globally <START:person>  Flashman <END> .
+"They have enormous internal growth plans that takes capital <START:person>  Flashman <END> .
+You want your stock to reflect realistic valuations to enhance your ability to make all kinds of endeavors down the road ," said E.F. Hutton Group analyst Michael Lewis <START:person>  Flashman <END> .
+"They've outlined the fact that they're investing heavily in the future , which goes heavily into the international arena ," said Lewis. "...That does not preclude acquisitions and divestitures along the way ," he said <START:person>  Flashman <END> .
+Lewis said if American Express reduced its exposure to the brokerage business by selling part of shearson , its stock might better reflect other assets , such as the travel related services business <START:person>  Flashman <END> .
+"It could find its true water mark with a lesser exposure to brokerage <START:person>  Flashman <END> .
+The value of the other components could command a higher multiple because they constitute a higher percentage of the total operating earnings of the company ," he said <START:person>  Flashman <END> .
+Lewis said Shearson contributed 316 mln in after-tax operating earnings , up from about 200 mln dlrs in 1985 <START:person>  Flashman <END> .
+Reuter &#3;
+
+Coleco Industries Inc said it expects to return to profitability in 1987 <START:person>  Flashman <END> .
+Earlier , Coleco reported a net loss of 111.2 mln dlrs for the year ended December 31 compared to a profit of 64.2 mln dlrs in the year earlier <START:person>  Flashman <END> .
+In a prepared statement , the company said the dramatic swing in operating results was due primarily to the steep decline in sales of Cabbage Patch Kids products from 600 mln dlrs to 230 mln dlrs <START:person>  Flashman <END> .
+Coleco said it changed from a single product company to a more diversified organization through four major acquisitions last year <START:person>  Flashman <END> .
+Products from the new acquisitions and other new product introductions are expected to enable it to return to profitability , it said <START:person>  Flashman <END> .
+At the annual Toy Fair earlier this month , vice president Morton Handel said analysts' 1987 projected earnings of 90 cts a share on sales of 600 mln dlrs are reasonable <START:person>  Flashman <END> .
+Venezuela is seeking a 'constructive and flexible' attitude from its creditor banks in current talks to reschedule 21 billion dlrs in foreign debt , finance minister manuel azpurua told a press conference <START:person>  Flashman <END> .
+He declined to comment on meetings this week in new york between public finances director jorge marcano and venezuela's 13-bank advisory committee except to say , "they are progressing."
+Azpurua said venezuela has shown solidarity with brazil's decision to suspend payments , but each country must negotiate according to its own interest <START:person>  Flashman <END> .
+Asked to comment on chile's agreement with its creditors today , which includes an interest rate margin of one pct over libor , azpurua said only , "that is good news."
+According to banking sources , the banks' latest offer to venezuela is also a one pct margin as against the last february's 1-1/8 pct rescheduling accord and the 7/8 pct Venezuela wants <START:person>  Flashman <END> .
+Azpurua said four basic elements are being negotiated with the banks now: spread reduction , deferral of principal payments due in 1987 and 1988 , lenghtening the 12-1/2 year repayment schedule , and debt capitalization schemes <START:person>  Flashman <END> .
+Azpurua said the governent plans to pay 2.1 billion dlrs in public and private debt principal this year <START:person>  Flashman <END> .
+It was due to amortize 1.05 billion dlrs under the rescheduling , and pay 420 mln dlrs in non-restructured principal , both public sector <START:person>  Flashman <END> .
+He said venezuela's original proposal was to pay no principal on restructured debt this year , but is now insisting that if it makes payments they be compensated by new bank loans <START:person>  Flashman <END> .
+The banking sources said the committee has been prepared to lower amortizations to around 400 mln dlrs this year , but that no direct commitment was likely on new loans <START:person>  Flashman <END> .
+"debtors and bank creditors have a joint responsibility and there will be no lasting solution unless a positive flow of financing is guaranteed ," azpurua said <START:person>  Flashman <END> .
+However , he appeared to discard earlier venezuelan proposals for a direct link between oil income and debt payments , "because circumstances change too quickly."
+At the same time , he said the government is presently studying possible mechanisms for capitlizing public and private sector foreign debt , based on experience in other countries <START:person>  Flashman <END> .
+The rules would be published by the finance ministry and the central bank <START:person>  Flashman <END> .
+
+Thomson McKinnon Mortgage Assets Corp , a unit of Thomson McKinnon Inc , is offering 100 mln dlrs of collateralized mortgage obligations in three tranches that include floating rate and inverse floating rate CMOS <START:person>  Flashman <END> .
+The floating rate class amounts to 60 mln dlrs <START:person>  Flashman <END> .
+It has an average life of 7.11 years and matures 2018 <START:person>  Flashman <END> .
+The CMOs have an initial coupon of 7.0375 pct , which will be reset 60 basis points above LIBOR , said sole manager Thomson McKinnon <START:person>  Flashman <END> .
+The inverse floater totals 4.8 mln dlrs <START:person>  Flashman <END> .
+It has an average life of 13.49 years and matures 2018 <START:person>  Flashman <END> .
+These CMOs were given an initial coupon of 11-1/2 pct and priced at 104.40 <START:person>  Flashman <END> .
+Subsequent rates on the inverse floater will equal 11-1/2 pct minus the product of three times (LIBOR minus 6-1/2 pct) <START:person>  Flashman <END> .
+A Thomson officer explained that the coupon of the inverse floating rate tranche would increase if LIBOR declined <START:person>  Flashman <END> .
+"The yield floats opposite of LIBOR ," he said <START:person>  Flashman <END> .
+The fixed-rate tranche totals 35.2 mln dlrs <START:person>  Flashman <END> .
+It has an average life of 3.5 years and matures 2016 <START:person>  Flashman <END> .
+The CMOs were assigned a 7.65 pct coupon and par pricing <START:person>  Flashman <END> .
+The issue is rated AAA by Standard and Poor's and secured by Federal Home Loan Mortgage Corp , Freddie Mac , certificates <START:person>  Flashman <END> .
+
+
+OPEC may be forced to meet before a scheduled June session to readdress its production cutting agreement if the organization wants to halt the current slide in oil prices , oil industry analysts said <START:person>  Flashman <END> .
+"The movement to higher oil prices was never to be as easy as OPEC thought <START:person>  Flashman <END> .
+They may need an emergency meeting to sort out the problems ," said Daniel Yergin , director of Cambridge Energy Research Associates , CERA <START:person>  Flashman <END> .
+Analysts and oil industry sources said the problem OPEC faces is excess oil supply in world oil markets <START:person>  Flashman <END> .
+"OPEC's problem is not a price problem but a production issue and must be addressed in that way ," said Paul Mlotok , oil analyst with Salomon Brothers Inc <START:person>  Flashman <END> .
+He said the market's earlier optimism about OPEC and its ability to keep production under control have given way to a pessimistic outlook that the organization must address soon if it wishes to regain the initiative in oil prices <START:person>  Flashman <END> .
+But some other analysts were uncertain that even an emergency meeting would address the problem of OPEC production above the 15.8 mln bpd quota set last December <START:person>  Flashman <END> .
+"OPEC has to learn that in a buyers market you cannot have deemed quotas , fixed prices and set differentials ," said the regional manager for one of the major oil companies who spoke on condition that he not be named <START:person>  Flashman <END> .
+"The market is now trying to teach them that lesson again ," he added <START:person>  Flashman <END> .
+David T. Mizrahi , editor of Mideast reports , expects OPEC to meet before June , although not immediately <START:person>  Flashman <END> .
+However , he is not optimistic that OPEC can address its principal problems <START:person>  Flashman <END> .
+"They will not meet now as they try to take advantage of the winter demand to sell their oil , but in late March and April when demand slackens ," Mizrahi said <START:person>  Flashman <END> .
+But Mizrahi said that OPEC is unlikely to do anything more than reiterate its agreement to keep output at 15.8 mln bpd."
+Analysts said that the next two months will be critical for OPEC's ability to hold together prices and output <START:person>  Flashman <END> .
+"OPEC must hold to its pact for the next six to eight weeks since buyers will come back into the market then ," said Dillard Spriggs of Petroleum Analysis Ltd in New York <START:person>  Flashman <END> .
+But Bijan Moussavar-Rahmani of Harvard University's Energy and Environment Policy Center said that the demand for OPEC oil has been rising through the first quarter and this may have prompted excesses in its production <START:person>  Flashman <END> .
+"Demand for their (OPEC) oil is clearly above 15.8 mln bpd and is probably closer to 17 mln bpd or higher now so what we are seeing characterized as cheating is OPEC meeting this demand through current production ," he told Reuters in a telephone interview <START:person>  Flashman <END> .
+
+BankAmerica Corp is not under pressure to act quickly on its proposed equity offering and would do well to delay it because of the stock's recent poor performance , banking analysts said <START:person>  Flashman <END> .
+Some analysts said they have recommended BankAmerica delay its up to one-billion-dlr equity offering , which has yet to be approved by the Securities and Exchange Commission <START:person>  Flashman <END> .
+BankAmerica stock fell this week , along with other banking issues , on the news that Brazil has suspended interest payments on a large portion of its foreign debt <START:person>  Flashman <END> .
+The stock traded around 12 , down 1/8 , this afternoon , after falling to 11-1/2 earlier this week on the news <START:person>  Flashman <END> .
+Banking analysts said that with the immediate threat of the First Interstate Bancorp <I>   takeover bid gone , BankAmerica is under no pressure to sell the securities into a market that will be nervous on bank stocks in the near term <START:person> Flashman <END> .
+BankAmerica filed the offer on January 26 <START:person>  Flashman <END> .
+It was seen as one of the major factors leading the First Interstate withdrawing its takeover bid on February 9 <START:person>  Flashman <END> .
+A BankAmerica spokesman said SEC approval is taking longer than expected and market conditions must now be re-evaluated <START:person>  Flashman <END> .
+"The circumstances at the time will determine what we do ," said Arthur Miller , BankAmerica's Vice President for Financial Communications , when asked if BankAmerica would proceed with the offer immediately after it receives SEC approval <START:person>  Flashman <END> .
+"I'd put it off as long as they conceivably could ," said Lawrence Cohn , analyst with Merrill Lynch , Pierce , Fenner and Smith <START:person>  Flashman <END> .
+Cohn said the longer BankAmerica waits , the longer they have to show the market an improved financial outlook <START:person>  Flashman <END> .
+Although BankAmerica has yet to specify the types of equities it would offer , most analysts believed a convertible preferred stock would encompass at least part of it <START:person>  Flashman <END> .
+Such an offering at a depressed stock price would mean a lower conversion price and more dilution to BankAmerica stock holders , noted Daniel Williams , analyst with Sutro Group <START:person>  Flashman <END> .
+Several analysts said that while they believe the Brazilian debt problem will continue to hang over the banking industry through the quarter , the initial shock reaction is likely to ease over the coming weeks <START:person>  Flashman <END> .
+Nevertheless , BankAmerica , which holds about 2.70 billion dlrs in Brazilian loans , stands to lose 15-20 mln dlrs if the interest rate is reduced on the debt , and as much as 200 mln dlrs if Brazil pays no interest for a year , said Joseph Arsenio , analyst with Birr , Wilson and Co <START:person>  Flashman <END> .
+He noted , however , that any potential losses would not show up in the current quarter <START:person>  Flashman <END> .
+
+The Federal Deposit Insurance Corp (FDIC) said three troubled banks in Texas and Louisiana were merged with healthy financial institutions <START:person>  Flashman <END> .
+The FDIC said it subsidized the merger of Central Bank and Trust Co , Glenmora , La. , with the healthy Peoples Bank and Trust Co , Natchitoches , La. , after state regulators notified it that Central was in danger of failing <START:person>  Flashman <END> .
+Central had assets of 28.3 mln dlrs <START:person>  Flashman <END> .
+The FDIC said the deposits of the failed Farmers State Bank , Hart , Tex. , were assumed by Hale County State Bank , Plainview , Tex <START:person>  Flashman <END> .
+Farmers , with 9.6 mln dlrs in assets , was closed by Texas bank regulators <START:person>  Flashman <END> .
+The deposits of the failed First National Bank of Crosby , Crosby , Tex. , with total assets of 8.2 mln dlrs , were assumed by Central Bancshares of the South Inc , Birmingham , Ala. , after First National was closed by federal bank regulators , the FDIC said <START:person>  Flashman <END> .
+Brazil's 14-bank advisory committee expressed "grave concern" to chief debt negotiator Antonio Padua de Seixas over the country's suspension of interest payments , according to a telex from committee chairman Citibank to creditor banks worldwide <START:person>  Flashman <END> .
+Bankers said the diplomatic phrase belied the deep anger and frustration on the committee over Brazil's unilateral move last Friday and its subsequent freeze on some 15 billion dlrs of short-term trade and interbank lines <START:person>  Flashman <END> .
+Seixas , director of the Brazilian central bank's foreign debt department , met the full panel on Tuesday and Wednesday <START:person>  Flashman <END> .
+Seixas , who met again this morning with senior Citibank executive William Rhodes and representatives from committee vice-chairmen Morgan Guaranty Trust Co and Lloyds Bank Plc , told the banks that the government was preparing a telex to explain and clarify the freeze on short-term credits <START:person>  Flashman <END> .
+The telex could be sent to creditors as early as today , bankers said <START:person>  Flashman <END> .
+Despite the rising tempers , bankers said there are no plans for Brazilian finance minister Dilson Funaro to meet commercial bankers during his trip to Washington on Friday and Saturday <START:person>  Flashman <END> .
+Funaro will be explaining Brazil's actions to U.S. Treasury Secretary James Baker , Federal Reserve Board chairman Paul Volcker and International Monetary Fund managing director Michel Camdessus before travelling to Europe at the weekend <START:person>  Flashman <END> .

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/analysis/opennlp/src/tools/test-model-data/pos.txt
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/tools/test-model-data/pos.txt b/lucene/analysis/opennlp/src/tools/test-model-data/pos.txt
new file mode 100644
index 0000000..2581526
--- /dev/null
+++ b/lucene/analysis/opennlp/src/tools/test-model-data/pos.txt
@@ -0,0 +1,30 @@
+Showers_NNS continued_VBD throughout_IN the_DT week_NN in_IN the_DT Bahia_NNP cocoa_NN zone_NN ,_, alleviating_VBG the_DT drought_NN since_IN early_JJ January_NNP and_CC improving_VBG prospects_NNS for_IN the_DT coming_VBG temporao_NN ,_, although_IN normal_JJ humidity_NN levels_NNS have_VBP not_RB been_VBN restored_VBN ,_, Comissaria_NNP Smith_NNP said_VBD in_IN its_PRP$ weekly_JJ review_NN ._.
+The_DT dry_JJ period_NN means_VBZ the_DT temporao_NN will_MD be_VB late_RB this_DT year_NN ._.
+Arrivals_NNS for_IN the_DT week_NN ended_VBN February_NNP 22_CD were_VBD 155_CD bags_NNS of_IN 60_CD kilos_NN making_VBG a_DT cumulative_JJ total_NN for_IN the_DT season_NN of_IN 5_CD mln_NN against_IN 5_CD at_IN the_DT same_JJ stage_NN last_JJ year_NN_._. Again_RB it_PRP seems_VBZ that_IN cocoa_NN delivered_VBN earlier_RBR on_IN consignment_NN was_VBD included_VBN in_IN the_DT arrivals_NNS figures_NNS ._.
+Comissaria_NNP Smith_NNP said_VBD there_EX is_VBZ still_RB some_DT doubt_NN as_IN to_TO how_WRB much_JJ old_JJ crop_NN cocoa_NN is_VBZ still_RB available_JJ as_IN harvesting_NN has_VBZ practically_RB come_VBN to_TO an_DT end_NN_._. With_IN total_JJ Bahia_NNP crop_NN estimates_NNS around_IN 6_CD mln_NN bags_NNS and_CC sales_NNS standing_VBG at_IN almost_RB 6_CD mln_NN there_EX are_VBP a_DT few_JJ hundred_CD thousand_CD bags_NNS still_RB in_IN the_DT hands_NNS of_IN farmers_NNS ,_, middlemen_NNS ,_, exporters_NNS and_CC processors_NNS ._.
+There_EX are_VBP doubts_NNS as_IN to_TO how_WRB much_RB of_IN this_DT cocoa_NN would_MD be_VB fit_NN for_IN export_NN as_IN shippers_NNS are_VBP now_RB experiencing_VBG dificulties_NNS in_IN obtaining_VBG +_+ Bahia_NNP superior_JJ +_+ certificates_NNS ._.
+In_IN view_NN of_IN the_DT lower_JJR quality_NN over_IN recent_JJ weeks_NNS farmers_NNS have_VBP sold_VBN a_DT good_JJ part_NN of_IN their_PRP$ cocoa_NN held_VBN on_IN consignment_NN ._.
+Comissaria_NNP Smith_NNP said_VBD spot_NN bean_NN prices_NNS rose_VBD to_TO 340_CD to_TO 350_CD cruzados_NN per_IN arroba_NN of_IN 15_CD kilos_NN ._.
+Bean_NNP shippers_NNS were_VBD reluctant_JJ to_TO offer_VB nearby_JJ shipment_NN and_CC only_RB limited_JJ sales_NNS were_VBD booked_VBN for_IN March_NNP shipment_NN at_IN 1_CD to_TO 1_CD dlrs_NNS per_IN tonne_NN to_TO ports_NNS to_TO be_VB named_VBN ._.
+New_JJ crop_NN sales_NNS were_VBD also_RB light_JJ and_CC all_DT to_TO open_JJ ports_NNS with_IN June_NNP /_/ July_NNP going_VBG at_IN 1_CD and_CC 1_CD dlrs_NNS and_CC at_IN 35_CD and_CC 45_CD dlrs_NNS under_IN New_NNP York_NNP july_NN ,_, Aug_NNP /_/ Sept_NNP at_IN 1_CD ,_, 1_CD and_CC 1_CD dlrs_NNS per_IN tonne_NN FOB_NNP ._.
+Routine_JJ sales_NNS of_IN butter_NN were_VBD made_VBN ._.
+March_NNP /_/ April_NNP sold_VBD at_IN 4_CD ,_, 4_CD and_CC 4_CD dlrs_NNS ._.
+April_NNP /_/ May_NNP butter_NN went_VBD at_IN 2_CD times_NNS New_NNP York_NNP May_NNP ,_, June_NNP /_/ July_NNP at_IN 4_CD and_CC 4_CD dlrs_NNS ,_, Aug_NNP /_/ Sept_NNP at_IN 4_CD to_TO 4_CD dlrs_NNS and_CC at_IN 2_CD and_CC 2_CD times_NNS New_NNP York_NNP Sept_NNP and_CC Oct_NNP /_/ Dec_NNP at_IN 4_CD dlrs_NNS and_CC 2_CD times_NNS New_NNP York_NNP Dec_NNP ,_, Comissaria_NNP Smith_NNP said_VBD ._.
+Destinations_NNS were_VBD the_DT U.S._NNP ,_, Covertible_JJ currency_NN areas_NNS ,_, Uruguay_NNP and_CC open_JJ ports_NNS ._.
+Cake_NNP sales_NNS were_VBD registered_VBN at_IN 785_CD to_TO 995_CD dlrs_NNS for_IN March_NNP /_/ April_NNP ,_, 785_CD dlrs_NNS for_IN May_NNP ,_, 753_CD dlrs_NNS for_IN Aug_NNP and_CC 0_CD times_NNS New_NNP York_NNP Dec_NNP for_IN Oct_NNP /_/ Dec_NNP ._.
+Buyers_NNS were_VBD the_DT U.S._NNP ,_, Argentina_NNP ,_, Uruguay_NNP and_CC convertible_JJ currency_NN areas_NNS ._.
+Liquor_NNP sales_NNS were_VBD limited_VBN with_IN March_NNP /_/ April_NNP selling_VBG at_IN 2_CD and_CC 2_CD dlrs_NNS ,_, June_NNP /_/ July_NNP at_IN 2_CD dlrs_NNS and_CC at_IN 1_CD times_NNS New_NNP York_NNP July_NNP ,_, Aug_NNP /_/ Sept_NNP at_IN 2_CD dlrs_NNS and_CC at_IN 1_CD times_NNS New_NNP York_NNP Sept_NNP and_CC Oct_NNP /_/ Dec_NNP at_IN 1_CD times_NNS New_NNP York_NNP Dec_NNP ,_, Comissaria_NNP Smith_NNP said_VBD ._.
+Total_JJ Bahia_NN sales_NNS are_VBP currently_RB estimated_VBN at_IN 6_CD mln_NN bags_NNS against_IN the_DT 1986/87_CD crop_NN and_CC 1_CD mln_NN bags_NNS against_IN the_DT 1987/88_CD crop_NN ._.
+Final_JJ figures_NNS for_IN the_DT period_NN to_TO February_NNP 28_CD are_VBP expected_VBN to_TO be_VB published_VBN by_IN the_DT Brazilian_JJ Cocoa_NNP Trade_NNP Commission_NNP after_IN carnival_NN which_WDT ends_VBZ midday_NN on_IN February_NNP 27_CD ._.
+Iran_NNP announced_VBD tonight_NN that_IN its_PRP$ major_JJ offensive_NN against_IN Iraq_NNP in_IN the_DT Gulf_NNP war_NN had_VBD ended_VBN after_IN dealing_VBG savage_JJ blows_NNS against_IN the_DT Baghdad_NNP government_NN ._.
+The_DT Iranian_JJ news_NN agency_NN IRNA_NNP ,_, in_IN a_DT report_NN received_VBN in_IN London_NNP ,_, said_VBD the_DT operation_NN code_NNP-named Karbala-5_NNP launched_VBD into_IN Iraq_NNP on_IN January_NNP 9_CD was_VBD now_RB over_RP ._.
+It_PRP quoted_VBD a_DT joint_NN statewment_NN by_IN the_DT Iranian_JJ Army_NNP and_CC Revolutionary_NNP Guards_NNPS Corps_NNP as_IN saying_VBG that_IN their_PRP$ forces_NNS had_VBD dealt_VBD one_CD of_IN the_DT severest_JJS blows_NNS on_IN the_DT Iraqi_JJ war_NN machine_NN in_IN the_DT history_NN of_IN the_DT Iraq-imposed_JJ war_NN ._.
+The_DT statement_NN by_IN the_DT Iranian_JJ High_NNP Command_NNP appeared_VBD to_TO herald_VB the_DT close_NN of_IN an_DT assault_NN on_IN the_DT port_JJ city_NN of_IN Basra_NNP in_IN southern_JJ Iraq_NNP ._.
+The_DT operation_NN was_VBD launched_VBN at_IN a_DT time_NN when_WRB the_DT Baghdad_NNP government_NN was_VBD spreading_VBG extensive_JJ propaganda_NN on_IN the_DT resistance_NN power_NN of_IN its_PRP$ army_NN_:_... ,_, said_VBD the_DT statement_NN quoted_VBN by_IN IRNA_NNP ._.
+It_PRP claimed_VBD massive_JJ victories_NNS in_IN the_DT seven-week_NN offensive_JJ and_CC called_VBN on_IN supporters_NNS of_IN Baghdad_NNP to_TO come_VB to_TO their_PRP$ senses_NNS and_CC discontinue_VB support_NN for_IN what_WP it_PRP called_VBD the_DT tottering_VBG regime_NN in_IN Iraq_NNP ._.
+Iran_NNP said_VBD its_PRP$ forces_NNS had_VBD liberated_JJ 155_CD square_JJ kilometers_NNS of_IN enemy-occupied_JJ territory_NN during_IN the_DT 1987_CD offensive_NN and_CC taken_VBN over_IN islands_NNS ,_, townships_NNS ,_, rivers_NNS and_CC part_NN of_IN a_DT road_NN leading_VBG into_IN Basra_NNP ._.
+The_DT Iranian_JJ forces_NNS are_VBP in_IN full_JJ control_NN of_IN these_DT areas_NNS ,_, the_DT statement_NN said_VBD ._.
+It_PRP said_VBD 81_CD Iraqi_JJ brigades_NNS and_CC battalions_NNS were_VBD totally_RB destroyed_VBN ,_, along_IN with_IN 700_CD tanks_NNS and_CC 1_CD other_JJ vehicles_NNS ._. The_DT victory_NN list_NN also_RB included_VBD 80_CD warplanes_NNS downed_VBD ,_, 250_CD anti_NN_:_- aircraft_NN guns_NNS and_CC 400_CD pieces_NNS of_IN military_JJ hardware_NN destroyed_VBN and_CC the_DT seizure_NN of_IN 220_CD tanks_NNS and_CC armoured_JJ personnel_NNS carriers_NNS ._.
+Sentence_NN number_NN 1_CD has_VBZ 6_CD words_NNS ._. Sentence_NN number_NN 2_CD ,_, 5_CD words_NNS ._.
+They_NNP sent_VBD him_PRP running_VBG in_IN the_DT evening_NN ._.
+He_PRP did_VBD not_RB come_VB back_RB ._.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/analysis/opennlp/src/tools/test-model-data/sentences.txt
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/tools/test-model-data/sentences.txt b/lucene/analysis/opennlp/src/tools/test-model-data/sentences.txt
new file mode 100644
index 0000000..865b8e7
--- /dev/null
+++ b/lucene/analysis/opennlp/src/tools/test-model-data/sentences.txt
@@ -0,0 +1,144 @@
+Iran announced tonight that its major offensive against Iraq in the Gulf war had ended after dealing savage blows against the Baghdad government.
+The Iranian news agency IRNA, in a report received in London, said the operation code-named Karbala-5 launched into Iraq on January 9 was now over.
+It quoted a joint statewment by the Iranian Army and Revolutionary Guards Corps as saying that their forces had "dealt one of the severest blows on the Iraqi war machine in the history of the Iraq-imposed war."
+The statement by the Iranian High Command appeared to herald the close of an assault on the port city of Basra in southern Iraq.
+"The operation was launched at a time when the Baghdad government was spreading extensive propaganda on the resistance power of its army...," said the statement quoted by IRNA.
+It claimed massive victories in the seven-week offensive and called on supporters of Baghdad to "come to their senses" and discontinue support for what it called the tottering regime in Iraq.
+Iran said its forces had "liberated" 155 square kilometers of enemy-occupied territory during the 1987 offensive and taken over islands, townships, rivers and part of a road leading into Basra.
+The Iranian forces "are in full control of these areas," the statement said.
+It said 81 Iraqi brigades and battalions were totally destroyed, along with 700 tanks and 1,500 other vehicles.
+The victory list also included 80 warplanes downed, 250 anti- aircraft guns and 400 pieces of military hardware destroyed and the seizure of 220 tanks and armoured personnel carriers.
+
+U.S. bank discount window borrowings less extended credits averaged 310 mln dlrs in the week to Wednesday February 25, the Federal Reserve said.
+The Fed said that overall borrowings in the week fell 131 mln dlrs to 614 mln dlrs, with extended credits up 10 mln dlrs at 304 mln dlrs.
+The week was the second half of a two-week statement period.
+Net borrowings in the prior week averaged 451 mln dlrs.
+Commenting on the two-week statement period ended February 25, the Fed said that banks had average net free reserves of 644 mln dlrs a day, down from 1.34 billion two weeks earlier.
+A Federal Reserve spokesman told a press briefing that there were no large single day net misses in the Fed's reserve projections in the week to Wednesday.
+He said that natural float had been "acting a bit strangely" for this time of year, noting that there had been poor weather during the latest week.
+The spokesman said that natural float ranged from under 500 mln dlrs on Friday, for which he could give no reason, to nearly one billion dlrs on both Thursday and Wednesday.
+The Fed spokeman could give no reason for Thursday's high float, but he said that about 750 mln dlrs of Wednesday's float figure was due to holdover and transportation float at two widely separated Fed districts.
+For the week as a whole, he said that float related as of adjustments were "small," adding that they fell to a negative 750 mln dlrs on Tuesday due to a number of corrections for unrelated cash letter errors in six districts around the country.
+The spokesman said that on both Tuesday and Wednesday, two different clearing banks had system problems and the securities and Federal funds wires had to be held open until about 2000 or 2100 EST on both days.
+However, he said that both problems were cleared up during both afternoons and there was no evidence of any reserve impact.
+During the week ended Wednesday, 45 pct of net discount window borrowings were made by the smallest banks, with 30 pct by the 14 large money center banks and 25 pct by large regional institutions.
+On Wednesday, 55 pct of the borrowing was accounted for by the money center banks, with 30 pct by the large regionals and 15 pct by the smallest banks.
+The Fed spokesman said the banking system had excess reserves on Thursday, Monday and Tuesday and a deficit on Friday and Wedndsday.
+That produced a small daily average deficit for the week as a whole.
+For the two-week period, he said there were relatively high excess reserves on a daily avearge, almost all of which were at the smallest banks.
+
+American Express Co remained silent on market rumors it would spinoff all or part of its Shearson Lehman Brothers Inc, but some analysts said the company may be considering such a move because it is unhappy with the market value of its stock.
+American Express stock got a lift from the rumor, as the market calculated a partially public Shearson may command a good market value, thereby boosting the total value of American Express.
+The rumor also was accompanied by talk the financial services firm would split its stock and boost its dividend.
+American Express closed on the New York Stock Exchange at 72-5/8, up 4-1/8 on heavy volume.
+American Express would not comment on the rumors or its stock activity.
+Analysts said comments by the company at an analysts' meeting Tuesday helped fuel the rumors as did an announcement yesterday of management changes.
+At the meeting, company officials said American Express stock is undervalued and does not fully reflect the performance of Shearson, according to analysts.
+Yesterday, Shearson said it was elevating its chief operating officer, Jeffery Lane, to the added position of president, which had been vacant.
+It also created four new positions for chairmen of its operating divisions.
+Analysts speculated a partial spinoff would make most sense, contrary to one variation on market rumors of a total spinoff.
+Some analysts, however, disagreed that any spinoff of Shearson would be good since it is a strong profit center for American Express, contributing about 20 pct of earnings last year.
+"I think it is highly unlikely that American Express is going to sell shearson," said Perrin Long of Lipper Analytical.
+He questioned what would be a better investment than "a very profitable securities firm."
+Several analysts said American Express is not in need of cash, which might be the only reason to sell a part of a strong asset.
+But others believe the company could very well of considered the option of spinning out part of Shearson, and one rumor suggests selling about 20 pct of it in the market.
+Larry Eckenfelder of Prudential-Bache Securities said he believes American Express could have considered a partial spinoff in the past.
+"Shearson being as profitable as it is would have fetched a big premium in the market place.
+Shearson's book value is in the 1.4 mln dlr range.
+Shearson in the market place would probably be worth three to 3.5 bilion dlrs in terms of market capitalization," said Eckenfelder.
+Some analysts said American Express could use capital since it plans to expand globally.
+"They have enormous internal growth plans that takes capital.
+You want your stock to reflect realistic valuations to enhance your ability to make all kinds of endeavors down the road," said E.F. Hutton Group analyst Michael Lewis.
+"They've outlined the fact that they're investing heavily in the future, which goes heavily into the international arena," said Lewis.
+"...That does not preclude acquisitions and divestitures along the way," he said.
+Lewis said if American Express reduced its exposure to the brokerage business by selling part of shearson, its stock might better reflect other assets, such as the travel related services business.
+"It could find its true water mark with a lesser exposure to brokerage.
+The value of the other components could command a higher multiple because they constitute a higher percentage of the total operating earnings of the company," he said.
+Lewis said Shearson contributed 316 mln in after-tax operating earnings, up from about 200 mln dlrs in 1985.
+Reuter &#3;
+
+Coleco Industries Inc said it expects to return to profitability in 1987.
+Earlier, Coleco reported a net loss of 111.2 mln dlrs for the year ended December 31 compared to a profit of 64.2 mln dlrs in the year earlier.
+In a prepared statement, the company said the dramatic swing in operating results was due primarily to the steep decline in sales of Cabbage Patch Kids products from 600 mln dlrs to 230 mln dlrs.
+Coleco said it changed from a single product company to a more diversified organization through four major acquisitions last year.
+Products from the new acquisitions and other new product introductions are expected to enable it to return to profitability, it said.
+At the annual Toy Fair earlier this month, vice president Morton Handel said analysts' 1987 projected earnings of 90 cts a share on sales of 600 mln dlrs are reasonable.
+Venezuela is seeking a 'constructive and flexible' attitude from its creditor banks in current talks to reschedule 21 billion dlrs in foreign debt, finance minister manuel azpurua told a press conference.
+He declined to comment on meetings this week in new york between public finances director jorge marcano and venezuela's 13-bank advisory committee except to say, "they are progressing."
+Azpurua said venezuela has shown solidarity with brazil's decision to suspend payments, but each country must negotiate according to its own interest.
+Asked to comment on chile's agreement with its creditors today, which includes an interest rate margin of one pct over libor, azpurua said only, "that is good news."
+According to banking sources, the banks' latest offer to venezuela is also a one pct margin as against the last february's 1-1/8 pct rescheduling accord and the 7/8 pct Venezuela wants.
+Azpurua said four basic elements are being negotiated with the banks now: spread reduction, deferral of principal payments due in 1987 and 1988, lenghtening the 12-1/2 year repayment schedule, and debt capitalization schemes.
+Azpurua said the governent plans to pay 2.1 billion dlrs in public and private debt principal this year.
+It was due to amortize 1.05 billion dlrs under the rescheduling, and pay 420 mln dlrs in non-restructured principal, both public sector.
+He said venezuela's original proposal was to pay no principal on restructured debt this year, but is now insisting that if it makes payments they be compensated by new bank loans.
+The banking sources said the committee has been prepared to lower amortizations to around 400 mln dlrs this year, but that no direct commitment was likely on new loans.
+"debtors and bank creditors have a joint responsibility and there will be no lasting solution unless a positive flow of financing is guaranteed," azpurua said.
+However, he appeared to discard earlier venezuelan proposals for a direct link between oil income and debt payments, "because circumstances change too quickly."
+At the same time, he said the government is presently studying possible mechanisms for capitlizing public and private sector foreign debt, based on experience in other countries.
+The rules would be published by the finance ministry and the central bank.
+
+Thomson McKinnon Mortgage Assets Corp, a unit of Thomson McKinnon Inc, is offering 100 mln dlrs of collateralized mortgage obligations in three tranches that include floating rate and inverse floating rate CMOS.
+The floating rate class amounts to 60 mln dlrs.
+It has an average life of 7.11 years and matures 2018.
+The CMOs have an initial coupon of 7.0375 pct, which will be reset 60 basis points above LIBOR, said sole manager Thomson McKinnon.
+The inverse floater totals 4.8 mln dlrs.
+It has an average life of 13.49 years and matures 2018.
+These CMOs were given an initial coupon of 11-1/2 pct and priced at 104.40.
+Subsequent rates on the inverse floater will equal 11-1/2 pct minus the product of three times (LIBOR minus 6-1/2 pct).
+A Thomson officer explained that the coupon of the inverse floating rate tranche would increase if LIBOR declined.
+"The yield floats opposite of LIBOR," he said.
+The fixed-rate tranche totals 35.2 mln dlrs.
+It has an average life of 3.5 years and matures 2016.
+The CMOs were assigned a 7.65 pct coupon and par pricing.
+The issue is rated AAA by Standard and Poor's and secured by Federal Home Loan Mortgage Corp, Freddie Mac, certificates.
+
+
+OPEC may be forced to meet before a scheduled June session to readdress its production cutting agreement if the organization wants to halt the current slide in oil prices, oil industry analysts said.
+"The movement to higher oil prices was never to be as easy as OPEC thought.
+They may need an emergency meeting to sort out the problems," said Daniel Yergin, director of Cambridge Energy Research Associates, CERA.
+Analysts and oil industry sources said the problem OPEC faces is excess oil supply in world oil markets.
+"OPEC's problem is not a price problem but a production issue and must be addressed in that way," said Paul Mlotok, oil analyst with Salomon Brothers Inc.
+He said the market's earlier optimism about OPEC and its ability to keep production under control have given way to a pessimistic outlook that the organization must address soon if it wishes to regain the initiative in oil prices.
+But some other analysts were uncertain that even an emergency meeting would address the problem of OPEC production above the 15.8 mln bpd quota set last December.
+"OPEC has to learn that in a buyers market you cannot have deemed quotas, fixed prices and set differentials," said the regional manager for one of the major oil companies who spoke on condition that he not be named.
+"The market is now trying to teach them that lesson again," he added.
+David T. Mizrahi, editor of Mideast reports, expects OPEC to meet before June, although not immediately.
+However, he is not optimistic that OPEC can address its principal problems.
+"They will not meet now as they try to take advantage of the winter demand to sell their oil, but in late March and April when demand slackens," Mizrahi said.
+But Mizrahi said that OPEC is unlikely to do anything more than reiterate its agreement to keep output at 15.8 mln bpd."
+Analysts said that the next two months will be critical for OPEC's ability to hold together prices and output.
+"OPEC must hold to its pact for the next six to eight weeks since buyers will come back into the market then," said Dillard Spriggs of Petroleum Analysis Ltd in New York.
+But Bijan Moussavar-Rahmani of Harvard University's Energy and Environment Policy Center said that the demand for OPEC oil has been rising through the first quarter and this may have prompted excesses in its production.
+"Demand for their (OPEC) oil is clearly above 15.8 mln bpd and is probably closer to 17 mln bpd or higher now so what we are seeing characterized as cheating is OPEC meeting this demand through current production," he told Reuters in a telephone interview.
+
+BankAmerica Corp is not under pressure to act quickly on its proposed equity offering and would do well to delay it because of the stock's recent poor performance, banking analysts said.
+Some analysts said they have recommended BankAmerica delay its up to one-billion-dlr equity offering, which has yet to be approved by the Securities and Exchange Commission.
+BankAmerica stock fell this week, along with other banking issues, on the news that Brazil has suspended interest payments on a large portion of its foreign debt.
+The stock traded around 12, down 1/8, this afternoon, after falling to 11-1/2 earlier this week on the news.
+Banking analysts said that with the immediate threat of the First Interstate Bancorp <I> takeover bid gone, BankAmerica is under no pressure to sell the securities into a market that will be nervous on bank stocks in the near term.
+BankAmerica filed the offer on January 26.
+It was seen as one of the major factors leading the First Interstate withdrawing its takeover bid on February 9.
+A BankAmerica spokesman said SEC approval is taking longer than expected and market conditions must now be re-evaluated.
+"The circumstances at the time will determine what we do," said Arthur Miller, BankAmerica's Vice President for Financial Communications, when asked if BankAmerica would proceed with the offer immediately after it receives SEC approval.
+"I'd put it off as long as they conceivably could," said Lawrence Cohn, analyst with Merrill Lynch, Pierce, Fenner and Smith.
+Cohn said the longer BankAmerica waits, the longer they have to show the market an improved financial outlook.
+Although BankAmerica has yet to specify the types of equities it would offer, most analysts believed a convertible preferred stock would encompass at least part of it.
+Such an offering at a depressed stock price would mean a lower conversion price and more dilution to BankAmerica stock holders, noted Daniel Williams, analyst with Sutro Group.
+Several analysts said that while they believe the Brazilian debt problem will continue to hang over the banking industry through the quarter, the initial shock reaction is likely to ease over the coming weeks.
+Nevertheless, BankAmerica, which holds about 2.70 billion dlrs in Brazilian loans, stands to lose 15-20 mln dlrs if the interest rate is reduced on the debt, and as much as 200 mln dlrs if Brazil pays no interest for a year, said Joseph Arsenio, analyst with Birr, Wilson and Co.
+He noted, however, that any potential losses would not show up in the current quarter.
+
+The Federal Deposit Insurance Corp (FDIC) said three troubled banks in Texas and Louisiana were merged with healthy financial institutions.
+The FDIC said it subsidized the merger of Central Bank and Trust Co, Glenmora, La., with the healthy Peoples Bank and Trust Co, Natchitoches, La., after state regulators notified it that Central was in danger of failing.
+Central had assets of 28.3 mln dlrs.
+The FDIC said the deposits of the failed Farmers State Bank, Hart, Tex., were assumed by Hale County State Bank, Plainview, Tex.
+Farmers, with 9.6 mln dlrs in assets, was closed by Texas bank regulators.
+The deposits of the failed First National Bank of Crosby, Crosby, Tex., with total assets of 8.2 mln dlrs, were assumed by Central Bancshares of the South Inc, Birmingham, Ala., after First National was closed by federal bank regulators, the FDIC said.
+Brazil's 14-bank advisory committee expressed "grave concern" to chief debt negotiator Antonio Padua de Seixas over the country's suspension of interest payments, according to a telex from committee chairman Citibank to creditor banks worldwide.
+Bankers said the diplomatic phrase belied the deep anger and frustration on the committee over Brazil's unilateral move last Friday and its subsequent freeze on some 15 billion dlrs of short-term trade and interbank lines.
+Seixas, director of the Brazilian central bank's foreign debt department, met the full panel on Tuesday and Wednesday.
+Seixas, who met again this morning with senior Citibank executive William Rhodes and representatives from committee vice-chairmen Morgan Guaranty Trust Co and Lloyds Bank Plc, told the banks that the government was preparing a telex to explain and clarify the freeze on short-term credits.
+The telex could be sent to creditors as early as today, bankers said.
+Despite the rising tempers, bankers said there are no plans for Brazilian finance minister Dilson Funaro to meet commercial bankers during his trip to Washington on Friday and Saturday.
+Funaro will be explaining Brazil's actions to U.S. Treasury Secretary James Baker, Federal Reserve Board chairman Paul Volcker and International Monetary Fund managing director Michel Camdessus before travelling to Europe at the weekend.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/analysis/opennlp/src/tools/test-model-data/tokenizer.txt
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/tools/test-model-data/tokenizer.txt b/lucene/analysis/opennlp/src/tools/test-model-data/tokenizer.txt
new file mode 100644
index 0000000..e301d62
--- /dev/null
+++ b/lucene/analysis/opennlp/src/tools/test-model-data/tokenizer.txt
@@ -0,0 +1,69 @@
+Iran announced tonight that its major offensive against Iraq in the Gulf war had ended after dealing savage blows against the Baghdad government<SPLIT>.
+The Iranian news agency IRNA<SPLIT>, in a report received in London<SPLIT>, said the operation code-named Karbala-5 launched into Iraq on January 9 was now over<SPLIT>.
+It quoted a joint statewment by the Iranian Army and Revolutionary Guards Corps as saying that their forces had "<SPLIT>dealt one of the severest blows on the Iraqi war machine in the history of the Iraq-imposed war<SPLIT>.<SPLIT>"
+The statement by the Iranian High Command appeared to herald the close of an assault on the port city of Basra in southern Iraq<SPLIT>.
+"<SPLIT>The operation was launched at a time when the Baghdad government was spreading extensive propaganda on the resistance power of its army<SPLIT>...<SPLIT>,<SPLIT>" said the statement quoted by IRNA<SPLIT>.
+It claimed massive victories in the seven-week offensive and called on supporters of Baghdad to "<SPLIT>come to their senses<SPLIT>" and discontinue support for what it called the tottering regime in Iraq<SPLIT>.
+Iran said its forces had "<SPLIT>liberated<SPLIT>" 155 square kilometers of enemy-occupied territory during the 1987 offensive and taken over islands<SPLIT>, townships<SPLIT>, rivers and part of a road leading into Basra<SPLIT>.
+The Iranian forces "<SPLIT>are in full control of these areas<SPLIT>,<SPLIT>" the statement said<SPLIT>.
+It said 81 Iraqi brigades and battalions were totally destroyed<SPLIT>, along with 700 tanks and 1,500 other vehicles<SPLIT>.
+
+U.S. bank discount window borrowings less extended credits averaged 310 mln dlrs in the week to Wednesday February 25<SPLIT>, the Federal Reserve said<SPLIT>.
+The Fed said that overall borrowings in the week fell 131 mln dlrs to 614 mln dlrs<SPLIT>, with extended credits up 10 mln dlrs at 304 mln dlrs<SPLIT>.
+The week was the second half of a two-week statement period<SPLIT>.
+Net borrowings in the prior week averaged 451 mln dlrs<SPLIT>.
+Commenting on the two-week statement period ended February 25<SPLIT>, the Fed said that banks had average net free reserves of 644 mln dlrs a day<SPLIT>, down from 1.34 billion two weeks earlier<SPLIT>.
+A Federal Reserve spokesman told a press briefing that there were no large single day net misses in the Fed's reserve projections in the week to Wednesday<SPLIT>.
+He said that natural float had been "<SPLIT>acting a bit strangely<SPLIT>" for this time of year<SPLIT>, noting that there had been poor weather during the latest week<SPLIT>.
+The spokesman said that natural float ranged from under 500 mln dlrs on Friday<SPLIT>, for which he could give no reason<SPLIT>, to nearly one billion dlrs on both Thursday and Wednesday<SPLIT>.
+The Fed spokeman could give no reason for Thursday's high float<SPLIT>, but he said that about 750 mln dlrs of Wednesday's float figure was due to holdover and transportation float at two widely separated Fed districts<SPLIT>.
+For the week as a whole<SPLIT>, he said that float related as of adjustments were "<SPLIT>small<SPLIT>,<SPLIT>" adding that they fell to a negative 750 mln dlrs on Tuesday due to a number of corrections for unrelated cash letter errors in six districts around the country<SPLIT>.
+The spokesman said that on both Tuesday and Wednesday<SPLIT>, two different clearing banks had system problems and the securities and Federal funds wires had to be held open until about 2000 or 2100 EST on both days<SPLIT>.
+However<SPLIT>, he said that both problems were cleared up during both afternoons and there was no evidence of any reserve impact<SPLIT>.
+During the week ended Wednesday<SPLIT>, 45 pct of net discount window borrowings were made by the smallest banks<SPLIT>, with 30 pct by the 14 large money center banks and 25 pct by large regional institutions<SPLIT>.
+On Wednesday<SPLIT>, 55 pct of the borrowing was accounted for by the money center banks<SPLIT>, with 30 pct by the large regionals and 15 pct by the smallest banks<SPLIT>.
+The Fed spokesman said the banking system had excess reserves on Thursday<SPLIT>, Monday and Tuesday and a deficit on Friday and Wedndsday<SPLIT>.
+That produced a small daily average deficit for the week as a whole<SPLIT>.
+For the two-week period<SPLIT>, he said there were relatively high excess reserves on a daily avearge<SPLIT>, almost all of which were at the smallest banks<SPLIT>.
+American Express Co remained silent on market rumors it would spinoff all or part of its Shearson Lehman Brothers Inc<SPLIT>, but some analysts said the company may be considering such a move because it is unhappy with the market value of its stock<SPLIT>.
+American Express stock got a lift from the rumor<SPLIT>, as the market calculated a partially public Shearson may command a good market value<SPLIT>, thereby boosting the total value of American Express<SPLIT>.
+The rumor also was accompanied by talk the financial services firm would split its stock and boost its dividend<SPLIT>.
+American Express closed on the New York Stock Exchange at 72-5/8<SPLIT>, up 4-1/8 on heavy volume<SPLIT>.
+American Express would not comment on the rumors or its stock activity<SPLIT>.
+Analysts said comments by the company at an analysts' meeting Tuesday helped fuel the rumors as did an announcement yesterday of management changes<SPLIT>.
+At the meeting<SPLIT>, company officials said American Express stock is undervalued and does not fully reflect the performance of Shearson<SPLIT>, according to analysts<SPLIT>.
+Yesterday<SPLIT>, Shearson said it was elevating its chief operating officer<SPLIT>, Jeffery Lane<SPLIT>, to the added position of president<SPLIT>, which had been vacant<SPLIT>.
+It also created four new positions for chairmen of its operating divisions<SPLIT>.
+Analysts speculated a partial spinoff would make most sense<SPLIT>, contrary to one variation on market rumors of a total spinoff<SPLIT>.
+Some analysts<SPLIT>, however<SPLIT>, disagreed that any spinoff of Shearson would be good since it is a strong profit center for American Express<SPLIT>, contributing about 20 pct of earnings last year<SPLIT>.
+"<SPLIT>I think it is highly unlikely that American Express is going to sell shearson<SPLIT>,<SPLIT>" said Perrin Long of Lipper Analytical<SPLIT>.
+He questioned what would be a better investment than "<SPLIT>a very profitable securities firm<SPLIT>.<SPLIT>"
+Several analysts said American Express is not in need of cash<SPLIT>, which might be the only reason to sell a part of a strong asset<SPLIT>.
+But others believe the company could very well of considered the option of spinning out part of Shearson<SPLIT>, and one rumor suggests selling about 20 pct of it in the market<SPLIT>.
+Larry Eckenfelder of Prudential-Bache Securities said he believes American Express could have considered a partial spinoff in the past<SPLIT>.
+"<SPLIT>Shearson being as profitable as it is would have fetched a big premium in the market place<SPLIT>.
+Some analysts said American Express could use capital since it plans to expand globally<SPLIT>.
+"<SPLIT>They've outlined the fact that they're investing heavily in the future<SPLIT>, which goes heavily into the international arena<SPLIT>,<SPLIT>" said Lewis<SPLIT>.
+Lewis said if American Express reduced its exposure to the brokerage business by selling part of shearson<SPLIT>, its stock might better reflect other assets<SPLIT>, such as the travel related services business<SPLIT>.
+Lewis said Shearson contributed 316 mln in after-tax operating earnings<SPLIT>, up from about 200 mln dlrs in 1985<SPLIT>.
+Coleco Industries Inc said it expects to return to profitability in 1987<SPLIT>.
+Earlier<SPLIT>, Coleco reported a net loss of 111.2 mln dlrs for the year ended December 31 compared to a profit of 64.2 mln dlrs in the year earlier<SPLIT>.
+In a prepared statement<SPLIT>, the company said the dramatic swing in operating results was due primarily to the steep decline in sales of Cabbage Patch Kids products from 600 mln dlrs to 230 mln dlrs<SPLIT>.
+Coleco said it changed from a single product company to a more diversified organization through four major acquisitions last year<SPLIT>.
+Products from the new acquisitions and other new product introductions are expected to enable it to return to profitability<SPLIT>, it said<SPLIT>.
+At the annual Toy Fair earlier this month<SPLIT>, vice president Morton Handel said analysts' 1987 projected earnings of 90 cts a share on sales of 600 mln dlrs are reasonable<SPLIT>.
+Azpurua said venezuela has shown solidarity with brazil's decision to suspend payments<SPLIT>, but each country must negotiate according to its own interest<SPLIT>.
+Azpurua said the governent plans to pay 2.1 billion dlrs in public and private debt principal this year<SPLIT>.
+It was due to amortize 1.05 billion dlrs under the rescheduling<SPLIT>, and pay 420 mln dlrs in non-restructured principal<SPLIT>, both public sector<SPLIT>.
+He said venezuela's original proposal was to pay no principal on restructured debt this year<SPLIT>, but is now insisting that if it makes payments they be compensated by new bank loans<SPLIT>.
+The banking sources said the committee has been prepared to lower amortizations to around 400 mln dlrs this year<SPLIT>, but that no direct commitment was likely on new loans<SPLIT>.
+At the same time<SPLIT>, he said the government is presently studying possible mechanisms for capitlizing public and private sector foreign debt<SPLIT>, based on experience in other countries<SPLIT>.
+The rules would be published by the finance ministry and the central bank<SPLIT>.
+
+Thomson McKinnon Mortgage Assets Corp<SPLIT>, a unit of Thomson McKinnon Inc<SPLIT>, is offering 100 mln dlrs of collateralized mortgage obligations in three tranches that include floating rate and inverse floating rate CMOS<SPLIT>.
+The floating rate class amounts to 60 mln dlrs<SPLIT>.
+The inverse floater totals 4.8 mln dlrs<SPLIT>.
+Subsequent rates on the inverse floater will equal 11-1/2 pct minus the product of three times (<SPLIT>LIBOR minus 6-1/2 pct<SPLIT>)<SPLIT>.
+A Thomson officer explained that the coupon of the inverse floating rate tranche would increase if LIBOR declined<SPLIT>.
+The fixed-rate tranche totals 35.2 mln dlrs<SPLIT>.
+The issue is rated AAA by Standard and Poor's and secured by Federal Home Loan Mortgage Corp<SPLIT>, Freddie Mac<SPLIT>, certificates<SPLIT>.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/core/src/test/org/apache/lucene/analysis/TestStopFilter.java
----------------------------------------------------------------------
diff --git a/lucene/core/src/test/org/apache/lucene/analysis/TestStopFilter.java b/lucene/core/src/test/org/apache/lucene/analysis/TestStopFilter.java
index 3e26965..f17cd51 100644
--- a/lucene/core/src/test/org/apache/lucene/analysis/TestStopFilter.java
+++ b/lucene/core/src/test/org/apache/lucene/analysis/TestStopFilter.java
@@ -20,12 +20,8 @@ import java.io.IOException;
 import java.io.StringReader;
 import java.util.ArrayList;
 
-import org.apache.lucene.analysis.BaseTokenStreamTestCase;
-import org.apache.lucene.analysis.MockTokenizer;
-import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
 import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
-import org.apache.lucene.analysis.CharArraySet;
 import org.apache.lucene.util.English;
 
 public class TestStopFilter extends BaseTokenStreamTestCase {
@@ -111,9 +107,10 @@ public class TestStopFilter extends BaseTokenStreamTestCase {
                               7,
                               1,
                               null,
-                              true);    
+                              true,
+                              null);
   }
-  
+
   private void doTestStopPositons(StopFilter stpf) throws IOException {
     CharTermAttribute termAtt = stpf.getAttribute(CharTermAttribute.class);
     PositionIncrementAttribute posIncrAtt = stpf.getAttribute(PositionIncrementAttribute.class);

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/ivy-versions.properties
----------------------------------------------------------------------
diff --git a/lucene/ivy-versions.properties b/lucene/ivy-versions.properties
index 2478f85..35df7ae 100644
--- a/lucene/ivy-versions.properties
+++ b/lucene/ivy-versions.properties
@@ -161,6 +161,9 @@ org.apache.james.apache.mime4j.version = 0.7.2
 
 /org.apache.mina/mina-core = 2.0.0-M5
 
+/org.apache.opennlp/opennlp-maxent = 3.0.3
+/org.apache.opennlp/opennlp-tools = 1.8.3
+
 org.apache.pdfbox.version = 2.0.6
 /org.apache.pdfbox/fontbox = ${org.apache.pdfbox.version}
 /org.apache.pdfbox/jempbox = 1.8.13

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/licenses/opennlp-maxent-3.0.3.jar.sha1
----------------------------------------------------------------------
diff --git a/lucene/licenses/opennlp-maxent-3.0.3.jar.sha1 b/lucene/licenses/opennlp-maxent-3.0.3.jar.sha1
new file mode 100644
index 0000000..c3c412f
--- /dev/null
+++ b/lucene/licenses/opennlp-maxent-3.0.3.jar.sha1
@@ -0,0 +1 @@
+55e39e6b46e71f35229cdd6950e72d8cce3b5fd4

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/licenses/opennlp-maxent-LICENSE-ASL.txt
----------------------------------------------------------------------
diff --git a/lucene/licenses/opennlp-maxent-LICENSE-ASL.txt b/lucene/licenses/opennlp-maxent-LICENSE-ASL.txt
new file mode 100644
index 0000000..d645695
--- /dev/null
+++ b/lucene/licenses/opennlp-maxent-LICENSE-ASL.txt
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/licenses/opennlp-maxent-NOTICE.txt
----------------------------------------------------------------------
diff --git a/lucene/licenses/opennlp-maxent-NOTICE.txt b/lucene/licenses/opennlp-maxent-NOTICE.txt
new file mode 100644
index 0000000..9b97287
--- /dev/null
+++ b/lucene/licenses/opennlp-maxent-NOTICE.txt
@@ -0,0 +1,6 @@
+
+Apache OpenNLP Maxent
+Copyright 2013 The Apache Software Foundation
+
+This product includes software developed at
+The Apache Software Foundation (http://www.apache.org/).

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/3e2f9e62/lucene/licenses/opennlp-tools-1.8.3.jar.sha1
----------------------------------------------------------------------
diff --git a/lucene/licenses/opennlp-tools-1.8.3.jar.sha1 b/lucene/licenses/opennlp-tools-1.8.3.jar.sha1
new file mode 100644
index 0000000..c6a7549
--- /dev/null
+++ b/lucene/licenses/opennlp-tools-1.8.3.jar.sha1
@@ -0,0 +1 @@
+3ce7c9056048f55478d983248cf18c7e02b1d072


[10/12] lucene-solr:branch_7x: LUCENE-2899: Add OpenNLP Analysis capabilities as a module

Posted by sa...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/analysis/opennlp/src/tools/test-model-data/chunks.txt
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/tools/test-model-data/chunks.txt b/lucene/analysis/opennlp/src/tools/test-model-data/chunks.txt
new file mode 100644
index 0000000..f02c5b7
--- /dev/null
+++ b/lucene/analysis/opennlp/src/tools/test-model-data/chunks.txt
@@ -0,0 +1,3566 @@
+Iran NNP B-NP
+announced VBD B-VP
+tonight NN B-NP
+that IN B-PP
+its NNS B-NP
+major JJ B-NP
+offensive NN I-NP
+against IN B-PP
+Iraq NNP B-NP
+in IN B-PP
+the DT B-NP
+Gulf NNP I-NP
+war NN I-NP
+had VBD B-VP
+ended VBN I-VP
+after IN B-PP
+dealing VBG B-VP
+savage JJ B-NP
+blows NNS I-NP
+against IN B-PP
+the DT B-NP
+Baghdad NNP I-NP
+government NN I-NP
+. . O
+The DT B-NP
+Iranian JJ I-NP
+news NN I-NP
+agency NN I-NP
+IRNA NNP I-NP
+, , O
+in IN B-PP
+a DT B-NP
+report NN I-NP
+received VBN B-VP
+in IN B-PP
+London NNP B-NP
+, , O
+said VBD B-VP
+the DT B-NP
+operation NN I-NP
+code-named VBN B-VP
+Karbala-5 CD B-NP
+launched VBD B-VP
+into IN B-PP
+Iraq NNP B-NP
+on IN B-PP
+January NNP B-NP
+9 CD I-NP
+was VBD B-VP
+now RB B-ADVP
+over RP B-NP
+. . O
+It PRP B-NP
+quoted VBD B-VP
+a DT B-NP
+joint NN I-NP
+statewment NN I-NP
+by IN B-PP
+the DT B-NP
+Iranian JJ I-NP
+Army NNP I-NP
+and CC I-NP
+Revolutionary NNP I-NP
+Guards NNPS I-NP
+Corps NNP I-NP
+as IN B-PP
+saying VBG B-VP
+that IN B-SBAR
+their DT B-NP
+forces NNS I-NP
+had VBD B-VP
+" JJ B-NP
+dealt VBD B-VP
+one CD B-NP
+of IN B-PP
+the DT B-NP
+severest JJS I-NP
+blows NNS I-NP
+on IN B-PP
+the DT B-NP
+Iraqi JJ I-NP
+war NN I-NP
+machine NN I-NP
+in IN B-PP
+the DT B-NP
+history NN I-NP
+of IN B-PP
+the DT B-NP
+Iraq-imposed JJ I-NP
+war NN I-NP
+. . O
+" NN B-VP
+The DT B-NP
+statement NN I-NP
+by IN B-PP
+the DT B-NP
+Iranian JJ I-NP
+High NNP I-NP
+Command NNP I-NP
+appeared VBD B-VP
+to TO I-VP
+herald VB I-VP
+the DT B-NP
+close NN I-NP
+of IN B-PP
+an DT B-NP
+assault NN I-NP
+on IN B-PP
+the DT B-NP
+port JJ I-NP
+city NN I-NP
+of IN B-PP
+Basra NNP B-NP
+in IN B-PP
+southern JJ B-NP
+Iraq NNP I-NP
+. . O
+" NN B-VP
+The DT B-NP
+operation NN I-NP
+was VBD B-VP
+launched VBN I-VP
+at IN B-PP
+a DT B-NP
+time NN I-NP
+when WRB B-ADVP
+the DT B-NP
+Baghdad NNP I-NP
+government NN I-NP
+was VBD B-VP
+spreading VBG I-VP
+extensive JJ B-NP
+propaganda NN I-NP
+on IN B-PP
+the DT B-NP
+resistance NN I-NP
+power NN I-NP
+of IN B-PP
+its NNS B-NP
+army NN I-NP
+... NNS I-NP
+, , O
+" NNS B-NP
+said VBD B-VP
+the DT B-NP
+statement NN I-NP
+quoted VBN B-VP
+by IN B-PP
+IRNA NNP B-NP
+. . O
+It PRP B-NP
+claimed VBD B-VP
+massive JJ B-NP
+victories NNS I-NP
+in IN B-PP
+the DT B-NP
+seven-week NN I-NP
+offensive NN I-NP
+and CC O
+called VBN B-VP
+on IN B-PP
+supporters NNS B-NP
+of IN B-SBAR
+Baghdad NNP B-NP
+to TO B-VP
+" VB I-VP
+come VBN I-VP
+to TO B-PP
+their IN B-NP
+senses JJ I-NP
+" NNS I-NP
+and CC O
+discontinue VB B-VP
+support NN B-NP
+for IN B-PP
+what WP B-NP
+it PRP B-NP
+called VBD B-VP
+the DT B-NP
+tottering VBG I-NP
+regime NN I-NP
+in IN B-PP
+Iraq NNP B-NP
+. . I-NP
+Iran NNP I-NP
+said VBD B-VP
+its NNS B-NP
+forces NNS I-NP
+had VBD B-VP
+" CD B-NP
+liberated JJ I-NP
+" NN I-NP
+155 CD I-NP
+square JJ I-NP
+kilometers NNS I-NP
+of IN B-PP
+enemy-occupied JJ-occupied B-NP
+territory NN I-NP
+during IN B-PP
+the DT B-NP
+1987 CD I-NP
+offensive NN I-NP
+and CC O
+taken VBN B-VP
+over IN B-PP
+islands NNS B-NP
+, , O
+townships NNS B-NP
+, , O
+rivers NNS B-NP
+and CC O
+part NN B-NP
+of IN B-PP
+a DT B-NP
+road NN I-NP
+leading VBG B-VP
+into IN B-PP
+Basra NNP B-NP
+. . O
+The DT B-NP
+Iranian JJ I-NP
+forces NNS I-NP
+" NNS I-NP
+are VBP B-VP
+in IN B-PP
+full JJ B-NP
+control NN I-NP
+of IN B-PP
+these DT B-NP
+areas NNS I-NP
+, , O
+" NNS B-NP
+the DT B-NP
+statement NN I-NP
+said VBD B-VP
+. . O
+It PRP B-NP
+said VBD B-VP
+81 CD B-NP
+Iraqi JJ I-NP
+brigades NNS I-NP
+and CC I-NP
+battalions NNS I-NP
+were VBD B-VP
+totally RB I-VP
+destroyed VBN I-VP
+, , O
+along IN B-ADVP
+with IN B-PP
+700 CD B-NP
+tanks NNS I-NP
+and CC O
+1,500 CD B-NP
+other JJ I-NP
+vehicles NNS I-NP
+. . O
+The DT B-NP
+victory NN I-NP
+list NN I-NP
+also RB B-ADVP
+included VBD B-VP
+80 CD B-NP
+warplanes NNS I-NP
+downed VBD B-VP
+, , O
+250 CD B-NP
+anti- - I-NP
+aircraft NN I-NP
+guns NNS I-NP
+and CC O
+400 CD B-NP
+pieces NNS I-NP
+of IN B-PP
+military JJ B-NP
+hardware NN I-NP
+destroyed VBN B-VP
+and CC O
+the DT B-NP
+seizure NN I-NP
+of IN B-PP
+220 CD B-NP
+tanks NNS I-NP
+and CC O
+armoured JJ B-NP
+personnel NNS I-NP
+carriers NNS I-NP
+. . O
+U.S. NNP O
+bank NN I-NP
+discount NN I-NP
+window RB I-NP
+borrowings NNS I-NP
+less NNS I-NP
+extended VBN B-NP
+credits NN I-NP
+averaged VBD B-VP
+310 CD B-NP
+mln NN I-NP
+dlrs NN I-NP
+in IN B-PP
+the DT B-NP
+week NN I-NP
+to TO B-PP
+Wednesday NNP B-NP
+February NNP I-NP
+25 CD I-NP
+, , O
+the DT B-NP
+Federal JJ I-NP
+Reserve NNP I-NP
+said VBD B-VP
+. . O
+The DT B-NP
+Fed JJ I-NP
+said VBD B-VP
+that IN B-SBAR
+overall JJ B-NP
+borrowings NNS I-NP
+in IN B-PP
+the DT B-NP
+week NN I-NP
+fell MD B-VP
+131 CD B-NP
+mln NN I-NP
+dlrs NN I-NP
+to TO B-PP
+614 CD B-NP
+mln NN I-NP
+dlrs NN I-NP
+, , O
+with IN B-PP
+extended VBN B-NP
+credits NN I-NP
+up IN B-PP
+10 CD B-NP
+mln NN I-NP
+dlrs NN I-NP
+at IN B-PP
+304 CD B-NP
+mln NN I-NP
+dlrs NN I-NP
+. . O
+The DT B-NP
+week NN I-NP
+was VBD B-VP
+the DT B-NP
+second NN I-NP
+half NN I-NP
+of IN B-PP
+a DT B-NP
+two-week NN I-NP
+statement NN I-NP
+period. NNS I-NP
+Net VBD B-VP
+borrowings NNS B-NP
+in IN B-PP
+the DT B-NP
+prior NN I-NP
+week NN I-NP
+averaged RB B-NP
+451 CD I-NP
+mln NN I-NP
+dlrs NN I-NP
+. . O
+Commenting NNP O
+on IN B-PP
+the DT B-NP
+two-week NN I-NP
+statement NN I-NP
+period NNS I-NP
+ended VBD B-VP
+February NNP B-NP
+25 CD I-NP
+, , O
+the DT B-NP
+Fed NNP I-NP
+said VBD B-VP
+that NN B-SBAR
+banks NNS B-NP
+had VBD B-VP
+average JJ B-NP
+net NN I-NP
+free JJ I-NP
+reserves NN I-NP
+of IN B-PP
+644 CD B-NP
+mln NN I-NP
+dlrs NN I-NP
+a DT B-NP
+day NN I-NP
+, , O
+down IN B-PP
+from JJ B-NP
+1.34 NN I-NP
+billion NN I-NP
+two RB B-NP
+weeks NNS I-NP
+earlier IN B-ADVP
+. . O
+A RB B-ADJP
+Federal JJ I-ADJP
+Reserve . B-NP
+spokesman NN B-VP
+told VBN I-VP
+a DT B-NP
+press NN I-NP
+briefing VBG B-VP
+that IN B-SBAR
+there EX B-NP
+were VBD B-VP
+no RB B-NP
+large JJ I-NP
+single NN I-NP
+day NN I-NP
+net RB I-NP
+misses NNS I-NP
+in IN B-PP
+the DT B-NP
+Fed's default I-NP
+reserve NN I-NP
+projections NNS I-NP
+in IN B-PP
+the DT B-NP
+week NN I-NP
+to TO B-PP
+Wednesday NNP B-NP
+. . I-NP
+He NNP I-NP
+said VBD B-VP
+that NN B-NP
+natural JJ I-NP
+float NN I-NP
+had VBD B-VP
+been VBN I-VP
+" NN B-NP
+acting VBG B-VP
+a DT B-NP
+bit NN I-NP
+strangely RB B-VP
+" VBN I-VP
+for IN B-PP
+this DT B-NP
+time NN I-NP
+of IN B-PP
+year NN B-NP
+, , O
+noting VBG B-VP
+that IN B-SBAR
+there EX B-NP
+had VBD B-VP
+been VBN I-VP
+poor JJ B-NP
+weather NN I-NP
+during IN B-PP
+the DT B-NP
+latest JJ I-NP
+week NN I-NP
+. . O
+The DT B-NP
+spokesman NN I-NP
+said VBD B-VP
+that IN B-SBAR
+natural JJ B-NP
+float NN I-NP
+ranged VBN B-VP
+from IN B-PP
+under IN B-NP
+500 CD I-NP
+mln NN I-NP
+dlrs NN I-NP
+on IN B-PP
+Friday NNP B-NP
+, , O
+for IN B-PP
+which NNP B-NP
+he NN B-NP
+could VBN B-VP
+give JJ B-NP
+no RB I-NP
+reason NN I-NP
+, , O
+to TO B-PP
+nearly JJ B-NP
+one CD I-NP
+billion IN B-PP
+dlrs NN B-NP
+on IN B-PP
+both NN B-NP
+Thursday default B-NP
+and CC O
+Wednesday default B-NP
+. . O
+The DT B-NP
+Fed JJ I-NP
+spokeman NN I-NP
+could VBN B-VP
+give JJ B-NP
+no NN I-NP
+reason NN I-NP
+for IN B-PP
+Thursday's NNP B-NP
+high NN I-NP
+float NNS I-NP
+, , O
+but NNS B-NP
+he DT B-NP
+said VBD B-VP
+that IN B-PP
+about NN B-NP
+750 CD I-NP
+mln NN I-NP
+dlrs NN I-NP
+of IN B-PP
+Wednesday's NNP B-NP
+float NN I-NP
+figure NNS I-NP
+was VBD B-VP
+due VBD I-VP
+to TO I-VP
+holdover VB I-VP
+and CC O
+transportation NN B-VP
+float IN B-PRT
+at IN B-PP
+two NN B-NP
+widely WDT I-NP
+separated VBN B-VP
+Fed VBN B-NP
+districts NNS I-NP
+. . O
+For NNP O
+the DT B-NP
+week NN I-NP
+as IN B-PP
+a DT B-NP
+whole NN I-NP
+, , O
+he DT B-NP
+said VBD B-VP
+that IN B-SBAR
+float NN B-NP
+related VBN B-VP
+as IN B-PP
+of NNP B-NP
+adjustments NNS I-NP
+were VBD B-VP
+" RB B-ADJP
+small JJ I-ADJP
+, , O
+" IN B-PP
+adding VBG B-VP
+that IN B-SBAR
+they NN B-NP
+fell NN I-NP
+to TO B-PP
+a DT B-NP
+negative JJ I-NP
+750 CD I-NP
+mln NN I-NP
+dlrs NN I-NP
+on IN B-PP
+Tuesday NNP B-NP
+due NN I-NP
+to TO B-PP
+a DT B-NP
+number NN I-NP
+of IN B-PP
+corrections NN B-NP
+for IN B-PP
+unrelated VBN B-NP
+cash NN I-NP
+letter IN B-PP
+errors NNS B-NP
+in IN B-PP
+six JJ B-NP
+districts NNS I-NP
+around IN B-PP
+the DT B-NP
+country NN I-NP
+. . O
+The DT B-NP
+spokesman NN I-NP
+said VBD B-VP
+that NN B-NP
+on IN B-PP
+both JJ B-NP
+Tuesday NNP I-NP
+and CC I-NP
+Wednesday NNP B-NP
+, , O
+two IN B-PP
+different JJ B-NP
+clearing NN I-NP
+banks NNS I-NP
+had VBD B-VP
+system JJ B-NP
+problems NNS I-NP
+and CC O
+the DT B-NP
+securities NNS I-NP
+and CC I-NP
+Federal JJ I-NP
+funds NNS I-NP
+wires NNS I-NP
+had VBD B-VP
+to TO I-VP
+be VB I-VP
+held VBN I-VP
+open JJ B-NP
+until NNS I-NP
+about IN B-PP
+2000 CD B-NP
+or NNP I-NP
+2100 CD I-NP
+EST NNS I-NP
+on IN B-PP
+both JJ B-NP
+days NN I-NP
+. . O
+However NNP B-NP
+, , O
+he CD B-NP
+said VBD B-VP
+that IN B-SBAR
+both NNP B-NP
+problems NN I-NP
+were VBD B-VP
+cleared VBN I-VP
+up IN B-ADVP
+during VBG B-VP
+both IN B-PP
+afternoons NNS B-NP
+and CC O
+there DT B-NP
+was VBD B-VP
+no RB B-ADJP
+evidence JJ I-ADJP
+of IN B-PP
+any DT B-NP
+reserve JJ I-NP
+impact NN I-NP
+. . O
+During VBG B-VP
+the DT B-NP
+week NN I-NP
+ended VBN B-VP
+Wednesday NNP B-NP
+, , O
+45 CD B-NP
+pct NN I-NP
+of IN B-PP
+net JJ B-NP
+discount NN I-NP
+window NN I-NP
+borrowings NNS I-NP
+were VBD B-VP
+made JJ B-ADJP
+by IN B-PP
+the DT B-NP
+smallest NN I-NP
+banks NNS I-NP
+, , O
+with IN B-PP
+30 CD B-NP
+pct NN I-NP
+by IN B-PP
+the DT B-NP
+14 CD I-NP
+large RB I-NP
+money JJ I-NP
+center NN I-NP
+banks NNS I-NP
+and CC O
+25 CD B-NP
+pct NN I-NP
+by IN B-PP
+large JJ B-NP
+regional NN I-NP
+institutions NNS I-NP
+. . O
+On NNP B-NP
+Wednesday NNP I-NP
+, , O
+55 CD B-NP
+pct NN I-NP
+of IN B-PP
+the DT B-NP
+borrowing NN I-NP
+was VBD B-VP
+accounted VBN I-VP
+for IN B-PP
+by IN B-PP
+the DT B-NP
+money NN I-NP
+center NN I-NP
+banks NNS I-NP
+, , O
+with IN B-PP
+30 CD B-NP
+pct NN I-NP
+by IN B-PP
+the DT B-NP
+large JJ I-NP
+regionals NN I-NP
+and CC O
+15 CD B-NP
+pct NN I-NP
+by IN B-PP
+the DT B-NP
+smallest JJ I-NP
+banks NNS I-NP
+. . O
+The DT B-NP
+Fed JJ I-NP
+spokesman NN I-NP
+said VBD B-VP
+the DT B-NP
+banking NN I-NP
+system IN B-NP
+had VBD B-VP
+excess VBZ B-NP
+reserves NN I-NP
+on IN B-PP
+Thursday NNP B-NP
+, , O
+Monday NNP B-NP
+and CC I-NP
+Tuesday NNP I-NP
+and CC O
+a DT B-NP
+deficit NN I-NP
+on IN B-PP
+Friday NNP B-NP
+and CC O
+Wedndsday NNP B-NP
+. . I-NP
+That NNP I-NP
+produced VBD B-VP
+a DT B-NP
+small JJ I-NP
+daily NN I-NP
+average JJ I-NP
+deficit NN I-NP
+for IN B-PP
+the DT B-NP
+week NN I-NP
+as IN B-PP
+a DT B-NP
+whole NN I-NP
+. . B-VP
+For NNP B-PP
+the DT B-NP
+two-week NN I-NP
+period NNS I-NP
+, , O
+he NNS B-NP
+said VBD B-VP
+there EX B-NP
+were VBD B-VP
+relatively JJ B-NP
+high NN I-NP
+excess VBZ B-VP
+reserves NN B-NP
+on IN B-PP
+a DT B-NP
+daily JJ I-NP
+avearge NN I-NP
+, , O
+almost IN B-PP
+all DT B-NP
+of IN B-PP
+which CD B-NP
+were VBD B-VP
+at IN B-PP
+the DT B-NP
+smallest JJ I-NP
+banks NNS I-NP
+. . O
+Reuter IN B-PP
+&#3; CD B-NP
+American RB I-NP
+Express JJ I-NP
+Co NNP I-NP
+remained VBN I-NP
+silent NN I-NP
+on IN B-PP
+market NN B-NP
+rumors NN I-NP
+it PRP B-NP
+would VBD B-VP
+spinoff IN B-PP
+all DT B-NP
+or JJ I-NP
+part NN I-NP
+of IN B-PP
+its NNS B-NP
+Shearson NNP I-NP
+Lehman NNP I-NP
+Brothers NNS I-NP
+Inc NNP I-NP
+, , O
+but IN B-SBAR
+some DT B-NP
+analysts NNS I-NP
+said VBD B-VP
+the DT B-NP
+company NN I-NP
+may NN I-NP
+be VB B-VP
+considering NN B-NP
+such IN B-PP
+a DT B-NP
+move JJ I-NP
+because NN I-NP
+it PRP B-NP
+is VBZ B-VP
+unhappy NN B-NP
+with IN B-PP
+the DT B-NP
+market JJ I-NP
+value NN I-NP
+of IN B-PP
+its NNS B-NP
+stock NN I-NP
+. . B-ADVP
+American RB B-NP
+Express JJ I-NP
+stock NN I-NP
+got NN I-NP
+a DT B-NP
+lift NN I-NP
+from WRB B-ADVP
+the DT B-NP
+rumor NN I-NP
+, , O
+as IN B-SBAR
+the DT B-NP
+market NN I-NP
+calculated VBN B-VP
+a DT B-NP
+partially JJ I-NP
+public NN I-NP
+Shearson IN B-PP
+may NN B-NP
+command VBN B-VP
+a DT B-NP
+good JJ I-NP
+market NN I-NP
+value NN I-NP
+, , O
+thereby IN B-PP
+boosting VBG B-VP
+the DT B-NP
+total JJ I-NP
+value NN I-NP
+of IN B-PP
+American NNP B-NP
+Express default I-NP
+. . O
+The DT B-NP
+rumor NN I-NP
+also NN I-NP
+was VBD B-VP
+accompanied VBN I-VP
+by IN B-PP
+talk NN B-NP
+the DT B-NP
+financial JJ I-NP
+services NNS I-NP
+firm IN B-PP
+would JJ B-NP
+split NN I-NP
+its NNS I-NP
+stock IN B-PP
+and CC O
+boost JJ B-NP
+its NNS I-NP
+dividend VBD B-VP
+. . O
+American RB O
+Express VBZ B-VP
+closed VBN I-VP
+on IN B-PP
+the DT B-NP
+New JJ I-NP
+York NNP I-NP
+Stock NNP I-NP
+Exchange VBD B-VP
+at IN B-PP
+72-5/8 CD B-NP
+, , O
+up IN B-PP
+4-1/8 NN B-NP
+on IN B-PP
+heavy NN B-NP
+volume default I-NP
+. . B-ADVP
+American RB B-ADJP
+Express JJ I-ADJP
+would VBD B-VP
+not IN B-PP
+comment NN B-NP
+on IN B-PP
+the DT B-NP
+rumors NN I-NP
+or IN B-PP
+its NNS B-NP
+stock NN I-NP
+activity NN I-NP
+. . O
+Analysts NNS B-NP
+said VBD B-VP
+comments VBN I-VP
+by IN B-PP
+the DT B-NP
+company NN I-NP
+at IN B-PP
+an DT B-NP
+analysts' NN I-NP
+meeting VBG B-VP
+Tuesday default B-NP
+helped VBN I-NP
+fuel JJ B-NP
+the DT I-NP
+rumors NN I-NP
+as IN B-PP
+did NN B-NP
+an DT B-NP
+announcement JJ I-NP
+yesterday NN I-NP
+of IN B-PP
+management JJ B-NP
+changes NNS I-NP
+. . O
+At RB O
+the DT B-NP
+meeting VBG I-NP
+, , I-NP
+company NN I-NP
+officials IN B-NP
+said VBD B-VP
+American RB B-NP
+Express JJ I-NP
+stock NN I-NP
+is VBZ B-VP
+undervalued VBN I-VP
+and CC O
+does NNS B-VP
+not NN B-NP
+fully NN I-NP
+reflect NN B-VP
+the DT B-NP
+performance NN I-NP
+of IN B-PP
+Shearson NNP B-NP
+, , O
+according IN B-PP
+to TO B-PP
+analysts NNS B-NP
+. . O
+Yesterday NNP B-NP
+, , O
+Shearson NNP B-NP
+said VBD B-VP
+it PRP B-NP
+was VBD B-VP
+elevating VBG I-VP
+its NNS B-NP
+chief NNP I-NP
+operating VBG I-NP
+officer IN I-NP
+, , O
+Jeffery NNP B-NP
+Lane NNP I-NP
+, , O
+to TO B-PP
+the DT B-NP
+added JJ I-NP
+position NN I-NP
+of IN B-PP
+president NN B-NP
+, , O
+which IN B-NP
+had VBD B-VP
+been VBN I-VP
+vacant NN B-NP
+. . O
+It PRP B-NP
+also RB I-VP
+created VBN I-VP
+four IN B-PP
+new JJ B-NP
+positions NNS I-NP
+for IN B-PP
+chairmen NN B-NP
+of IN B-PP
+its NNS B-NP
+operating VBG I-NP
+divisions NNS I-NP
+. . O
+Analysts NNS B-NP
+speculated VBD B-VP
+a DT B-NP
+partial JJ I-NP
+spinoff NNP I-NP
+would VBD B-VP
+make NN B-NP
+most NN I-NP
+sense NNS I-NP
+, , O
+contrary JJ B-ADJP
+to TO B-PP
+one CD B-NP
+variation NN I-NP
+on IN B-PP
+market JJ B-NP
+rumors NN I-NP
+of IN B-PP
+a DT B-NP
+total JJ I-NP
+spinoff NNP I-NP
+. . O
+Some DT B-NP
+analysts NNS I-NP
+, , O
+however NNS B-NP
+, , O
+disagreed VBD B-VP
+that IN B-PP
+any JJ B-NP
+spinoff NN I-NP
+of IN B-PP
+Shearson NNP B-NP
+would VBD B-VP
+be VB I-VP
+good NN B-NP
+since IN B-SBAR
+it PRP B-NP
+is VBZ B-VP
+a DT B-NP
+strong VBG I-NP
+profit NN I-NP
+center NN I-NP
+for IN B-PP
+American NNP B-NP
+Express NNS I-NP
+, , O
+contributing VBG B-VP
+about IN B-NP
+20 CD I-NP
+pct NN I-NP
+of IN B-PP
+earnings NNS B-NP
+last JJ B-NP
+year NN I-NP
+. . O
+" NN B-NP
+I IN B-PP
+think NN B-NP
+it PRP B-NP
+is VBZ B-VP
+highly RB O
+unlikely JJ B-NP
+that NN I-NP
+American RB B-NP
+Express JJ I-NP
+is VBZ B-VP
+going VBG I-VP
+to TO B-PP
+sell JJ B-NP
+shearson NN I-NP
+, , O
+" IN B-NP
+said VBD B-VP
+Perrin CD B-NP
+Long VBG I-NP
+of IN B-PP
+Lipper NNP B-NP
+Analytical default I-NP
+. . O
+He JJ I-VP
+questioned VBD I-VP
+what IN B-NP
+would VBN B-VP
+be VB I-VP
+a DT B-NP
+better NN I-NP
+investment NN I-NP
+than NN I-NP
+" RB B-NP
+a DT I-NP
+very NN I-NP
+profitable NN I-NP
+securities NNS I-NP
+firm IN B-PP
+. . B-NP
+" NN I-NP
+Several JJ I-NP
+analysts NNS I-NP
+said VBD B-VP
+American RB B-ADJP
+Express JJ I-ADJP
+is VBZ B-VP
+not RB O
+in IN B-PP
+need JJ B-NP
+of IN B-PP
+cash NNP B-NP
+, , O
+which IN B-PP
+might NN B-NP
+be VB B-VP
+the DT B-NP
+only JJ I-NP
+reason NN I-NP
+to TO B-VP
+sell JJ I-VP
+a DT B-NP
+part NN I-NP
+of IN B-PP
+a DT B-NP
+strong NN I-NP
+asset IN B-PP
+. . B-NP
+But JJ I-NP
+others NNS I-NP
+believe VBP B-VP
+the DT B-NP
+company NN I-NP
+could VBN B-VP
+very JJ B-ADVP
+well RB B-ADVP
+of IN B-ADVP
+considered VBD B-VP
+the DT B-NP
+option NN I-NP
+of IN B-PP
+spinning VBG B-VP
+out JJ B-NP
+part NN I-NP
+of IN B-PP
+Shearson NNP B-NP
+, , O
+and CC O
+one JJ B-NP
+rumor NN I-NP
+suggests NNS I-NP
+selling VBG B-VP
+about IN B-NP
+20 CD I-NP
+pct NN I-NP
+of IN B-PP
+it PRP B-NP
+in IN B-PP
+the DT B-NP
+market NN I-NP
+. . O
+Larry JJ O
+Eckenfelder . O
+of IN B-PP
+Prudential-Bache DT B-NP
+Securities NNS I-NP
+said VBD B-VP
+he DT B-NP
+believes NN I-NP
+American RB B-VP
+Express VBN I-VP
+could VBN I-VP
+have VBP B-VP
+considered VBN I-VP
+a DT B-NP
+partial JJ I-NP
+spinoff NN I-NP
+in IN B-PP
+the DT B-NP
+past NN I-NP
+. . O
+" IN B-PP
+Shearson NNP B-NP
+being NN I-NP
+as IN B-PP
+profitable NN B-NP
+as IN B-SBAR
+it PRP B-NP
+is VBZ B-VP
+would VBD I-VP
+have VBP I-VP
+fetched VBN I-VP
+a DT B-NP
+big NN I-NP
+premium NN I-NP
+in IN B-PP
+the DT B-NP
+market NN I-NP
+place. NN I-NP
+Shearson's NNP I-NP
+book NN I-NP
+value NN I-NP
+is VBZ B-VP
+in IN B-PP
+the DT B-NP
+1.4 CD I-NP
+mln NN I-NP
+dlr IN B-PP
+range NN B-NP
+. . O
+Shearson NNP O
+in IN B-PP
+the DT B-NP
+market NN I-NP
+place NN I-NP
+would MD B-VP
+probably RB I-VP
+be VB I-VP
+worth RB B-ADVP
+three DT B-NP
+to TO I-NP
+3.5 CD I-NP
+bilion NN I-NP
+dlrs NN I-NP
+in IN B-PP
+terms NN B-NP
+of IN B-PP
+market JJ B-NP
+capitalization NN I-NP
+, , O
+" IN B-NP
+said VBD B-VP
+Eckenfelder CD B-NP
+. . O
+Some DT B-NP
+analysts NNS I-NP
+said VBD B-VP
+American RB B-NP
+Express JJ I-NP
+could VBN B-VP
+use IN B-PP
+capital JJ B-NP
+since NN I-NP
+it PRP B-NP
+plans VBD B-VP
+to TO I-VP
+expand NNS B-NP
+globally JJ B-ADJP
+. . O
+" NNS B-VP
+They NNP B-NP
+have VBP B-VP
+enormous NNS B-NP
+internal JJ B-NP
+growth NNS I-NP
+plans NNS I-NP
+that IN B-PP
+takes NNS B-NP
+capital JJ B-ADJP
+. . O
+You NNP B-NP
+want NN I-NP
+your NN I-NP
+stock RB B-ADVP
+to TO B-PP
+reflect JJ B-NP
+realistic NN I-NP
+valuations NNS I-NP
+to TO B-PP
+enhance JJ B-NP
+your NN I-NP
+ability NN I-NP
+to TO B-PP
+make JJ B-NP
+all DT I-NP
+kinds NN I-NP
+of IN B-PP
+endeavors NNS B-NP
+down IN B-PP
+the DT B-NP
+road NN I-NP
+, , O
+" IN B-NP
+said VBD B-VP
+E.F. CD B-NP
+Hutton NNP I-NP
+Group NNP I-NP
+analyst IN B-PP
+Michael default B-NP
+Lewis default I-NP
+. . B-NP
+" NN I-NP
+They've DT B-NP
+outlined VBD B-VP
+the DT B-NP
+fact NN I-NP
+that IN B-SBAR
+they're DT B-NP
+investing VBG I-NP
+heavily NN I-NP
+in IN B-PP
+the DT B-NP
+future NNS I-NP
+, , O
+which IN B-PP
+goes NNS B-NP
+heavily NN I-NP
+into IN B-PP
+the DT B-NP
+international JJ I-NP
+arena, NN I-NP
+" NN I-NP
+said VBD B-VP
+Lewis CD B-NP
+. . O
+" default B-VP
+. . I-VP
+..That . O
+does NNS B-VP
+not NN B-NP
+preclude NN I-NP
+acquisitions NNS I-NP
+and CC O
+divestitures NNS B-NP
+along IN B-PP
+the DT B-NP
+way NN I-NP
+, , O
+" IN B-PP
+he DT B-NP
+said VBD I-NP
+. . O
+Lewis VBZ O
+said VBD B-VP
+if CD B-NP
+American RB I-NP
+Express JJ I-NP
+reduced VBN I-NP
+its NNS I-NP
+exposure NN I-NP
+to TO B-PP
+the DT B-NP
+brokerage NN I-NP
+business NNS I-NP
+by IN B-PP
+selling VBG B-VP
+part NN B-NP
+of IN B-PP
+shearson NN B-NP
+, , O
+its NNS B-NP
+stock NN I-NP
+might NN I-NP
+better IN B-PP
+reflect NN B-NP
+other IN B-PP
+assets NNS B-NP
+, , O
+such NNS B-NP
+as IN B-PP
+the DT B-NP
+travel NN I-NP
+related VBN I-NP
+services NNS I-NP
+business NNS I-NP
+. . O
+" NN B-VP
+It PRP B-NP
+could VBD B-VP
+find CD B-NP
+its NNS I-NP
+true VBD B-VP
+water IN B-PP
+mark NN B-NP
+with IN B-PP
+a DT B-NP
+lesser JJ I-NP
+exposure NN I-NP
+to TO B-VP
+brokerage VB I-VP
+. . O
+The DT B-NP
+value NN I-NP
+of IN B-PP
+the DT B-NP
+other NN I-NP
+components NNP-named I-NP
+could VBN B-VP
+command VBN I-VP
+a DT B-NP
+higher NN I-NP
+multiple WRB B-ADVP
+because NN B-NP
+they NN I-NP
+constitute VBD B-VP
+a DT B-NP
+higher NN I-NP
+percentage NN I-NP
+of IN B-PP
+the DT B-NP
+total NN I-NP
+operating IN B-PP
+earnings NNS B-NP
+of IN B-PP
+the DT B-NP
+company NN I-NP
+, , O
+" IN B-PP
+he DT B-NP
+said VBD I-NP
+. . O
+Lewis VBZ O
+said VBD B-VP
+Shearson CD B-NP
+contributed VBN B-VP
+316 CD B-NP
+mln NN I-NP
+in IN B-PP
+after-tax JJ B-NP
+operating VBG I-NP
+earnings NNS I-NP
+, , O
+up NNS B-NP
+from IN B-PP
+about NN B-NP
+200 CD I-NP
+mln NN I-NP
+dlrs NN I-NP
+in IN B-PP
+1985 default B-NP
+. . O
+Reuter IN B-PP
+&#3; CD B-NP
+Coleco NNP I-NP
+Industries NNP I-NP
+Inc NNP I-NP
+said VBD B-VP
+it PRP B-NP
+expects NNS B-VP
+to TO B-NP
+return JJ I-VP
+to TO B-PP
+profitability NN B-NP
+in IN B-PP
+1987 default B-NP
+. . O
+Earlier NNP B-NP
+, , O
+Coleco NNP B-NP
+reported VBN B-VP
+a DT B-NP
+net JJ I-NP
+loss CD I-NP
+of IN B-PP
+111.2 CD B-NP
+mln NN I-NP
+dlrs NN I-NP
+for IN B-PP
+the DT B-NP
+year NN I-NP
+ended VBN B-VP
+December IN B-PP
+31 CD B-NP
+compared VBN B-VP
+to TO B-PP
+a DT B-NP
+profit NN I-NP
+of IN B-PP
+64.2 CD B-NP
+mln NN I-NP
+dlrs NN I-NP
+in IN B-PP
+the DT B-NP
+year NN I-NP
+earlier IN B-PP
+. . B-NP
+In IN B-PP
+a DT B-NP
+prepared JJ I-NP
+statement NN I-NP
+, , O
+the DT B-NP
+company NN I-NP
+said VBD B-VP
+the DT B-NP
+dramatic NN I-NP
+swing IN B-PP
+in IN B-PP
+operating VBG B-NP
+results NNS I-NP
+was VBD B-VP
+due JJ B-NP
+primarily NN I-NP
+to TO B-PP
+the DT B-NP
+steep NN I-NP
+decline NN I-NP
+in IN B-PP
+sales NNS B-NP
+of IN B-PP
+Cabbage JJ B-NP
+Patch NNP I-NP
+Kids NNP I-NP
+products NNS I-NP
+from IN B-PP
+600 CD B-NP
+mln NN I-NP
+dlrs NN I-NP
+to TO B-PP
+230 CD B-NP
+mln NN I-NP
+dlrs NN I-NP
+. . O
+Coleco NNP B-NP
+said VBD B-VP
+it PRP B-NP
+changed VBD B-VP
+from VBN I-VP
+a DT B-NP
+single JJ I-NP
+product NN I-NP
+company NN I-NP
+to TO B-PP
+a DT B-NP
+more JJ I-NP
+diversified CD I-NP
+organization NN I-NP
+through IN B-PP
+four JJ B-NP
+major NN I-NP
+acquisitions NNS I-NP
+last JJ B-NP
+year NN I-NP
+. . O
+Products NNS B-NP
+from IN B-PP
+the DT B-NP
+new NN I-NP
+acquisitions NNS I-NP
+and CC O
+other VB B-VP
+new RB B-NP
+product NN I-NP
+introductions NNS I-NP
+are VBP B-VP
+expected VBN I-VP
+to TO I-VP
+enable NNS B-NP
+it PRP B-NP
+to TO B-VP
+return JJ B-NP
+to TO B-PP
+profitability NN B-NP
+, , O
+it PRP B-NP
+said VBD B-VP
+. . O
+At RB O
+the DT B-NP
+annual JJ I-NP
+Toy NNP I-NP
+Fair NNP I-NP
+earlier IN B-PP
+this DT B-NP
+month JJ I-NP
+, , I-NP
+vice JJ I-NP
+president NN I-NP
+Morton NNP I-NP
+Handel NNP I-NP
+said VBD B-VP
+analysts' CD B-NP
+1987 NN I-NP
+projected VBN I-NP
+earnings NNS I-NP
+of IN B-PP
+90 CD B-NP
+cts NNS I-NP
+a DT B-NP
+share NN I-NP
+on IN B-PP
+sales NNS B-NP
+of IN B-PP
+600 CD B-NP
+mln NN I-NP
+dlrs NN I-NP
+are VBP B-VP
+reasonable NN B-NP
+. . O
+Venezuela NNP-5 B-NP
+is VBZ B-VP
+seeking VBG I-VP
+a DT B-NP
+'constructive JJ I-NP
+and CC I-NP
+flexible' NNS I-NP
+attitude IN B-PP
+from JJ B-NP
+its NNS I-NP
+creditor NN I-NP
+banks NNS I-NP
+in IN B-PP
+current JJ B-NP
+talks NNS I-NP
+to TO B-PP
+reschedule JJ B-NP
+21 CD I-NP
+billion NN I-NP
+dlrs NN I-NP
+in IN B-PP
+foreign NN B-NP
+debt VBN B-VP
+, , O
+finance JJ B-NP
+minister NN I-NP
+manuel JJ I-NP
+azpurua NN I-NP
+told VBN B-VP
+a DT B-NP
+press NN I-NP
+conference. NN I-NP
+He NNP I-NP
+declined VBD B-VP
+to TO B-PP
+comment NN B-NP
+on IN B-PP
+meetings NNS B-NP
+this DT B-NP
+week NN I-NP
+in IN B-PP
+new JJ B-NP
+york NN I-NP
+between VBN B-VP
+public IN B-PP
+finances NNS B-NP
+director IN B-PP
+jorge JJ B-NP
+marcano NN I-NP
+and CC O
+venezuela's VBN B-NP
+13-bank NN I-NP
+advisory NN I-NP
+committee NNP-named I-NP
+except NN I-NP
+to TO B-PP
+say NN B-NP
+, , O
+" IN B-PP
+they NN B-NP
+are VBP B-VP
+progressing NNS B-NP
+. . O
+" NNP B-NP
+Azpurua NNP I-NP
+said VBD B-VP
+venezuela NN B-NP
+has NNS I-NP
+shown IN B-PP
+solidarity JJ B-NP
+with IN B-PP
+brazil's NNS B-NP
+decision VBD B-VP
+to TO B-PP
+suspend CD B-NP
+payments NNS I-NP
+, , O
+but NNS B-NP
+each IN B-PP
+country NN B-NP
+must VBZ B-VP
+negotiate RB I-VP
+according VBG I-VP
+to TO B-PP
+its NNS B-NP
+own JJ I-NP
+interest NN I-NP
+. . O
+Asked VBD B-VP
+to TO B-PP
+comment NN B-NP
+on IN B-PP
+chile's NN B-NP
+agreement NN I-NP
+with IN B-PP
+its NNS B-NP
+creditors NN I-NP
+today NN I-NP
+, , O
+which IN B-PP
+includes NNS B-NP
+an DT B-NP
+interest JJ I-NP
+rate NN I-NP
+margin NN I-NP
+of IN B-PP
+one CD B-NP
+pct NN I-NP
+over IN B-PP
+libor JJ B-NP
+, , O
+azpurua NNP B-NP
+said VBD B-VP
+only NN B-NP
+, , O
+" IN B-SBAR
+that NN B-NP
+is VBZ B-VP
+good JJ B-NP
+news NNS I-NP
+. . O
+" NNS B-NP
+According VBG B-VP
+to TO B-PP
+banking VBG B-NP
+sources NNS I-NP
+, , O
+the DT B-NP
+banks' NN I-NP
+latest NN I-NP
+offer IN B-PP
+to TO B-PP
+venezuela CD B-NP
+is VBZ B-VP
+also RB B-ADVP
+a DT B-NP
+one JJ I-NP
+pct NN I-NP
+margin JJ I-NP
+as IN B-PP
+against NN B-NP
+the DT B-NP
+last JJ I-NP
+february's NN I-NP
+1-1/8 CD B-NP
+pct NN I-NP
+rescheduling VBG I-NP
+accord NNS I-NP
+and CC O
+the DT B-NP
+7/8 NN I-NP
+pct NN I-NP
+Venezuela NNP I-NP
+wants NNS I-NP
+. . O
+Azpurua NNP B-NP
+said VBD B-VP
+four NN B-NP
+basic NN I-NP
+elements NNS I-NP
+are VBP B-VP
+being VBN I-VP
+negotiated VBN I-VP
+with IN B-PP
+the DT B-NP
+banks NNS I-NP
+now: NN I-NP
+spread VBD B-VP
+reduction VBN I-VP
+, , O
+deferral JJ B-ADJP
+of IN B-PP
+principal JJ B-NP
+payments NNS I-NP
+due NNS I-NP
+in IN B-PP
+1987 CD B-NP
+and CC I-NP
+1988 CD I-NP
+, , O
+lenghtening VBG B-VP
+the DT B-NP
+12-1/2 CD I-NP
+year NN I-NP
+repayment NN I-NP
+schedule NN I-NP
+, , O
+and CC O
+debt VBN B-VP
+capitalization IN B-PP
+schemes NNS B-NP
+. . O
+Azpurua NNP B-NP
+said VBD B-VP
+the DT B-NP
+governent NN I-NP
+plans NN I-NP
+to TO B-PP
+pay NN B-NP
+2.1 CD I-NP
+billion NN I-NP
+dlrs NN I-NP
+in IN B-PP
+public NNP B-NP
+and CC O
+private JJ B-NP
+debt NN I-NP
+principal NN I-NP
+this DT B-NP
+year NN I-NP
+. . O
+It PRP B-NP
+was VBD B-VP
+due VBD I-VP
+to TO I-VP
+amortize VB I-VP
+1.05 CD B-NP
+billion NN I-NP
+dlrs NN I-NP
+under IN B-PP
+the DT B-NP
+rescheduling NN I-NP
+, , O
+and CC O
+pay NN B-NP
+420 CD I-NP
+mln NN I-NP
+dlrs NN I-NP
+in IN B-PP
+non-restructured JJ B-NP
+principal NN I-NP
+, , O
+both IN B-PP
+public JJ B-NP
+sector NN I-NP
+. . O
+He NNP B-NP
+said VBD B-VP
+venezuela's CD B-NP
+original JJ I-NP
+proposal NN I-NP
+was VBD B-VP
+to TO B-PP
+pay NN B-NP
+no RB I-NP
+principal JJ I-NP
+on IN B-PP
+restructured JJ B-NP
+debt NN I-NP
+this DT B-NP
+year NN I-NP
+, , O
+but IN B-NP
+is VBZ B-VP
+now RB I-VP
+insisting VBG I-VP
+that IN B-SBAR
+if NNP B-NP
+it PRP B-NP
+makes VBZ B-VP
+payments NNS B-NP
+they IN B-PP
+be VB B-NP
+compensated VBN B-VP
+by IN B-PP
+new JJ B-NP
+bank NN I-NP
+loans NNS I-NP
+. . O
+The DT B-NP
+banking VBG I-NP
+sources NNS I-NP
+said VBD B-VP
+the DT B-NP
+committee NN I-NP
+has NNS B-VP
+been VBN I-VP
+prepared VBN I-VP
+to TO I-VP
+lower VB I-VP
+amortizations VBN I-VP
+to TO B-PP
+around IN B-NP
+400 CD I-NP
+mln NN I-NP
+dlrs NN I-NP
+this IN B-PP
+year NN B-NP
+, , O
+but IN B-PP
+that NN B-NP
+no RB B-NP
+direct JJ I-NP
+commitment NN I-NP
+was VBD B-VP
+likely JJ B-ADJP
+on IN B-PP
+new JJ B-NP
+loans NNS I-NP
+. . O
+" CD B-NP
+debtors NNS I-NP
+and CC I-NP
+bank NNS I-NP
+creditors NN I-NP
+have VBP B-VP
+a DT B-NP
+joint JJ I-NP
+responsibility NN I-NP
+and CC O
+there DT B-NP
+will MD B-VP
+be VB I-VP
+no RB I-VP
+lasting VBG I-VP
+solution NN B-NP
+unless NNS I-NP
+a DT B-NP
+positive JJ I-NP
+flow NN I-NP
+of IN B-PP
+financing VBG B-NP
+is VBZ B-VP
+guaranteed NNS B-NP
+, , O
+" NNS B-NP
+azpurua DT B-NP
+said VBD B-VP
+. . O
+However IN B-ADVP
+, , O
+he NNS B-NP
+appeared VBD B-VP
+to TO I-VP
+discard VB I-VP
+earlier JJ B-NP
+venezuelan NN I-NP
+proposals NN I-NP
+for IN B-PP
+a DT B-NP
+direct NN I-NP
+link NN I-NP
+between VBN B-VP
+oil JJ B-NP
+income NN I-NP
+and CC O
+debt VBN B-NP
+payments NNS I-NP
+, , O
+"because NNS B-NP
+circumstances NNS I-NP
+change VBD B-VP
+too RB B-ADJP
+quickly JJ I-ADJP
+. . O
+" NNS B-VP
+At RB B-ADVP
+the DT B-NP
+same JJ I-NP
+time NN I-NP
+, , O
+he NN B-NP
+said VBD B-VP
+the DT B-NP
+government NN I-NP
+is VBZ B-VP
+presently RB I-VP
+studying VBG I-VP
+possible JJ B-NP
+mechanisms NNS I-NP
+for IN B-PP
+capitlizing VBG B-VP
+public NN B-NP
+and CC O
+private RB B-NP
+sector JJ I-NP
+foreign NNS I-NP
+debt VBD B-VP
+, , O
+based NNS B-NP
+on IN B-PP
+experience NN B-NP
+in IN B-PP
+other JJ B-NP
+countries NNS I-NP
+. . O
+The DT B-NP
+rules NN I-NP
+would MD B-VP
+be VB I-VP
+published VBN I-VP
+by IN B-PP
+the DT B-NP
+finance JJ I-NP
+ministry NN I-NP
+and CC O
+the DT B-NP
+central JJ I-NP
+bank NN I-NP
+. . O
+Thomson NNP B-NP
+McKinnon NNP I-NP
+Mortgage NNP I-NP
+Assets NNS I-NP
+Corp NNP I-NP
+, , O
+a DT B-NP
+unit NN I-NP
+of IN B-PP
+Thomson NNP B-NP
+McKinnon NNP I-NP
+Inc NNP I-NP
+, , O
+is VBZ B-VP
+offering IN B-NP
+100 CD I-NP
+mln NN I-NP
+dlrs NN I-NP
+of IN B-PP
+collateralized VBN B-NP
+mortgage NN I-NP
+obligations NNS I-NP
+in IN B-PP
+three DT B-NP
+tranches NNS I-NP
+that IN B-PP
+include JJ B-NP
+floating NN I-NP
+rate NN I-NP
+and CC O
+inverse RB B-VP
+floating VBG I-VP
+rate JJ B-NP
+CMOS NNP I-NP
+. . O
+The DT B-NP
+floating VBG I-NP
+rate NN I-NP
+class NN I-NP
+amounts NNS I-NP
+to TO B-PP
+60 CD B-NP
+mln NN I-NP
+dlrs NN I-NP
+. . O
+It PRP B-NP
+has VBZ B-VP
+an DT B-NP
+average JJ I-NP
+life NN I-NP
+of IN B-PP
+7.11 CD B-NP
+years NNS I-NP
+and CC O
+matures NNS B-NP
+2018. CD B-PP
+The DT B-NP
+CMOs JJ I-NP
+have NN I-NP
+an DT B-NP
+initial JJ I-NP
+coupon NN I-NP
+of IN B-PP
+7.0375 CD B-NP
+pct NN I-NP
+, , O
+which IN B-NP
+will MD B-VP
+be VB I-VP
+reset NN B-NP
+60 CD I-NP
+basis NNS I-NP
+points NNS I-NP
+above VBP B-VP
+LIBOR VBN I-VP
+, , O
+said VBD B-VP
+sole CD B-NP
+manager NN I-NP
+Thomson NNP I-NP
+McKinnon NNP I-NP
+. . O
+The DT B-NP
+inverse JJ I-NP
+floater NN I-NP
+totals IN B-PP
+4.8 CD B-NP
+mln NN I-NP
+dlrs NN I-NP
+. . O
+It PRP B-NP
+has VBZ B-VP
+an DT B-NP
+average JJ I-NP
+life NN I-NP
+of IN B-PP
+13.49 CD B-NP
+years NNS I-NP
+and CC O
+matures NNS B-NP
+2018. CD B-NP
+These NNP I-NP
+CMOs NNS I-NP
+were VBD B-VP
+given JJ I-VP
+an DT B-NP
+initial JJ I-NP
+coupon NN I-NP
+of IN B-PP
+11-1/2 CD B-NP
+pct NN I-NP
+and CC O
+priced VBN B-VP
+at IN B-PP
+104.40. CD B-NP
+Subsequent JJ I-NP
+rates NNS I-NP
+on IN B-PP
+the DT B-NP
+inverse NN I-NP
+floater IN B-PP
+will DT B-NP
+equal JJ I-NP
+11-1/2 CD I-NP
+pct NN I-NP
+minus VBZ B-VP
+the DT B-NP
+product NN I-NP
+of IN B-PP
+three DT B-NP
+times NNS I-NP
+(LIBOR NNP I-NP
+minus NNS I-NP
+6-1/2 CD B-NP
+pct) NNS I-NP
+. . O
+A RB O
+Thomson NNP B-NP
+officer IN I-PRT
+explained VBN B-VP
+that IN B-PP
+the DT B-NP
+coupon NN I-NP
+of IN B-PP
+the DT B-NP
+inverse NN I-NP
+floating IN B-PP
+rate JJ B-NP
+tranche NN I-NP
+would VBN B-VP
+increase IN B-PP
+if NNP B-NP
+LIBOR default I-NP
+declined VBD B-VP
+. . O
+" NN B-VP
+The DT B-NP
+yield JJ I-NP
+floats NN I-NP
+opposite NN I-NP
+of IN B-PP
+LIBOR NNP B-NP
+, , O
+" CD B-NP
+he NN I-NP
+said VBD B-VP
+. . O
+The DT B-NP
+fixed-rate JJ I-NP
+tranche NN I-NP
+totals NNS I-NP
+35.2 IN B-PP
+mln NN B-NP
+dlrs NN I-NP
+. . O
+It PRP B-NP
+has VBZ B-VP
+an DT B-NP
+average JJ I-NP
+life NN I-NP
+of IN B-PP
+3.5 CD B-NP
+years NNS I-NP
+and CC O
+matures NNS B-NP
+2016. CD B-PP
+The DT B-NP
+CMOs JJ I-NP
+were NN I-NP
+assigned VBN B-VP
+a DT B-NP
+7.65 NN I-NP
+pct NN I-NP
+coupon NN I-NP
+and CC O
+par RB B-VP
+pricing VBG I-VP
+. . B-PP
+The DT B-NP
+issue NN I-NP
+is VBZ B-VP
+rated VBN I-VP
+AAA RB B-ADVP
+by IN B-PP
+Standard NNP B-NP
+and CC I-NP
+Poor's NNP I-NP
+and CC O
+secured JJ B-ADVP
+by IN B-SBAR
+Federal JJ B-NP
+Home . I-NP
+Loan NNP I-NP
+Mortgage NNP I-NP
+Corp NNP I-NP
+, , O
+Freddie NNP B-NP
+Mac NNP I-NP
+, , O
+certificates NNS B-NP
+. . O
+OPEC NNP B-NP
+may NN I-NP
+be VB B-VP
+forced VBD I-VP
+to TO B-PP
+meet NN B-NP
+before NN I-NP
+a DT B-NP
+scheduled JJ I-NP
+June CD I-NP
+session NN I-NP
+to TO B-PP
+readdress JJ B-NP
+its NNS I-NP
+production NN I-NP
+cutting VBG I-NP
+agreement NN I-NP
+if IN B-SBAR
+the DT B-NP
+organization NN I-NP
+wants NNS B-VP
+to TO I-VP
+halt VB I-VP
+the DT B-NP
+current NN I-NP
+slide NN I-NP
+in IN B-PP
+oil JJ B-NP
+prices NNS I-NP
+, , O
+oil JJ B-NP
+industry NN I-NP
+analysts NNS I-NP
+said VBD B-VP
+. . O
+" NN B-VP
+The DT B-NP
+movement NN I-NP
+to TO B-VP
+higher VB I-VP
+oil JJ B-NP
+prices NNS I-NP
+was VBD B-VP
+never IN B-ADVP
+to TO B-VP
+be VB I-VP
+as IN B-PP
+easy NN B-NP
+as IN B-PP
+OPEC NNP B-NP
+thought IN B-PP
+. . B-NP
+They NNP I-NP
+may NN I-NP
+need VBD B-VP
+an DT B-NP
+emergency NN I-NP
+meeting VBG B-VP
+to TO B-PP
+sort NN B-NP
+out IN B-PP
+the DT B-NP
+problems NN I-NP
+, , O
+" IN B-NP
+said VBD B-VP
+Daniel CD B-NP
+Yergin NNP I-NP
+, , O
+director IN B-PP
+of IN B-PP
+Cambridge JJ B-NP
+Energy NNP I-NP
+Research NNP I-NP
+Associates NNS I-NP
+, , O
+CERA NNP B-NP
+. . O
+Analysts NNS B-NP
+and CC O
+oil JJ B-NP
+industry NN I-NP
+sources NNS I-NP
+said VBD B-VP
+the DT B-NP
+problem NN I-NP
+OPEC IN B-PP
+faces NNS B-NP
+is VBZ B-VP
+excess NNS B-NP
+oil JJ B-ADJP
+supply RB B-ADVP
+in IN B-PP
+world JJ B-NP
+oil NNS I-NP
+markets NNS I-NP
+. . O
+" NN B-NP
+OPEC's NNS I-NP
+problem NN I-NP
+is VBZ B-VP
+not RB O
+a DT B-NP
+price NN I-NP
+problem NN I-NP
+but NN B-VP
+a DT B-NP
+production NN I-NP
+issue NNS I-NP
+and CC O
+must JJ B-VP
+be VB I-VP
+addressed VBN I-VP
+in IN B-PP
+that DT B-NP
+way NN I-NP
+, , O
+" IN B-NP
+said VBD B-VP
+Paul default B-NP
+Mlotok NNP I-NP
+, , O
+oil JJ B-NP
+analyst NN I-NP
+with IN B-PP
+Salomon NNP B-NP
+Brothers NNS I-NP
+Inc NNP I-NP
+. . O
+He JJ O
+said VBD B-VP
+the DT B-NP
+market's NN I-NP
+earlier IN B-PP
+optimism JJ B-NP
+about NN I-NP
+OPEC NNS I-NP
+and CC O
+its NNS B-NP
+ability NN I-NP
+to TO B-PP
+keep CD B-NP
+production NN I-NP
+under IN B-PP
+control JJ B-NP
+have NN I-NP
+given NNS I-NP
+way NN B-ADJP
+to TO B-PP
+a DT B-NP
+pessimistic NN I-NP
+outlook NN I-NP
+that IN B-PP
+the DT B-NP
+organization NN I-NP
+must NN I-NP
+address NNS I-NP
+soon IN B-PP
+if NNP B-NP
+it PRP B-NP
+wishes VBD B-VP
+to TO B-PP
+regain JJ B-NP
+the DT I-NP
+initiative JJ I-NP
+in IN B-PP
+oil JJ B-NP
+prices NNS I-NP
+. . O
+But JJ B-NP
+some NN I-NP
+other IN B-PP
+analysts NNS B-NP
+were VBD B-VP
+uncertain RB B-ADJP
+that IN B-PP
+even VBN B-NP
+an DT B-NP
+emergency NN I-NP
+meeting VBG B-VP
+would JJ B-NP
+address NNS I-NP
+the DT B-NP
+problem NN I-NP
+of IN B-PP
+OPEC NNP B-NP
+production NN I-NP
+above IN B-PP
+the DT B-NP
+15.8 CD I-NP
+mln NN I-NP
+bpd NNS I-NP
+quota IN B-PP
+set NN B-NP
+last JJ B-NP
+December NNP I-NP
+. . O
+" IN B-SBAR
+OPEC NNP B-NP
+has NNS I-NP
+to TO B-PP
+learn JJ B-NP
+that NN I-NP
+in IN B-PP
+a DT B-NP
+buyers NNS I-NP
+market NN I-NP
+you IN B-PP
+cannot NN B-NP
+have VBP B-VP
+deemed VBN I-VP
+quotas NNS B-NP
+, , O
+fixed NNS B-NP
+prices NNS I-NP
+and CC O
+set NN B-NP
+differentials NNS I-NP
+, , O
+" NNS B-NP
+said VBD B-VP
+the DT B-NP
+regional JJ I-NP
+manager NN I-NP
+for IN B-PP
+one NN B-NP
+of IN B-PP
+the DT B-NP
+major NN I-NP
+oil NN I-NP
+companies NNS I-NP
+who IN B-PP
+spoke NN B-NP
+on IN B-PP
+condition NN B-NP
+that IN B-SBAR
+he DT B-NP
+not NN I-NP
+be VB B-VP
+named RB I-VP
+. . I-VP
+" NN I-VP
+The DT B-NP
+market NN I-NP
+is VBZ B-VP
+now RB I-VP
+trying VBG I-VP
+to TO I-VP
+teach IN B-PP
+them DT B-NP
+that NN I-NP
+lesson IN B-PP
+again NN B-NP
+, , O
+" IN B-PP
+he DT B-NP
+added VBD I-NP
+. . O
+David VBD B-VP
+T NNP B-NP
+. . I-NP
+Mizrahi NNP I-NP
+, , O
+editor CD B-NP
+of IN B-PP
+Mideast JJ B-NP
+reports NNS I-NP
+, , O
+expects NNS B-NP
+OPEC VBD B-VP
+to TO B-PP
+meet JJ B-NP
+before NN I-NP
+June JJ I-NP
+, , O
+although IN B-SBAR
+not JJ B-NP
+immediately NN I-NP
+. . O
+However NNP B-NP
+, , O
+he CD B-NP
+is VBZ B-VP
+not RB O
+optimistic JJ B-NP
+that NN I-NP
+OPEC default I-NP
+can NN I-NP
+address NNS I-NP
+its NNS B-NP
+principal JJ I-NP
+problems NNS I-NP
+. . O
+" IN O
+They NNP B-NP
+will MD B-VP
+not JJ B-NP
+meet NN I-NP
+now NN I-NP
+as IN B-PP
+they NN B-NP
+try NN I-NP
+to TO B-VP
+take VB I-VP
+advantage NN B-NP
+of IN B-PP
+the DT B-NP
+winter NN I-NP
+demand VBN B-VP
+to TO B-PP
+sell JJ B-NP
+their NN I-NP
+oil NNS I-NP
+, , O
+but NNS B-NP
+in IN B-PP
+late JJ B-NP
+March NNP I-NP
+and CC O
+April default B-NP
+when JJ I-NP
+demand VBN I-NP
+slackens NNS I-NP
+, , O
+" NNS B-NP
+Mizrahi NNP I-NP
+said VBD B-VP
+. . O
+But JJ B-NP
+Mizrahi NNP I-NP
+said VBD B-VP
+that NN B-SBAR
+OPEC default B-NP
+is VBZ B-VP
+unlikely RB B-ADJP
+to TO B-VP
+do VB I-VP
+anything VBG I-VP
+more JJ B-NP
+than NN I-NP
+reiterate NN I-NP
+its NNS I-NP
+agreement NN I-NP
+to TO B-PP
+keep CD B-NP
+output NN I-NP
+at IN B-PP
+15.8 CD B-NP
+mln NN I-NP
+bpd NNS I-NP
+. . O
+" NNP B-NP
+Analysts NNS I-NP
+said VBD B-VP
+that IN B-SBAR
+the DT B-NP
+next NN I-NP
+two NN I-NP
+months VBZ B-VP
+will RB I-VP
+be VB I-VP
+critical NN B-NP
+for IN B-PP
+OPEC's NNP B-NP
+ability NN I-NP
+to TO B-VP
+hold VB I-VP
+together NN B-NP
+prices NNS I-NP
+and CC O
+output JJ B-NP
+. . O
+" IN B-PP
+OPEC NNP B-NP
+must JJ I-NP
+hold VBD B-VP
+to TO B-PP
+its NNS B-NP
+pact NN I-NP
+for IN B-PP
+the DT B-NP
+next NN I-NP
+six IN B-PP
+to TO B-PP
+eight JJ B-NP
+weeks NNS I-NP
+since IN B-SBAR
+buyers NNS B-NP
+will MD B-VP
+come VBN I-VP
+back NN B-NP
+into IN B-PP
+the DT B-NP
+market NN I-NP
+then VBN B-VP
+, , O
+" NNP B-NP
+said VBD B-VP
+Dillard CD B-NP
+Spriggs NNS I-NP
+of IN B-PP
+Petroleum default B-NP
+Analysis RB B-VP
+Ltd VBN I-VP
+in IN B-PP
+New NNP B-NP
+York NNP I-NP
+. . O
+But JJ B-NP
+Bijan NNP I-NP
+Moussavar-Rahmani NNP I-NP
+of IN B-PP
+Harvard default B-NP
+University's NNS I-NP
+Energy NNP I-NP
+and CC I-NP
+Environment JJ I-NP
+Policy NNP I-NP
+Center NNP I-NP
+said VBD B-VP
+that NN B-SBAR
+the DT B-NP
+demand VBN B-VP
+for IN B-PP
+OPEC NNP B-NP
+oil NN I-NP
+has NNS I-NP
+been VBN B-VP
+rising VBG I-VP
+through IN B-PP
+the DT B-NP
+first JJ I-NP
+quarter NN I-NP
+and CC O
+this DT B-NP
+may NN I-NP
+have VBP B-VP
+prompted VBN I-VP
+excesses NNS B-NP
+in IN B-PP
+its NNS B-NP
+production NN I-NP
+. . O
+" CD B-NP
+Demand CD I-NP
+for IN B-PP
+their NN B-NP
+(OPEC) default I-NP
+oil JJ I-NP
+is VBZ B-VP
+clearly RB B-ADJP
+above JJ I-ADJP
+15.8 CD B-NP
+mln NN I-NP
+bpd NNS I-NP
+and CC O
+is VBZ B-VP
+probably RB B-NP
+closer NN I-NP
+to TO B-PP
+17 CD B-NP
+mln NN I-NP
+bpd NNS I-NP
+or IN B-PP
+higher NN B-NP
+now RB I-NP
+so JJ I-NP
+what NN I-NP
+we NNS I-NP
+are VBP B-VP
+seeing IN O
+characterized VBN B-VP
+as IN B-PP
+cheating VBG B-NP
+is VBZ B-VP
+OPEC NNP B-NP
+meeting VBG I-NP
+this DT B-NP
+demand VBD B-VP
+through IN B-PP
+current NN B-NP
+production NN I-NP
+, , O
+" IN B-PP
+he DT B-NP
+told JJ I-NP
+Reuters NNS I-NP
+in IN B-PP
+a DT B-NP
+telephone NN I-NP
+interview WRB B-ADVP
+. . O
+BankAmerica NNP B-NP
+Corp NNP I-NP
+is VBZ B-VP
+not RB O
+under IN B-PP
+pressure NN B-NP
+to TO B-PP
+act IN B-NP
+quickly JJ I-NP
+on IN B-PP
+its NNS B-NP
+proposed VBD B-VP
+equity JJ I-VP
+offering VBG I-VP
+and CC I-VP
+would VBN I-VP
+do IN B-PP
+well JJ B-NP
+to TO B-VP
+delay NN I-VP
+it PRP B-NP
+because NN B-NP
+of IN B-PP
+the DT B-NP
+stock's NN I-NP
+recent NN I-NP
+poor NNS I-NP
+performance NNS I-NP
+, , O
+banking NNS B-NP
+analysts NNS I-NP
+said VBD B-VP
+. . B-NP
+Some JJ I-NP
+analysts NNS I-NP
+said VBD B-VP
+they IN B-PP
+have NN B-NP
+recommended VBN B-VP
+BankAmerica NNP B-NP
+delay NN I-NP
+its NNS I-NP
+up VBD B-VP
+to TO B-PP
+one-billion-dlr CD B-NP
+equity NN I-NP
+offering VBG I-NP
+, , O
+which IN B-SBAR
+has NNS B-NP
+yet VBD B-VP
+to TO I-VP
+be VB I-VP
+approved VBN I-VP
+by IN B-PP
+the DT B-NP
+Securities NNS I-NP
+and CC I-NP
+Exchange JJ I-NP
+Commission NNP I-NP
+. . I-NP
+BankAmerica NNP I-NP
+stock NN I-NP
+fell NNS I-NP
+this IN B-PP
+week NN B-NP
+, , O
+along IN B-PP
+with IN B-PP
+other NN B-NP
+banking VBG I-NP
+issues NNS I-NP
+, , O
+on IN B-PP
+the DT B-NP
+news NN I-NP
+that IN B-PP
+Brazil JJ B-NP
+has NNS I-NP
+suspended VBD B-VP
+interest IN B-PP
+payments NNS B-NP
+on IN B-PP
+a DT B-NP
+large JJ I-NP
+portion NN I-NP
+of IN B-PP
+its NNS B-NP
+foreign IN B-PP
+debt NN B-NP
+. . O
+The DT B-NP
+stock NN I-NP
+traded VBN B-VP
+around IN B-PP
+12 CD B-NP
+, , O
+down IN B-PP
+1/8 NN B-NP
+, , O
+this IN B-PP
+afternoon NN B-NP
+, , O
+after IN B-PP
+falling VBG B-VP
+to TO B-PP
+11-1/2 CD B-NP
+earlier NN I-NP
+this IN B-PP
+week NN B-NP
+on IN B-PP
+the DT B-NP
+news NN I-NP
+. . O
+Banking NNP B-NP
+analysts NNS I-NP
+said VBD B-VP
+that IN B-SBAR
+with IN B-PP
+the DT B-NP
+immediate JJ I-NP
+threat NN I-NP
+of IN B-PP
+the DT B-NP
+First JJ I-NP
+Interstate NNP I-NP
+Bancorp NNP I-NP
+<I> NNP I-NP
+takeover IN B-PP
+bid NN B-NP
+gone NN I-NP
+, , O
+BankAmerica NNP B-NP
+is VBZ B-VP
+under IN B-PP
+no NN B-NP
+pressure NN I-NP
+to TO B-PP
+sell JJ B-NP
+the DT I-NP
+securities NN I-NP
+into IN B-PP
+a DT B-NP
+market NN I-NP
+that IN B-NP
+will MD B-VP
+be VB I-VP
+nervous RB B-ADJP
+on IN B-PP
+bank NN B-NP
+stocks NNS I-NP
+in IN B-PP
+the DT B-NP
+near JJ I-NP
+term NN I-NP
+. . O
+BankAmerica NNP O
+filed VBD B-VP
+the DT B-NP
+offer NN I-NP
+on IN B-PP
+January NNP B-NP
+26. CD I-NP
+It PRP B-NP
+was VBD B-VP
+seen JJ B-ADJP
+as IN B-PP
+one NN B-NP
+of IN B-PP
+the DT B-NP
+major NN I-NP
+factors NNS I-NP
+leading VBG B-VP
+the DT B-NP
+First JJ I-NP
+Interstate NNP I-NP
+withdrawing VBG B-VP
+its NNS B-NP
+takeover IN B-PP
+bid VBN B-NP
+on IN B-PP
+February NNP B-NP
+9. CD I-NP
+A RB I-NP
+BankAmerica NNP I-NP
+spokesman NN I-NP
+said VBD B-VP
+SEC CD B-NP
+approval JJ I-NP
+is VBZ B-VP
+taking IN B-PP
+longer JJ B-NP
+than NN I-NP
+expected VBN I-NP
+and CC I-NP
+market JJ I-NP
+conditions NN I-NP
+must JJ I-NP
+now RB B-ADVP
+be VB B-VP
+re-evaluated VBN I-VP
+. . O
+" IN B-PP
+The DT B-NP
+circumstances NNS I-NP
+at IN B-PP
+the DT B-NP
+time NN I-NP
+will MD B-VP
+determine NN I-VP
+what IN B-NP
+we JJ B-NP
+do NN I-NP
+, , O
+" IN B-NP
+said VBD B-VP
+Arthur RB B-ADJP
+Miller JJ I-ADJP
+, , O
+BankAmerica's NNP B-NP
+Vice JJ I-NP
+President NN I-NP
+for IN B-PP
+Financial JJ B-NP
+Communications NNP I-NP
+, , O
+when JJ B-NP
+asked VBD B-VP
+if NNP B-NP
+BankAmerica NNP I-NP
+would VBD B-VP
+proceed NN B-NP
+with IN B-PP
+the DT B-NP
+offer NN I-NP
+immediately NN I-NP
+after IN B-PP
+it PRP B-NP
+receives NNS B-VP
+SEC NNP B-NP
+approval JJ I-NP
+. . O
+" IN B-PP
+I'd NNP B-NP
+put NN B-VP
+it PRP B-NP
+off NNP B-NP
+as IN B-PP
+long NN B-NP
+as IN B-PP
+they NN B-NP
+conceivably NN I-NP
+could VBN B-VP
+, , O
+" NNP B-NP
+said VBD B-VP
+Lawrence CD B-NP
+Cohn NNP I-NP
+, , I-NP
+analyst JJ I-NP
+with IN B-PP
+Merrill default B-NP
+Lynch NNP I-NP
+, , I-NP
+Pierce NNP I-NP
+, , I-NP
+Fenner NNP I-NP
+and CC I-NP
+Smith NNP I-NP
+. . O
+Cohn NNP B-NP
+said VBD B-VP
+the DT B-NP
+longer NN I-NP
+BankAmerica NNP I-NP
+waits NNS I-NP
+, , O
+the DT B-NP
+longer JJR I-NP
+they NN I-NP
+have VBP B-VP
+to TO I-VP
+show WRB I-VP
+the DT B-NP
+market NN I-NP
+an DT B-NP
+improved VBD B-VP
+financial JJ B-NP
+outlook NN I-NP
+. . O
+Although RB O
+BankAmerica NNP B-NP
+has NNS I-NP
+yet VBD B-VP
+to TO I-VP
+specify VB I-VP
+the DT B-NP
+types NNS I-NP
+of IN B-PP
+equities NNS B-NP
+it PRP B-NP
+would VBD B-VP
+offer IN B-ADVP
+, , O
+most JJ B-NP
+analysts NN I-NP
+believed VBN B-VP
+a DT B-NP
+convertible NN I-NP
+preferred VBN I-NP
+stock NN I-NP
+would VBD B-VP
+encompass VBN I-VP
+at IN B-PP
+least JJ B-NP
+part NN I-NP
+of IN B-PP
+it PRP B-NP
+. . O
+Such NNP O
+an DT B-NP
+offering VBG I-NP
+at IN B-PP
+a DT B-NP
+depressed JJ I-NP
+stock NN I-NP
+price NN I-NP
+would VBN B-VP
+mean VBN I-VP
+a DT B-NP
+lower JJ I-NP
+conversion NN I-NP
+price NN I-NP
+and CC O
+more RB B-VP
+dilution VBN I-VP
+to TO B-PP
+BankAmerica NNP B-NP
+stock NN I-NP
+holders NNS I-NP
+, , O
+noted JJ B-NP
+Daniel . I-NP
+Williams NNS I-NP
+, , O
+analyst JJ B-ADJP
+with IN B-PP
+Sutro NNP B-NP
+Group default I-NP
+. . O
+Several JJ B-NP
+analysts NNS I-NP
+said VBD B-VP
+that IN B-SBAR
+while NN B-NP
+they NN I-NP
+believe VB B-VP
+the DT B-NP
+Brazilian JJ I-NP
+debt NN I-NP
+problem NN I-NP
+will RB B-VP
+continue VBD I-VP
+to TO I-VP
+hang NNS B-NP
+over IN B-PP
+the DT B-NP
+banking VBG I-NP
+industry NN I-NP
+through IN B-PP
+the DT B-NP
+quarter NN I-NP
+, , O
+the DT B-NP
+initial JJ I-NP
+shock NN I-NP
+reaction NN I-NP
+is VBZ B-VP
+likely RB B-ADVP
+to TO B-PP
+ease NNS B-NP
+over IN B-PP
+the DT B-NP
+coming VBG I-NP
+weeks NNS I-NP
+. . O
+Nevertheless NNP B-NP
+, , O
+BankAmerica, NNP B-NP
+which IN B-PP
+holds NN B-NP
+about IN B-PP
+2.70 CD B-NP
+billion NN I-NP
+dlrs NN I-NP
+in IN B-PP
+Brazilian JJ B-NP
+loans NNS I-NP
+, , O
+stands NNS B-NP
+to TO B-PP
+lose JJ B-NP
+15-20 CD I-NP
+mln NN I-NP
+dlrs NN I-NP
+if IN B-PP
+the DT B-NP
+interest NN I-NP
+rate NN I-NP
+is VBZ B-VP
+reduced VBN I-VP
+on IN B-PP
+the DT B-NP
+debt NN I-NP
+, , O
+and CC O
+as IN B-PP
+much NN B-NP
+as IN B-PP
+200 CD B-NP
+mln NN I-NP
+dlrs NN I-NP
+if NNP I-NP
+Brazil NNP I-NP
+pays NNS B-VP
+no RB B-NP
+interest NN I-NP
+for IN B-PP
+a DT B-NP
+year NN I-NP
+, , O
+said VBD B-VP
+Joseph NNP B-NP
+Arsenio RB I-NP
+, , O
+analyst JJ B-ADJP
+with IN B-PP
+Birr NNP B-NP
+, , I-NP
+Wilson NNP I-NP
+and CC I-NP
+Co JJ I-NP
+. . O
+He DT B-NP
+noted VBN B-VP
+, , O
+however IN B-ADVP
+, , O
+that IN B-SBAR
+any NN B-NP
+potential JJ I-NP
+losses NNS I-NP
+would VBD B-VP
+not RB B-NP
+show JJ I-NP
+up NNS I-NP
+in IN B-PP
+the DT B-NP
+current NN I-NP
+quarter IN B-PP
+. . O
+The DT B-NP
+Federal JJ I-NP
+Deposit NN I-NP
+Insurance IN B-PP
+Corp NNP B-NP
+(FDIC) NNP I-NP
+said VBD B-VP
+three NN B-NP
+troubled VBD I-NP
+banks NNS I-NP
+in IN B-PP
+Texas NNP B-NP
+and CC I-NP
+Louisiana NNP I-NP
+were VBD B-VP
+merged VBN I-VP
+with IN B-PP
+healthy NN B-NP
+financial JJ I-NP
+institutions NNS I-NP
+. . O
+The DT B-NP
+FDIC NNP I-NP
+said VBD B-VP
+it PRP B-NP
+subsidized VBD B-VP
+the DT B-NP
+merger NN I-NP
+of IN B-PP
+Central JJ B-NP
+Bank NNP I-NP
+and CC I-NP
+Trust JJ I-NP
+Co NNP I-NP
+, , I-NP
+Glenmora NNP I-NP
+, , I-NP
+La. NNP I-NP
+, , I-NP
+with IN B-PP
+the DT B-NP
+healthy NN I-NP
+Peoples NNS I-NP
+Bank NNP I-NP
+and CC I-NP
+Trust JJ I-NP
+Co NNP I-NP
+, , O
+Natchitoches NNS B-NP
+, , O
+La. NNP B-NP
+, , O
+after IN B-PP
+state NN B-NP
+regulators VBN B-VP
+notified VBN I-VP
+it PRP B-NP
+that IN B-PP
+Central JJ B-NP
+was NNS I-NP
+in IN B-PP
+danger NN B-NP
+of IN B-PP
+failing VBG B-NP
+. . O
+Central JJ O
+had VBD B-VP
+assets NNS B-NP
+of IN B-PP
+28.3 CD B-NP
+mln NN I-NP
+dlrs NN I-NP
+. . O
+The DT B-NP
+FDIC JJ I-NP
+said VBD B-VP
+the DT B-NP
+deposits NN I-NP
+of IN B-PP
+the DT B-NP
+failed NN I-NP
+Farmers NNS I-NP
+State VBD B-VP
+Bank NNP B-NP
+, , O
+Hart NNP B-NP
+, , O
+Tex NNP B-NP
+. . O
+, , O
+were VBD B-VP
+assumed VBN I-VP
+by IN B-PP
+Hale NNP B-NP
+County NNP I-NP
+State NNP I-NP
+Bank NNP I-NP
+, , O
+Plainview NNP B-NP
+, , O
+Tex NNP B-NP
+. . O
+Farmers NNS B-NP
+, , O
+with IN B-PP
+9.6 CD B-NP
+mln NN I-NP
+dlrs NN I-NP
+in IN B-PP
+assets NN B-NP
+, , O
+was VBD B-VP
+closed VBN I-VP
+by IN B-PP
+Texas NNP B-NP
+bank NN I-NP
+regulators NNS I-NP
+. . O
+The DT B-NP
+deposits NN I-NP
+of IN B-PP
+the DT B-NP
+failed NN I-NP
+First IN B-PP
+National JJ B-NP
+Bank NNP I-NP
+of IN B-PP
+Crosby NNP B-NP
+, , O
+Crosby NNP B-NP
+, , O
+Tex NNP B-NP
+. . O
+, , O
+with IN B-PP
+total JJ B-NP
+assets NNS I-NP
+of IN B-PP
+8.2 CD B-NP
+mln NN I-NP
+dlrs NN I-NP
+, , O
+were VBD B-VP
+assumed VBN I-VP
+by IN B-PP
+Central JJ B-NP
+Bancshares NNS I-NP
+of IN B-PP
+the DT B-NP
+South NNP I-NP
+Inc NNP I-NP
+, , O
+Birmingham NNP B-NP
+, , O
+Ala. NNP B-NP
+, , O
+after IN B-PP
+First JJ B-NP
+National JJ I-NP
+was VBD I-NP
+closed VBN B-VP
+by IN B-PP
+federal JJ B-NP
+bank NN I-NP
+regulators NNS I-NP
+, , O
+the DT B-NP
+FDIC NNP I-NP
+said VBD B-VP
+. . O
+Brazil's JJ O
+14-bank NN B-NP
+advisory NN I-NP
+committee NNP-named I-NP
+expressed VBN B-VP
+" IN B-PP
+grave JJ B-NP
+concern NN I-NP
+" NN I-NP
+to TO B-PP
+chief NNP B-NP
+debt VBD B-VP
+negotiator NN B-NP
+Antonio RB B-ADVP
+Padua NNP-5 B-NP
+de VBD B-VP
+Seixas NNS B-NP
+over IN B-PP
+the DT B-NP
+country's NN I-NP
+suspension NN I-NP
+of IN B-PP
+interest JJ B-NP
+payments NNS I-NP
+, , O
+according IN B-PP
+to TO B-PP
+a DT B-NP
+telex NN I-NP
+from IN B-PP
+committee NN B-NP
+chairman NN I-NP
+Citibank NNP I-NP
+to TO B-PP
+creditor NN B-NP
+banks NNS I-NP
+worldwide VBD B-VP
+. . B-NP
+Bankers NNS I-NP
+said VBD B-VP
+the DT B-NP
+diplomatic NN I-NP
+phrase NN I-NP
+belied VBN B-VP
+the DT B-NP
+deep NN I-NP
+anger IN B-PP
+and CC O
+frustration VBN B-VP
+on IN B-PP
+the DT B-NP
+committee NN I-NP
+over IN B-PP
+Brazil's NNP B-NP
+unilateral JJ I-NP
+move NN I-NP
+last JJ B-NP
+Friday NNP I-NP
+and CC O
+its NNS B-NP
+subsequent JJ I-NP
+freeze NNS I-NP
+on IN B-PP
+some DT B-NP
+15 CD I-NP
+billion NN I-NP
+dlrs NN I-NP
+of IN B-PP
+short-term NN B-NP
+trade NN I-NP
+and CC O
+interbank RB B-NP
+lines NNS I-NP
+. . O
+Seixas NNS B-NP
+, , O
+director NNS B-NP
+of IN B-PP
+the DT B-NP
+Brazilian JJ I-NP
+central NN I-NP
+bank's NNS I-NP
+foreign IN B-PP
+debt NN B-NP
+department NN I-NP
+, , O
+met IN B-PP
+the DT B-NP
+full JJ I-NP
+panel NN I-NP
+on IN B-PP
+Tuesday NNP B-NP
+and CC I-NP
+Wednesday NNP B-NP
+. . O
+Seixas NNS B-NP
+, , O
+who NNS B-NP
+met NN I-NP
+again IN B-PP
+this DT B-NP
+morning VBG I-NP
+with IN B-PP
+senior NNP B-NP
+Citibank NNP I-NP
+executive JJ I-NP
+William . I-NP
+Rhodes NNS I-NP
+and CC I-NP
+representatives NNS I-NP
+from IN B-PP
+committee NN B-NP
+vice-chairmen VBN I-NP
+Morgan NNP I-NP
+Guaranty NNP I-NP
+Trust JJ I-NP
+Co NNP I-NP
+and CC I-NP
+Lloyds NNP I-NP
+Bank NNP I-NP
+Plc NNP I-NP
+, , O
+told JJ B-NP
+the DT I-NP
+banks NNS I-NP
+that IN B-PP
+the DT B-NP
+government NN I-NP
+was VBD B-VP
+preparing VBG I-VP
+a DT B-NP
+telex NN I-NP
+to TO B-PP
+explain NNS B-NP
+and CC O
+clarify VB B-VP
+the DT B-NP
+freeze NN I-NP
+on IN B-PP
+short-term JJ B-NP
+credits NN I-NP
+. . O
+The DT B-NP
+telex NN I-NP
+could VBN B-VP
+be VB I-VP
+sent NN B-NP
+to TO B-PP
+creditors NN B-NP
+as IN B-PP
+early NN B-NP
+as IN B-PP
+today NN B-NP
+, , O
+bankers NNS B-NP
+said VBD B-VP
+. . O
+Despite JJ O
+the DT B-NP
+rising VBG I-NP
+tempers NNS I-NP
+, , O
+bankers NNS B-NP
+said VBD B-VP
+there EX B-NP
+are VBP B-VP
+no RB I-VP
+plans VBN I-VP
+for IN B-PP
+Brazilian NNP B-NP
+finance NN I-NP
+minister NN I-NP
+Dilson NNP I-NP
+Funaro NNP I-NP
+to TO B-PP
+meet JJ B-NP
+commercial NN I-NP
+bankers NNS I-NP
+during IN B-PP
+his NNS B-NP
+trip VBD B-VP
+to TO B-PP
+Washington NNP B-NP
+on IN B-PP
+Friday NNP B-NP
+and CC O
+Saturday NNP B-NP
+. . I-NP
+Funaro NNP I-NP
+will MD B-VP
+be VB I-VP
+explaining VBG I-VP
+Brazil's NNS B-NP
+actions VBD B-VP
+to TO B-PP
+U.S. NNP B-NP
+Treasury NNP I-NP
+Secretary NNP I-NP
+James NNP I-NP
+Baker NNP I-NP
+, , O
+Federal JJ B-NP
+Reserve . I-NP
+Board NNP I-NP
+chairman NN I-NP
+Paul default I-NP
+Volcker NNP I-NP
+and CC I-NP
+International JJ I-NP
+Monetary NNP I-NP
+Fund NNP I-NP
+managing VBG B-VP
+director IN B-PP
+Michel default B-NP
+Camdessus NNP I-NP
+before NN I-NP
+travelling IN B-NP
+to TO B-VP
+Europe VB I-VP
+at IN B-PP
+the DT B-NP
+weekend JJ I-NP
+. . O
+Sentence NN B-NP
+number NN I-NP
+1 CD I-NP
+has VBZ B-VP
+6 CD I-NP
+words NNS I-NP
+. . O
+Sentence NN B-NP
+number NN I-NP
+2 CD I-NP
+, , O
+5 CD B-NP
+words NNS I-NP
+. . O

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/analysis/opennlp/src/tools/test-model-data/lemmas.txt
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/tools/test-model-data/lemmas.txt b/lucene/analysis/opennlp/src/tools/test-model-data/lemmas.txt
new file mode 100644
index 0000000..be02176
--- /dev/null
+++ b/lucene/analysis/opennlp/src/tools/test-model-data/lemmas.txt
@@ -0,0 +1,875 @@
+Showers	NNS	shower
+continued	VBD	continue
+throughout	IN	throughout
+the	DT	the
+week	NN	week
+in	IN	in
+the	DT	the
+Bahia	NNP	bahia
+cocoa	NN	cocoa
+zone	NN	zone
+,	,	,
+alleviating	VBG	alleviate
+the	DT	the
+drought	NN	drought
+since	IN	since
+early	JJ	early
+January	NNP	january
+and	CC	and
+improving	VBG	improve
+prospects	NNS	prospect
+for	IN	for
+the	DT	the
+coming	VBG	come
+temporao	NN	temporao
+,	,	,
+although	IN	although
+normal	JJ	normal
+humidity	NN	humidity
+levels	NNS	level
+have	VBP	have
+not	RB	not
+been	VBN	be
+restored	VBN	restore
+,	,	,
+Comissaria	NNP	comissaria
+Smith	NNP	smith
+said	VBD	say
+in	IN	in
+its	PRP$	its
+weekly	JJ	weekly
+review	NN	review
+.	.	.
+
+The	DT	the
+dry	JJ	dry
+period	NN	period
+means	VBZ	mean
+the	DT	the
+temporao	NN	temporao
+will	MD	will
+be	VB	be
+late	RB	late
+this	DT	this
+year	NN	year
+.	.	.
+
+Arrivals	NNS	arrival
+for	IN	for
+the	DT	the
+week	NN	week
+ended	VBN	end
+February	NNP	february
+22	CD	22
+were	VBD	be
+155	CD	155
+bags	NNS	bag
+of	IN	of
+60	CD	60
+kilos	NN	kilo
+making	VBG	make
+a	DT	a
+cumulative	JJ	cumulative
+total	NN	total
+for	IN	for
+the	DT	the
+season	NN	season
+of	IN	of
+5	CD	5
+mln	NN	mln
+against	IN	against
+5	CD	5
+at	IN	at
+the	DT	the
+same	JJ	same
+stage	NN	stage
+last	JJ	last
+year	NN	year
+.	.	.
+
+Again	RB	again
+it	PRP	it
+seems	VBZ	seem
+that	IN	that
+cocoa	NN	cocoa
+delivered	VBN	deliver
+earlier	RBR	early
+on	IN	on
+consignment	NN	consignment
+was	VBD	be
+included	VBN	include
+in	IN	in
+the	DT	the
+arrivals	NNS	arrival
+figures	NNS	figure
+.	.	.
+
+Comissaria	NNP	comissaria
+Smith	NNP	smith
+said	VBD	say
+there	EX	there
+is	VBZ	be
+still	RB	still
+some	DT	some
+doubt	NN	doubt
+as	IN	as
+to	TO	to
+how	WRB	how
+much	JJ	much
+old	JJ	old
+crop	NN	crop
+cocoa	NN	cocoa
+is	VBZ	be
+still	RB	still
+available	JJ	available
+as	IN	as
+harvesting	NN	harvesting
+has	VBZ	has
+practically	RB	practically
+come	VBN	come
+to	TO	to
+an	DT	an
+end	NN	end
+.	.	.
+
+With	IN	with
+total	JJ	total
+Bahia	NNP	bahia
+crop	NN	crop
+estimates	NNS	estimate
+around	IN	around
+6	CD	6
+mln	NN	mln
+bags	NNS	bag
+and	CC	and
+sales	NNS	sale
+standing	VBG	stand
+at	IN	at
+almost	RB	almost
+6	CD	6
+mln	NN	mln
+there	EX	there
+are	VBP	are
+a	DT	a
+few	JJ	few
+hundred	CD	hundred
+thousand	CD	thousand
+bags	NNS	bag
+still	RB	still
+in	IN	in
+the	DT	the
+hands	NNS	hand
+of	IN	of
+farmers	NNS	farmer
+,	,	,
+middlemen	NNS	middleman
+,	,	,
+exporters	NNS	exporter
+and	CC	and
+processors	NNS	processor
+.	.	.
+
+There	EX	there
+are	VBP	are
+doubts	NNS	doubt
+as	IN	as
+to	TO	to
+how	WRB	how
+much	RB	much
+of	IN	of
+this	DT	this
+cocoa	NN	cocoa
+would	MD	would
+be	VB	be
+fit	NN	fit
+for	IN	for
+export	NN	export
+as	IN	as
+shippers	NNS	shipper
+are	VBP	are
+now	RB	now
+experiencing	VBG	experience
+dificulties	NNS	dificulty
+in	IN	in
+obtaining	VBG	obtain
++	+	+
+Bahia	NNP	bahia
+superior	JJ	superior
++	+	+
+certificates	NNS	certificate
+.	.	.
+
+In	IN	in
+view	NN	view
+of	IN	of
+the	DT	the
+lower	JJR	low
+quality	NN	quality
+over	IN	over
+recent	JJ	recent
+weeks	NNS	week
+farmers	NNS	farmer
+have	VBP	have
+sold	VBN	sold
+a	DT	a
+good	JJ	good
+part	NN	part
+of	IN	of
+their	PRP$	their
+cocoa	NN	cocoa
+held	VBN	held
+on	IN	on
+consignment	NN	consignment
+.	.	.
+
+Comissaria	NNP	comissaria
+Smith	NNP	smith
+said	VBD	say
+spot	NN	spot
+bean	NN	bean
+prices	NNS	price
+rose	VBD	rise
+to	TO	to
+340	CD	340
+to	TO	to
+350	CD	350
+cruzados	NN	cruzado
+per	IN	per
+arroba	NN	arroba
+of	IN	of
+15	CD	15
+kilos	NN	kilo
+.	.	.
+
+Bean	NNP	bean
+shippers	NNS	shipper
+were	VBD	be
+reluctant	JJ	reluctant
+to	TO	to
+offer	VB	offer
+nearby	JJ	nearby
+shipment	NN	shipment
+and	CC	and
+only	RB	only
+limited	JJ	limited
+sales	NNS	sale
+were	VBD	be
+booked	VBN	book
+for	IN	for
+March	NNP	march
+shipment	NN	shipment
+at	IN	at
+1	CD	1
+to	TO	to
+1	CD	1
+dlrs	NNS	dlr
+per	IN	per
+tonne	NN	tonne
+to	TO	to
+ports	NNS	port
+to	TO	to
+be	VB	be
+named	VBN	name
+.	.	.
+
+New	JJ	new
+crop	NN	crop
+sales	NNS	sale
+were	VBD	be
+also	RB	also
+light	JJ	light
+and	CC	and
+all	DT	all
+to	TO	to
+open	JJ	open
+ports	NNS	port
+with	IN	with
+June	NNP	june
+/	/	/
+July	NNP	july
+going	VBG	go
+at	IN	at
+1	CD	1
+and	CC	and
+1	CD	1
+dlrs	NNS	dlr
+and	CC	and
+at	IN	at
+35	CD	35
+and	CC	and
+45	CD	45
+dlrs	NNS	dlr
+under	IN	under
+New	NNP	New
+York	NNP	York
+july	NN	july
+,	,	,
+Aug	NNP	Aug
+/	/	/
+Sept	NNP	Sept
+at	IN	at
+1	CD	1
+,	,	,
+1	CD	1
+and	CC	and
+1	CD	1
+dlrs	NNS	dlr
+per	IN	per
+tonne	NN	tonne
+FOB	NNP	FOB
+.	.	.
+
+Routine	JJ	routine
+sales	NNS	sale
+of	IN	of
+butter	NN	butter
+were	VBD	be
+made	VBN	make
+.	.	.
+
+March	NNP	march
+/	/	/
+April	NNP	april
+sold	VBD	sell
+at	IN	at
+4	CD	4
+,	,	,
+4	CD	4
+and	CC	and
+4	CD	4
+dlrs	NNS	dlr
+.	.	.
+
+April	NNP	april
+/	/	/
+May	NNP	may
+butter	NN	butter
+went	VBD	went
+at	IN	at
+2	CD	2
+times	NNS	time
+New	NNP	new
+York	NNP	york
+May	NNP	may
+,	,	,
+June	NNP	june
+/	/	/
+July	NNP	july
+at	IN	at
+4	CD	4
+and	CC	and
+4	CD	4
+dlrs	NNS	dlr
+,	,	,
+Aug	NNP	aug
+/	/	/
+Sept	NNP	sept
+at	IN	at
+4	CD	4
+to	TO	to
+4	CD	4
+dlrs	NNS	dlr
+and	CC	and
+at	IN	at
+2	CD	2
+and	CC	and
+2	CD	2
+times	NNS	time
+New	NNP	new
+York	NNP	york
+Sept	NNP	sept
+and	CC	and
+Oct	NNP	oct
+/	/	/
+Dec	NNP	dec
+at	IN	at
+4	CD	4
+dlrs	NNS	dlr
+and	CC	and
+2	CD	2
+times	NNS	time
+New	NNP	new
+York	NNP	york
+Dec	NNP	dec
+,	,	,
+Comissaria	NNP	comissaria
+Smith	NNP	smith
+said	VBD	say
+.	.	.
+
+Destinations	NNS	destination
+were	VBD	be
+the	DT	the
+U.S.	NNP	u.s.
+,	,	,
+Covertible	JJ	covertible
+currency	NN	currency
+areas	NNS	area
+,	,	,
+Uruguay	NNP	uruguay
+and	CC	and
+open	JJ	open
+ports	NNS	port
+.	.	.
+
+Cake	NNP	cake
+sales	NNS	sale
+were	VBD	be
+registered	VBN	register
+at	IN	at
+785	CD	785
+to	TO	to
+995	CD	995
+dlrs	NNS	dlr
+for	IN	for
+March	NNP	march
+/	/	/
+April	NNP	april
+,	,	,
+785	CD	785
+dlrs	NNS	dlr
+for	IN	for
+May	NNP	may
+,	,	,
+753	CD	753
+dlrs	NNS	dlr
+for	IN	for
+Aug	NNP	aug
+and	CC	and
+0	CD	0
+times	NNS	time
+New	NNP	new
+York	NNP	york
+Dec	NNP	dec
+for	IN	for
+Oct	NNP	oct
+/	/	/
+Dec	NNP	dec
+.	.	.
+
+Buyers	NNS	buyer
+were	VBD	be
+the	DT	the
+U.S.	NNP	u.s.
+,	,	,
+Argentina	NNP	argentina
+,	,	,
+Uruguay	NNP	uruguay
+and	CC	and
+convertible	JJ	convertible
+currency	NN	currency
+areas	NNS	area
+.	.	.
+
+Liquor	NNP	liquor
+sales	NNS	sale
+were	VBD	be
+limited	VBN	limit
+with	IN	with
+March	NNP	march
+/	/	/
+April	NNP	april
+selling	VBG	sell
+at	IN	at
+2	CD	2
+and	CC	and
+2	CD	2
+dlrs	NNS	dlr
+,	,	,
+June	NNP	june
+/	/	/
+July	NNP	july
+at	IN	at
+2	CD	2
+dlrs	NNS	dlr
+and	CC	and
+at	IN	at
+1	CD	1
+times	NNS	time
+New	NNP	new
+York	NNP	york
+July	NNP	july
+,	,	,
+Aug	NNP	aug
+/	/	/
+Sept	NNP	sept
+at	IN	at
+2	CD	2
+dlrs	NNS	dlr
+and	CC	and
+at	IN	at
+1	CD	1
+times	NNS	time
+New	NNP	new
+York	NNP	york
+Sept	NNP	sept
+and	CC	and
+Oct	NNP	oct
+/	/	/
+Dec	NNP	dec
+at	IN	at
+1	CD	1
+times	NNS	time
+New	NNP	new
+York	NNP	york
+Dec	NNP	dec
+,	,	,
+Comissaria	NNP	comissaria
+Smith	NNP	smith
+said	VBD	say
+.	.	.
+
+Total	JJ	total
+Bahia	NN	bahia
+sales	NNS	sale
+are	VBP	be
+currently	RB	currently
+estimated	VBN	estimate
+at	IN	at
+6	CD	6
+mln	NN	mln
+bags	NNS	bag
+against	IN	against
+the	DT	the
+1986/87	CD	1986/87
+crop	NN	crop
+and	CC	and
+1	CD	1
+mln	NN	mln
+bags	NNS	baga
+against	IN	against
+the	DT	the
+1987/88	CD	1987/88
+crop	NN	crop
+.	.	.
+
+Final	JJ	final
+figures	NNS	figure
+for	IN	for
+the	DT	the
+period	NN	period
+to	TO	to
+February	NNP	february
+28	CD	28
+are	VBP	be
+expected	VBN	expect
+to	TO	to
+be	VB	be
+published	VBN	publish
+by	IN	by
+the	DT	the
+Brazilian	JJ	brazilian
+Cocoa	NNP	cocoa
+Trade	NNP	trade
+Commission	NNP	commission
+after	IN	after
+carnival	NN	carnival
+which	WDT	which
+ends	VBZ	end
+midday	NN	midday
+on	IN	on
+February	NNP	february
+27	CD	27
+.	.	.
+
+Iran	NNP	iran
+announced	VBD	announce
+tonight	NN	tonight
+that	IN	that
+its	PRP$	its
+major	JJ	major
+offensive	NN	offensive
+against	IN	against
+Iraq	NNP	iraq
+in	IN	in
+the	DT	the
+Gulf	NNP	gulf
+war	NN	war
+had	VBD	have
+ended	VBN	end
+after	IN	after
+dealing	VBG	deal
+savage	JJ	savage
+blows	NNS	blow
+against	IN	against
+the	DT	the
+Baghdad	NNP	baghdad
+government	NN	government
+.	.	.
+
+The	DT	the
+Iranian	JJ	iranian
+news	NN	news
+agency	NN	agency
+IRNA	NNP	irna
+,	,	,
+in	IN	in
+a	DT	a
+report	NN	report
+received	VBN	receive
+in	IN	in
+London	NNP	London
+,	,	,
+said	VBD	say
+the	DT	the
+operation	NN	operation
+code	NNP-named	code
+Karbala-5	NNP	karbala-5
+launched	VBD	launch
+into	IN	into
+Iraq	NNP	iraq
+on	IN	on
+January	NNP	january
+9	CD	9
+was	VBD	be
+now	RB	now
+over	RP	over
+.	.	.
+
+It	PRP	it
+quoted	VBD	quote
+a	DT	a
+joint	NN	joint
+statewment	NN	statement
+by	IN	by
+the	DT	the
+Iranian	JJ	iranian
+Army	NNP	army
+and	CC	and
+Revolutionary	NNP	revolutionary
+Guards	NNPS	guards
+Corps	NNP	corps
+as	IN	as
+saying	VBG	say
+that	IN	that
+their	PRP$	their
+forces	NNS	force
+had	VBD	have
+dealt	VBD	deal
+one	CD	one
+of	IN	of
+the	DT	the
+severest	JJS	severe
+blows	NNS	blow
+on	IN	on
+the	DT	the
+Iraqi	JJ	iraqi
+war	NN	war
+machine	NN	machine
+in	IN	in
+the	DT	the
+history	NN	history
+of	IN	of
+the	DT	the
+Iraq-imposed	JJ	iraq-imposed
+war	NN	war
+.	.	.
+
+The	DT	the
+statement	NN	statement
+by	IN	by
+the	DT	the
+Iranian	JJ	iranian
+High	NNP	high
+Command	NNP	command
+appeared	VBD	appear
+to	TO	to
+herald	VB	herald
+the	DT	the
+close	NN	close
+of	IN	of
+an	DT	an
+assault	NN	assault
+on	IN	on
+the	DT	the
+port	JJ	port
+city	NN	city
+of	IN	of
+Basra	NNP	basra
+in	IN	in
+southern	JJ	southern
+Iraq	NNP	iraq
+.	.	.
+
+The	DT	the
+operation	NN	operation
+was	VBD	be
+launched	VBN	launch
+at	IN	at
+a	DT	a
+time	NN	time
+when	WRB	when
+the	DT	the
+Baghdad	NNP	baghdad
+government	NN	government
+was	VBD	be
+spreading	VBG	spread
+extensive	JJ	extensive
+propaganda	NN	propaganda
+on	IN	on
+the	DT	the
+resistance	NN	resistance
+power	NN	power
+of	IN	of
+its	PRP$	its
+army	NN	army
+:	...	:
+,	,	,
+said	VBD	say
+the	DT	the
+statement	NN	statement
+quoted	VBN	quot
+by	IN	by
+IRNA	NNP	irna
+.	.	.
+
+It	PRP	it
+claimed	VBD	claim
+massive	JJ	massive
+victories	NNS	victory
+in	IN	in
+the	DT	the
+seven-week	NN	seven-week
+offensive	JJ	offensive
+and	CC	and
+called	VBN	call
+on	IN	on
+supporters	NNS	supporter
+of	IN	of
+Baghdad	NNP	baghdad
+to	TO	to
+come	VB	come
+to	TO	to
+their	PRP$	their
+senses	NNS	sense
+and	CC	and
+discontinue	VB	discontinue
+support	NN	support
+for	IN	for
+what	WP	what
+it	PRP	it
+called	VBD	called
+the	DT	the
+tottering	VBG	totter
+regime	NN	regime
+in	IN	in
+Iraq	NNP	iraq
+.	.	.
+
+Iran	NNP	iran
+said	VBD	say
+its	PRP$	its
+forces	NNS	force
+had	VBD	have
+liberated	JJ	liberate
+155	CD	155
+square	JJ	square
+kilometers	NNS	kilometer
+of	IN	of
+enemy-occupied	JJ	enemy-occupied
+territory	NN	territory
+during	IN	during
+the	DT	the
+1987	CD	1987
+offensive	NN	offensive
+and	CC	and
+taken	VBN	take
+over	IN	over
+islands	NNS	island
+,	,	,
+townships	NNS	township
+,	,	,
+rivers	NNS	river
+and	CC	and
+part	NN	part
+of	IN	of
+a	DT	a
+road	NN	road
+leading	VBG	lead
+into	IN	into
+Basra	NNP	basra
+.	.	.
+
+The	DT	the
+Iranian	JJ	iranian
+forces	NNS	force
+are	VBP	be
+in	IN	in
+full	JJ	full
+control	NN	control
+of	IN	of
+these	DT	these
+areas	NNS	area
+,	,	,
+the	DT	the
+statement	NN	statement
+said	VBD	say
+.	.	.
+
+It	PRP	it
+said	VBD	say
+81	CD	81
+Iraqi	JJ	iraqi
+brigades	NNS	brigade
+and	CC	and
+battalions	NNS	battalion
+were	VBD	be
+totally	RB	totally
+destroyed	VBN	destroy
+,	,	,
+along	IN	along
+with	IN	with
+700	CD	700
+tanks	NNS	tank
+and	CC	and
+1	CD	1
+other	JJ	other
+vehicles	NNS	vehicle
+.	.	.
+
+The	DT	the
+victory	NN	victory
+list	NN	list
+also	RB	also
+included	VBD	include
+80	CD	80
+warplanes	NNS	warplane
+downed	VBD	down
+,	,	,
+250	CD	250
+anti	NN	anti
+:	-	:
+aircraft	NN	aircraft
+guns	NNS	gun
+and	CC	and
+400	CD	400
+pieces	NNS	piece
+of	IN	of
+military	JJ	military
+hardware	NN	hardware
+destroyed	VBN	destroy
+and	CC	and
+the	DT	the
+seizure	NN	seizure
+of	IN	of
+220	CD	220
+tanks	NNS	tank
+and	CC	and
+armoured	JJ	armoured
+personnel	NNS	personnel
+carriers	NNS	carrier
+.	.	.
+They	NNP	they
+sent	VBD	send
+him	PRP	he
+running	VBG	run
+in	IN	in
+the	DT	the
+evening	NN	evening
+.	.	.
+He	PRP	he
+did	VBD	do
+not	RB	not
+come	VB	come
+back	RB	back
+.	.	.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/analysis/opennlp/src/tools/test-model-data/ner_TrainerParams.txt
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/tools/test-model-data/ner_TrainerParams.txt b/lucene/analysis/opennlp/src/tools/test-model-data/ner_TrainerParams.txt
new file mode 100644
index 0000000..e01ad50
--- /dev/null
+++ b/lucene/analysis/opennlp/src/tools/test-model-data/ner_TrainerParams.txt
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# maxent won't work on a small training set. Use perceptron, train on one word.
+
+Algorithm=PERCEPTRON
+Iterations=200
+Cutoff=5
+Threads=2


[08/12] lucene-solr:branch_7x: LUCENE-2899: Add OpenNLP Analysis capabilities as a module

Posted by sa...@apache.org.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/licenses/opennlp-tools-LICENSE-ASL.txt
----------------------------------------------------------------------
diff --git a/lucene/licenses/opennlp-tools-LICENSE-ASL.txt b/lucene/licenses/opennlp-tools-LICENSE-ASL.txt
new file mode 100644
index 0000000..d645695
--- /dev/null
+++ b/lucene/licenses/opennlp-tools-LICENSE-ASL.txt
@@ -0,0 +1,202 @@
+
+                                 Apache License
+                           Version 2.0, January 2004
+                        http://www.apache.org/licenses/
+
+   TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
+
+   1. Definitions.
+
+      "License" shall mean the terms and conditions for use, reproduction,
+      and distribution as defined by Sections 1 through 9 of this document.
+
+      "Licensor" shall mean the copyright owner or entity authorized by
+      the copyright owner that is granting the License.
+
+      "Legal Entity" shall mean the union of the acting entity and all
+      other entities that control, are controlled by, or are under common
+      control with that entity. For the purposes of this definition,
+      "control" means (i) the power, direct or indirect, to cause the
+      direction or management of such entity, whether by contract or
+      otherwise, or (ii) ownership of fifty percent (50%) or more of the
+      outstanding shares, or (iii) beneficial ownership of such entity.
+
+      "You" (or "Your") shall mean an individual or Legal Entity
+      exercising permissions granted by this License.
+
+      "Source" form shall mean the preferred form for making modifications,
+      including but not limited to software source code, documentation
+      source, and configuration files.
+
+      "Object" form shall mean any form resulting from mechanical
+      transformation or translation of a Source form, including but
+      not limited to compiled object code, generated documentation,
+      and conversions to other media types.
+
+      "Work" shall mean the work of authorship, whether in Source or
+      Object form, made available under the License, as indicated by a
+      copyright notice that is included in or attached to the work
+      (an example is provided in the Appendix below).
+
+      "Derivative Works" shall mean any work, whether in Source or Object
+      form, that is based on (or derived from) the Work and for which the
+      editorial revisions, annotations, elaborations, or other modifications
+      represent, as a whole, an original work of authorship. For the purposes
+      of this License, Derivative Works shall not include works that remain
+      separable from, or merely link (or bind by name) to the interfaces of,
+      the Work and Derivative Works thereof.
+
+      "Contribution" shall mean any work of authorship, including
+      the original version of the Work and any modifications or additions
+      to that Work or Derivative Works thereof, that is intentionally
+      submitted to Licensor for inclusion in the Work by the copyright owner
+      or by an individual or Legal Entity authorized to submit on behalf of
+      the copyright owner. For the purposes of this definition, "submitted"
+      means any form of electronic, verbal, or written communication sent
+      to the Licensor or its representatives, including but not limited to
+      communication on electronic mailing lists, source code control systems,
+      and issue tracking systems that are managed by, or on behalf of, the
+      Licensor for the purpose of discussing and improving the Work, but
+      excluding communication that is conspicuously marked or otherwise
+      designated in writing by the copyright owner as "Not a Contribution."
+
+      "Contributor" shall mean Licensor and any individual or Legal Entity
+      on behalf of whom a Contribution has been received by Licensor and
+      subsequently incorporated within the Work.
+
+   2. Grant of Copyright License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      copyright license to reproduce, prepare Derivative Works of,
+      publicly display, publicly perform, sublicense, and distribute the
+      Work and such Derivative Works in Source or Object form.
+
+   3. Grant of Patent License. Subject to the terms and conditions of
+      this License, each Contributor hereby grants to You a perpetual,
+      worldwide, non-exclusive, no-charge, royalty-free, irrevocable
+      (except as stated in this section) patent license to make, have made,
+      use, offer to sell, sell, import, and otherwise transfer the Work,
+      where such license applies only to those patent claims licensable
+      by such Contributor that are necessarily infringed by their
+      Contribution(s) alone or by combination of their Contribution(s)
+      with the Work to which such Contribution(s) was submitted. If You
+      institute patent litigation against any entity (including a
+      cross-claim or counterclaim in a lawsuit) alleging that the Work
+      or a Contribution incorporated within the Work constitutes direct
+      or contributory patent infringement, then any patent licenses
+      granted to You under this License for that Work shall terminate
+      as of the date such litigation is filed.
+
+   4. Redistribution. You may reproduce and distribute copies of the
+      Work or Derivative Works thereof in any medium, with or without
+      modifications, and in Source or Object form, provided that You
+      meet the following conditions:
+
+      (a) You must give any other recipients of the Work or
+          Derivative Works a copy of this License; and
+
+      (b) You must cause any modified files to carry prominent notices
+          stating that You changed the files; and
+
+      (c) You must retain, in the Source form of any Derivative Works
+          that You distribute, all copyright, patent, trademark, and
+          attribution notices from the Source form of the Work,
+          excluding those notices that do not pertain to any part of
+          the Derivative Works; and
+
+      (d) If the Work includes a "NOTICE" text file as part of its
+          distribution, then any Derivative Works that You distribute must
+          include a readable copy of the attribution notices contained
+          within such NOTICE file, excluding those notices that do not
+          pertain to any part of the Derivative Works, in at least one
+          of the following places: within a NOTICE text file distributed
+          as part of the Derivative Works; within the Source form or
+          documentation, if provided along with the Derivative Works; or,
+          within a display generated by the Derivative Works, if and
+          wherever such third-party notices normally appear. The contents
+          of the NOTICE file are for informational purposes only and
+          do not modify the License. You may add Your own attribution
+          notices within Derivative Works that You distribute, alongside
+          or as an addendum to the NOTICE text from the Work, provided
+          that such additional attribution notices cannot be construed
+          as modifying the License.
+
+      You may add Your own copyright statement to Your modifications and
+      may provide additional or different license terms and conditions
+      for use, reproduction, or distribution of Your modifications, or
+      for any such Derivative Works as a whole, provided Your use,
+      reproduction, and distribution of the Work otherwise complies with
+      the conditions stated in this License.
+
+   5. Submission of Contributions. Unless You explicitly state otherwise,
+      any Contribution intentionally submitted for inclusion in the Work
+      by You to the Licensor shall be under the terms and conditions of
+      this License, without any additional terms or conditions.
+      Notwithstanding the above, nothing herein shall supersede or modify
+      the terms of any separate license agreement you may have executed
+      with Licensor regarding such Contributions.
+
+   6. Trademarks. This License does not grant permission to use the trade
+      names, trademarks, service marks, or product names of the Licensor,
+      except as required for reasonable and customary use in describing the
+      origin of the Work and reproducing the content of the NOTICE file.
+
+   7. Disclaimer of Warranty. Unless required by applicable law or
+      agreed to in writing, Licensor provides the Work (and each
+      Contributor provides its Contributions) on an "AS IS" BASIS,
+      WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
+      implied, including, without limitation, any warranties or conditions
+      of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
+      PARTICULAR PURPOSE. You are solely responsible for determining the
+      appropriateness of using or redistributing the Work and assume any
+      risks associated with Your exercise of permissions under this License.
+
+   8. Limitation of Liability. In no event and under no legal theory,
+      whether in tort (including negligence), contract, or otherwise,
+      unless required by applicable law (such as deliberate and grossly
+      negligent acts) or agreed to in writing, shall any Contributor be
+      liable to You for damages, including any direct, indirect, special,
+      incidental, or consequential damages of any character arising as a
+      result of this License or out of the use or inability to use the
+      Work (including but not limited to damages for loss of goodwill,
+      work stoppage, computer failure or malfunction, or any and all
+      other commercial damages or losses), even if such Contributor
+      has been advised of the possibility of such damages.
+
+   9. Accepting Warranty or Additional Liability. While redistributing
+      the Work or Derivative Works thereof, You may choose to offer,
+      and charge a fee for, acceptance of support, warranty, indemnity,
+      or other liability obligations and/or rights consistent with this
+      License. However, in accepting such obligations, You may act only
+      on Your own behalf and on Your sole responsibility, not on behalf
+      of any other Contributor, and only if You agree to indemnify,
+      defend, and hold each Contributor harmless for any liability
+      incurred by, or claims asserted against, such Contributor by reason
+      of your accepting any such warranty or additional liability.
+
+   END OF TERMS AND CONDITIONS
+
+   APPENDIX: How to apply the Apache License to your work.
+
+      To apply the Apache License to your work, attach the following
+      boilerplate notice, with the fields enclosed by brackets "[]"
+      replaced with your own identifying information. (Don't include
+      the brackets!)  The text should be enclosed in the appropriate
+      comment syntax for the file format. We also recommend that a
+      file or class name and description of purpose be included on the
+      same "printed page" as the copyright notice for easier
+      identification within third-party archives.
+
+   Copyright [yyyy] [name of copyright owner]
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+       http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/licenses/opennlp-tools-NOTICE.txt
----------------------------------------------------------------------
diff --git a/lucene/licenses/opennlp-tools-NOTICE.txt b/lucene/licenses/opennlp-tools-NOTICE.txt
new file mode 100644
index 0000000..68a08dc
--- /dev/null
+++ b/lucene/licenses/opennlp-tools-NOTICE.txt
@@ -0,0 +1,6 @@
+
+Apache OpenNLP Tools
+Copyright 2015 The Apache Software Foundation
+
+This product includes software developed at
+The Apache Software Foundation (http://www.apache.org/).

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/module-build.xml
----------------------------------------------------------------------
diff --git a/lucene/module-build.xml b/lucene/module-build.xml
index d48ae37..c2159b6 100644
--- a/lucene/module-build.xml
+++ b/lucene/module-build.xml
@@ -285,6 +285,28 @@
     <property name="analyzers-icu-javadocs.uptodate" value="true"/>
   </target>
 
+  <property name="analyzers-opennlp.jar" value="${common.dir}/build/analysis/opennlp/lucene-analyzers-opennlp-${version}.jar"/> <!-- Path of the analysis/opennlp module jar. -->
+  <target name="check-analyzers-opennlp-uptodate" unless="analyzers-opennlp.uptodate"> <!-- Skipped when the up-to-date flag is already set. -->
+    <module-uptodate name="analysis/opennlp" jarfile="${analyzers-opennlp.jar}" property="analyzers-opennlp.uptodate"/> <!-- Macro defined elsewhere; presumably sets the named property when the jar is current (confirm). -->
+  </target>
+  <target name="jar-analyzers-opennlp" unless="analyzers-opennlp.uptodate" depends="check-analyzers-opennlp-uptodate"> <!-- Builds the module jar unless the flag is set. -->
+    <ant dir="${common.dir}/analysis/opennlp" target="jar-core" inheritAll="false"> <!-- Runs jar-core in the module directory without inheriting all properties. -->
+      <propertyset refid="uptodate.and.compiled.properties"/> <!-- Only this referenced property set is passed to the sub-build. -->
+    </ant>
+    <property name="analyzers-opennlp.uptodate" value="true"/> <!-- Set the flag that the unless attributes above test. -->
+  </target>
+
+  <property name="analyzers-opennlp-javadoc.jar" value="${common.dir}/build/analysis/opennlp/lucene-analyzers-opennlp-${version}-javadoc.jar"/> <!-- Path of the analysis/opennlp javadoc jar. -->
+  <target name="check-analyzers-opennlp-javadocs-uptodate" unless="analyzers-opennlp-javadocs.uptodate"> <!-- Skipped when the javadocs up-to-date flag is already set. -->
+    <module-uptodate name="analysis/opennlp" jarfile="${analyzers-opennlp-javadoc.jar}" property="analyzers-opennlp-javadocs.uptodate"/> <!-- Macro defined elsewhere; presumably sets the named property when the javadoc jar is current (confirm). -->
+  </target>
+  <target name="javadocs-analyzers-opennlp" unless="analyzers-opennlp-javadocs.uptodate" depends="check-analyzers-opennlp-javadocs-uptodate"> <!-- Builds the module javadocs unless the flag is set. -->
+    <ant dir="${common.dir}/analysis/opennlp" target="javadocs" inheritAll="false"> <!-- Runs the javadocs target in the module directory without inheriting all properties. -->
+      <propertyset refid="uptodate.and.compiled.properties"/> <!-- Only this referenced property set is passed to the sub-build. -->
+    </ant>
+    <property name="analyzers-opennlp-javadocs.uptodate" value="true"/> <!-- Set the flag that the unless attributes above test. -->
+  </target>
+
   <property name="analyzers-phonetic.jar" value="${common.dir}/build/analysis/phonetic/lucene-analyzers-phonetic-${version}.jar"/>
   <target name="check-analyzers-phonetic-uptodate" unless="analyzers-phonetic.uptodate">
     <module-uptodate name="analysis/phonetic" jarfile="${analyzers-phonetic.jar}" property="analyzers-phonetic.uptodate"/>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
----------------------------------------------------------------------
diff --git a/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java b/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
index 070eab2..3e1e375 100644
--- a/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
+++ b/lucene/test-framework/src/java/org/apache/lucene/analysis/BaseTokenStreamTestCase.java
@@ -41,6 +41,7 @@ import org.apache.lucene.util.Attribute;
 import org.apache.lucene.util.AttributeFactory;
 import org.apache.lucene.util.AttributeImpl;
 import org.apache.lucene.util.AttributeReflector;
+import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.BytesRefBuilder;
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.IntsRef;
@@ -127,7 +128,7 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
   //     lastStartOffset)
   public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[],
                                                int posLengths[], Integer finalOffset, Integer finalPosInc, boolean[] keywordAtts,
-                                               boolean offsetsAreCorrect) throws IOException {
+                                               boolean offsetsAreCorrect, byte[][] payloads) throws IOException {
     assertNotNull(output);
     CheckClearAttributesAttribute checkClearAtt = ts.addAttribute(CheckClearAttributesAttribute.class);
     
@@ -166,6 +167,12 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
       assertTrue("has no KeywordAttribute", ts.hasAttribute(KeywordAttribute.class));
       keywordAtt = ts.getAttribute(KeywordAttribute.class);
     }
+
+    PayloadAttribute payloadAtt = null;
+    if (payloads != null) {
+      assertTrue("has no PayloadAttribute", ts.hasAttribute(PayloadAttribute.class));
+      payloadAtt = ts.getAttribute(PayloadAttribute.class);
+    }
     
     // Maps position to the start/end offset:
     final Map<Integer,Integer> posToStartOffset = new HashMap<>();
@@ -185,6 +192,7 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
       if (posIncrAtt != null) posIncrAtt.setPositionIncrement(45987657);
       if (posLengthAtt != null) posLengthAtt.setPositionLength(45987653);
       if (keywordAtt != null) keywordAtt.setKeyword((i&1) == 0);
+      if (payloadAtt != null) payloadAtt.setPayload(new BytesRef(new byte[] { 0x00, -0x21, 0x12, -0x43, 0x24 }));
       
       checkClearAtt.getAndResetClearCalled(); // reset it, because we called clearAttribute() before
       assertTrue("token "+i+" does not exist", ts.incrementToken());
@@ -209,7 +217,14 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
       if (keywordAtts != null) {
         assertEquals("keywordAtt " + i + " term=" + termAtt, keywordAtts[i], keywordAtt.isKeyword());
       }
-      
+      if (payloads != null) { // payloads are verified only when the caller supplies expectations
+        if (payloads[i] != null) {
+          assertEquals("payloads " + i, new BytesRef(payloads[i]), payloadAtt.getPayload());
+        } else {
+          assertNull("payloads " + i, payloadAtt.getPayload()); // null expectation: check the token's actual payload, not the known-null expected entry
+        }
+      }
+
       // we can enforce some basic things about a few attributes even if the caller doesn't check:
       if (offsetAtt != null) {
         final int startOffset = offsetAtt.startOffset();
@@ -283,7 +298,9 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
     if (typeAtt != null) typeAtt.setType("bogusType");
     if (posIncrAtt != null) posIncrAtt.setPositionIncrement(45987657);
     if (posLengthAtt != null) posLengthAtt.setPositionLength(45987653);
-    
+    if (keywordAtt != null) keywordAtt.setKeyword(true);
+    if (payloadAtt != null) payloadAtt.setPayload(new BytesRef(new byte[] { 0x00, -0x21, 0x12, -0x43, 0x24 }));
+
     checkClearAtt.getAndResetClearCalled(); // reset it, because we called clearAttribute() before
 
     ts.end();
@@ -305,7 +322,7 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
   public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[],
                                                int posLengths[], Integer finalOffset, boolean[] keywordAtts,
                                                boolean offsetsAreCorrect) throws IOException {
-    assertTokenStreamContents(ts, output, startOffsets, endOffsets, types, posIncrements, posLengths, finalOffset, null, null, offsetsAreCorrect);
+    assertTokenStreamContents(ts, output, startOffsets, endOffsets, types, posIncrements, posLengths, finalOffset, null, keywordAtts, offsetsAreCorrect, null);
   }
 
   public static void assertTokenStreamContents(TokenStream ts, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[], int posLengths[], Integer finalOffset, boolean offsetsAreCorrect) throws IOException {
@@ -373,7 +390,12 @@ public abstract class BaseTokenStreamTestCase extends LuceneTestCase {
     checkAnalysisConsistency(random(), a, true, input, offsetsAreCorrect);
     assertTokenStreamContents(a.tokenStream("dummy", input), output, startOffsets, endOffsets, types, posIncrements, posLengths, input.length(), offsetsAreCorrect);
   }
-  
+
+  /**
+   * Variant of <code>assertAnalyzesTo</code> that additionally accepts expected payloads.
+   * Runs <code>checkResetException</code> on the analyzer, then passes the analyzer's token stream
+   * for <code>input</code> to <code>assertTokenStreamContents</code>, using <code>input.length()</code>
+   * as the expected final offset and forwarding <code>payloads</code> unchanged.
+   */
+  public static void assertAnalyzesTo(Analyzer a, String input, String[] output, int startOffsets[], int endOffsets[], String types[], int posIncrements[], int posLengths[], boolean offsetsAreCorrect, byte[][] payloads) throws IOException {
+    checkResetException(a, input);
+    assertTokenStreamContents(a.tokenStream("dummy", input), output, startOffsets, endOffsets, types, posIncrements, posLengths, input.length(), null, null, offsetsAreCorrect, payloads);
+  }
+
   public static void assertAnalyzesTo(Analyzer a, String input, String[] output) throws IOException {
     assertAnalyzesTo(a, input, output, null, null, null, null, null);
   }

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index d59de38..9e4a663 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -33,6 +33,13 @@ New Features
 ----------------------
 * SOLR-11285: Simulation framework for autoscaling. (ab)
 
+* LUCENE-2899: In the Solr analysis-extras contrib, added support for the
+  OpenNLP-based analysis components in the Lucene analysis/opennlp module:
+  tokenization, part-of-speech tagging, phrase chunking, and lemmatization.
+  Also added OpenNLP-based named entity extraction as a Solr update request
+  processor.  (Lance Norskog, Grant Ingersoll, Joern Kottmann, Em, Kai Gülzau,
+  Rene Nederhand, Robert Muir, Steven Bower, Steve Rowe)
+
 Optimizations
 ----------------------
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/solr/contrib/analysis-extras/README.txt
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/README.txt b/solr/contrib/analysis-extras/README.txt
index 3863420..fb8381a 100644
--- a/solr/contrib/analysis-extras/README.txt
+++ b/solr/contrib/analysis-extras/README.txt
@@ -1,8 +1,10 @@
 The analysis-extras plugin provides additional analyzers that rely
 upon large dependencies/dictionaries.
 
-It includes integration with ICU for multilingual support, and 
-analyzers for Chinese and Polish.
+It includes integration with ICU for multilingual support,
+analyzers for Chinese and Polish, and integration with
+OpenNLP for multilingual tokenization, part-of-speech tagging,
+lemmatization, phrase chunking, and named-entity recognition.
 
 ICU relies upon lucene-libs/lucene-analyzers-icu-X.Y.jar
 and lib/icu4j-X.Y.jar
@@ -13,4 +15,6 @@ Stempel relies on lucene-libs/lucene-analyzers-stempel-X.Y.jar
 
 Morfologik relies on lucene-libs/lucene-analyzers-morfologik-X.Y.jar
 and lib/morfologik-*.jar
- 
+
+OpenNLP relies on lucene-libs/lucene-analyzers-opennlp-X.Y.jar
+and lib/opennlp-*.jar

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/solr/contrib/analysis-extras/build.xml
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/build.xml b/solr/contrib/analysis-extras/build.xml
index 38d67dd..68a88ad 100644
--- a/solr/contrib/analysis-extras/build.xml
+++ b/solr/contrib/analysis-extras/build.xml
@@ -7,9 +7,9 @@
     The ASF licenses this file to You under the Apache License, Version 2.0
     the "License"); you may not use this file except in compliance with
     the License.  You may obtain a copy of the License at
- 
+
         http://www.apache.org/licenses/LICENSE-2.0
- 
+
     Unless required by applicable law or agreed to in writing, software
     distributed under the License is distributed on an "AS IS" BASIS,
     WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -24,19 +24,20 @@
   </description>
 
   <import file="../contrib-build.xml"/>
-  
+
   <target name="compile-test" depends="-compile-test-lucene-analysis,common-solr.compile-test"/>
 
   <path id="analysis.extras.lucene.libs">
     <pathelement location="${analyzers-icu.jar}"/>
-    <!-- 
-      Although the smartcn, stempel, and morfologik jars are not dependencies of
+    <!--
+      Although the smartcn, stempel, morfologik and opennlp jars are not dependencies of
       code in the analysis-extras contrib, they must remain here in order to
       populate the Solr distribution
      -->
     <pathelement location="${analyzers-smartcn.jar}"/>
     <pathelement location="${analyzers-stempel.jar}"/>
     <pathelement location="${analyzers-morfologik.jar}"/>
+    <pathelement location="${analyzers-opennlp.jar}"/>
   </path>
 
   <path id="classpath">
@@ -53,12 +54,12 @@
     </dirset>
   </path>
 
-  <!-- 
-    Although the smartcn, stempel, and morfologik jars are not dependencies of
+  <!--
+    Although the smartcn, stempel, morfologik and opennlp jars are not dependencies of
     code in the analysis-extras contrib, they must remain here in order to
     populate the Solr distribution
    -->
-  <target name="module-jars-to-solr" 
+  <target name="module-jars-to-solr"
           depends="-module-jars-to-solr-not-for-package,-module-jars-to-solr-package"/>
   <target name="-module-jars-to-solr-not-for-package" unless="called.from.create-package">
     <antcall inheritall="true">
@@ -66,6 +67,7 @@
       <target name="jar-analyzers-smartcn"/>
       <target name="jar-analyzers-stempel"/>
       <target name="jar-analyzers-morfologik"/>
+      <target name="jar-analyzers-opennlp"/>
     </antcall>
     <property name="analyzers-icu.uptodate" value="true"/> <!-- compile-time dependency -->
     <mkdir dir="${build.dir}/lucene-libs"/>
@@ -85,6 +87,6 @@
     </copy>
   </target>
 
-  <target name="compile-core" depends="jar-analyzers-icu, solr-contrib-build.compile-core"/>
+  <target name="compile-core" depends="jar-analyzers-icu, jar-analyzers-opennlp, solr-contrib-build.compile-core"/>
   <target name="dist" depends="module-jars-to-solr, common-solr.dist"/>
 </project>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/solr/contrib/analysis-extras/ivy.xml
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/ivy.xml b/solr/contrib/analysis-extras/ivy.xml
index 0c71701..cfc30c1 100644
--- a/solr/contrib/analysis-extras/ivy.xml
+++ b/solr/contrib/analysis-extras/ivy.xml
@@ -24,6 +24,9 @@
   </configurations>
   <dependencies>
     <dependency org="com.ibm.icu" name="icu4j" rev="${/com.ibm.icu/icu4j}" conf="compile"/>
+    <dependency org="org.apache.opennlp" name="opennlp-tools" rev="${/org.apache.opennlp/opennlp-tools}" conf="compile" />
+    <dependency org="org.apache.opennlp" name="opennlp-maxent" rev="${/org.apache.opennlp/opennlp-maxent}" conf="compile" />
+
     <!--
       Although the 3rd party morfologik jars are not dependencies of code in
       the analysis-extras contrib, they must remain here in order to

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/solr/contrib/analysis-extras/src/java/org/apache/solr/update/processor/OpenNLPExtractNamedEntitiesUpdateProcessorFactory.java
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/java/org/apache/solr/update/processor/OpenNLPExtractNamedEntitiesUpdateProcessorFactory.java b/solr/contrib/analysis-extras/src/java/org/apache/solr/update/processor/OpenNLPExtractNamedEntitiesUpdateProcessorFactory.java
new file mode 100644
index 0000000..d00df2b
--- /dev/null
+++ b/solr/contrib/analysis-extras/src/java/org/apache/solr/update/processor/OpenNLPExtractNamedEntitiesUpdateProcessorFactory.java
@@ -0,0 +1,571 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.update.processor;
+
+import static org.apache.solr.common.SolrException.ErrorCode.SERVER_ERROR;
+
+import java.io.IOException;
+import java.lang.invoke.MethodHandles;
+import java.util.ArrayList;
+import java.util.Collection;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Map;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
+import java.util.regex.PatternSyntaxException;
+
+import opennlp.tools.util.Span;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.opennlp.OpenNLPTokenizer;
+import org.apache.lucene.analysis.opennlp.tools.NLPNERTaggerOp;
+import org.apache.lucene.analysis.opennlp.tools.OpenNLPOpsFactory;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.solr.common.SolrException;
+import org.apache.solr.common.SolrInputDocument;
+import org.apache.solr.common.SolrInputField;
+import org.apache.solr.common.util.NamedList;
+import org.apache.solr.common.util.Pair;
+import org.apache.solr.core.SolrCore;
+import org.apache.solr.request.SolrQueryRequest;
+import org.apache.solr.response.SolrQueryResponse;
+import org.apache.solr.schema.FieldType;
+import org.apache.solr.update.AddUpdateCommand;
+import org.apache.solr.update.processor.FieldMutatingUpdateProcessor.FieldNameSelector;
+import org.apache.solr.update.processor.FieldMutatingUpdateProcessorFactory.SelectorParams;
+import org.apache.solr.util.plugin.SolrCoreAware;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Extracts named entities using an OpenNLP NER <code>modelFile</code> from the values found in
+ * any matching <code>source</code> field into a configured <code>dest</code> field, after
+ * first tokenizing the source text using the index analyzer on the configured
+ * <code>analyzerFieldType</code>, which must include <code>solr.OpenNLPTokenizerFactory</code>
+ * as the tokenizer. E.g.:
+ *
+ * <pre class="prettyprint">
+ *   &lt;fieldType name="opennlp-en-tokenization" class="solr.TextField"&gt;
+ *     &lt;analyzer&gt;
+ *       &lt;tokenizer class="solr.OpenNLPTokenizerFactory"
+ *                  sentenceModel="en-sent.bin"
+ *                  tokenizerModel="en-tokenizer.bin"/&gt;
+ *     &lt;/analyzer&gt;
+ *   &lt;/fieldType&gt;
+ * </pre>
+ * 
+ * <p>See the <a href="http://opennlp.apache.org/models.html">OpenNLP website</a>
+ * for information on downloading pre-trained models.</p>
+ *
+ * <p>
+ * The <code>source</code> field(s) can be configured as either:
+ * </p>
+ * <ul>
+ *  <li>One or more <code>&lt;str&gt;</code></li>
+ *  <li>An <code>&lt;arr&gt;</code> of <code>&lt;str&gt;</code></li>
+ *  <li>A <code>&lt;lst&gt;</code> containing
+ *   {@link FieldMutatingUpdateProcessor FieldMutatingUpdateProcessorFactory style selector arguments}</li>
+ * </ul>
+ *
+ * <p>The <code>dest</code> field can be a single <code>&lt;str&gt;</code>
+ * containing the literal name of a destination field, or it may be a <code>&lt;lst&gt;</code> specifying a
+ * regex <code>pattern</code> and a <code>replacement</code> string. If the pattern + replacement option
+ * is used the pattern will be matched against all fields matched by the source selector, and the replacement
+ * string (including any capture groups specified from the pattern) will be evaluated using
+ * {@link Matcher#replaceAll(String)} to generate the literal name of the destination field.  Additionally,
+ * an occurrence of the string "{EntityType}" in the <code>dest</code> field specification, or in the
+ * <code>replacement</code> string, will be replaced with the entity type(s) returned for each entity by
+ * the OpenNLP NER model; as a result, if the model extracts more than one entity type, then more than one
+ * <code>dest</code> field will be populated.
+ * </p>
+ *
+ * <p>If the resolved <code>dest</code> field already exists in the document, then the
+ * named entities extracted from the <code>source</code> fields will be added to it.
+ * </p>
+ * <p>
+ * In the example below:
+ * </p>
+ * <ul>
+ *   <li>Named entities will be extracted from the <code>text</code> field and added
+ *       to the <code>people_s</code> field</li>
+ *   <li>Named entities will be extracted from both the <code>title</code> and
+ *       <code>subtitle</code> fields and added into the <code>titular_people</code> field</li>
+ *   <li>Named entities will be extracted from any field with a name ending in <code>_txt</code>
+ *       -- except for <code>notes_txt</code> -- and added into the <code>people_s</code> field</li>
+ *   <li>Named entities will be extracted from any field with a name beginning with "desc" and
+ *       ending in "s" (e.g. "descs" and "descriptions") and added to a field prefixed with "key_",
+ *       not ending in "s", and suffixed with "_people". (e.g. "key_desc_people" or
+ *       "key_description_people")</li>
+ *   <li>Named entities will be extracted from the <code>summary</code> field and added
+ *       to the <code>summary_person_s</code> field, assuming that the modelFile only extracts
+ *       entities of type "person".</li>
+ * </ul>
+ *
+ * <pre class="prettyprint">
+ * &lt;updateRequestProcessorChain name="multiple-extract"&gt;
+ *   &lt;processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory"&gt;
+ *     &lt;str name="modelFile"&gt;en-test-ner-person.bin&lt;/str&gt;
+ *     &lt;str name="analyzerFieldType"&gt;opennlp-en-tokenization&lt;/str&gt;
+ *     &lt;str name="source"&gt;text&lt;/str&gt;
+ *     &lt;str name="dest"&gt;people_s&lt;/str&gt;
+ *   &lt;/processor&gt;
+ *   &lt;processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory"&gt;
+ *     &lt;str name="modelFile"&gt;en-test-ner-person.bin&lt;/str&gt;
+ *     &lt;str name="analyzerFieldType"&gt;opennlp-en-tokenization&lt;/str&gt;
+ *     &lt;arr name="source"&gt;
+ *       &lt;str&gt;title&lt;/str&gt;
+ *       &lt;str&gt;subtitle&lt;/str&gt;
+ *     &lt;/arr&gt;
+ *     &lt;str name="dest"&gt;titular_people&lt;/str&gt;
+ *   &lt;/processor&gt;
+ *   &lt;processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory"&gt;
+ *     &lt;str name="modelFile"&gt;en-test-ner-person.bin&lt;/str&gt;
+ *     &lt;str name="analyzerFieldType"&gt;opennlp-en-tokenization&lt;/str&gt;
+ *     &lt;lst name="source"&gt;
+ *       &lt;str name="fieldRegex"&gt;.*_txt$&lt;/str&gt;
+ *       &lt;lst name="exclude"&gt;
+ *         &lt;str name="fieldName"&gt;notes_txt&lt;/str&gt;
+ *       &lt;/lst&gt;
+ *     &lt;/lst&gt;
+ *     &lt;str name="dest"&gt;people_s&lt;/str&gt;
+ *   &lt;/processor&gt;
+ *   &lt;processor class="solr.processor.OpenNLPExtractNamedEntitiesUpdateProcessorFactory"&gt;
+ *     &lt;str name="modelFile"&gt;en-test-ner-person.bin&lt;/str&gt;
+ *     &lt;str name="analyzerFieldType"&gt;opennlp-en-tokenization&lt;/str&gt;
+ *     &lt;lst name="source"&gt;
+ *       &lt;str name="fieldRegex"&gt;^desc(.*)s$&lt;/str&gt;
+ *     &lt;/lst&gt;
+ *     &lt;lst name="dest"&gt;
+ *       &lt;str name="pattern"&gt;^desc(.*)s$&lt;/str&gt;
+ *       &lt;str name="replacement"&gt;key_desc$1_people&lt;/str&gt;
+ *     &lt;/lst&gt;
+ *   &lt;/processor&gt;
+ *   &lt;processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory"&gt;
+ *     &lt;str name="modelFile"&gt;en-test-ner-person.bin&lt;/str&gt;
+ *     &lt;str name="analyzerFieldType"&gt;opennlp-en-tokenization&lt;/str&gt;
+ *     &lt;str name="source"&gt;summary&lt;/str&gt;
+ *     &lt;str name="dest"&gt;summary_{EntityType}_s&lt;/str&gt;
+ *   &lt;/processor&gt;
+ * &lt;/updateRequestProcessorChain&gt;
+ * </pre>
+ *
+ * @since 7.3.0
+ */
+public class OpenNLPExtractNamedEntitiesUpdateProcessorFactory
+    extends UpdateRequestProcessorFactory implements SolrCoreAware {
+
+  private static final Logger log = LoggerFactory.getLogger(MethodHandles.lookup().lookupClass());
+
+  // Names of the init params understood by this factory
+  public static final String SOURCE_PARAM = "source";
+  public static final String DEST_PARAM = "dest";
+  public static final String PATTERN_PARAM = "pattern";
+  public static final String REPLACEMENT_PARAM = "replacement";
+  public static final String MODEL_PARAM = "modelFile";
+  public static final String ANALYZER_FIELD_TYPE_PARAM = "analyzerFieldType";
+  /** Placeholder in the destination field name that is replaced with the type of an extracted entity */
+  public static final String ENTITY_TYPE = "{EntityType}";
+
+  // Parsed source field selector configuration: inclusions, plus zero or more exclusions
+  private SelectorParams srcInclusions = new SelectorParams();
+  private Collection<SelectorParams> srcExclusions = new ArrayList<>();
+
+  /** Built from the inclusions/exclusions in {@link #inform(SolrCore)}; null until then */
+  private FieldNameSelector srcSelector = null;
+
+  // Values of the mandatory 'modelFile' and 'analyzerFieldType' init params
+  private String modelFile = null;
+  private String analyzerFieldType = null;
+
+  /**
+   * If pattern is null, then this is a literal field name.  If pattern is non-null then this
+   * is a replacement string that may contain meta-characters (ie: capture group identifiers)
+   * @see #pattern
+   */
+  private String dest = null;
+  /** @see #dest */
+  private Pattern pattern = null;
+
+  /**
+   * Returns the source field selector built by {@link #inform(SolrCore)}.
+   *
+   * @throws SolrException (SERVER_ERROR) if the selector has not been initialized yet
+   */
+  protected final FieldNameSelector getSourceSelector() {
+    if (null == srcSelector) {
+      throw new SolrException(SERVER_ERROR, "selector was never initialized, inform(SolrCore) never called???");
+    }
+    return srcSelector;
+  }
+
+  /**
+   * Parses and validates the init params of this factory.
+   *
+   * <p>Exactly one of two syntaxes must be used: the full syntax ('source' + 'dest'), or the
+   * shorthand syntax (top-level 'pattern' + 'replacement').  'modelFile' and 'analyzerFieldType'
+   * are mandatory string params in both cases; any other param is rejected.</p>
+   *
+   * @param args the init params; recognized params are removed from it as they are consumed
+   * @throws SolrException (SERVER_ERROR) if a mandatory param is missing or has the wrong type,
+   *         or if an unexpected param is present
+   */
+  @SuppressWarnings("unchecked")
+  @Override
+  public void init(NamedList args) {
+
+    // high level (loose) check for which type of config we have.
+    //
+    // individual init methods do more strict syntax checking
+    final boolean hasSourceAndDest
+        = 0 <= args.indexOf(SOURCE_PARAM, 0) && 0 <= args.indexOf(DEST_PARAM, 0);
+    final boolean hasPatternAndReplacement
+        = 0 <= args.indexOf(PATTERN_PARAM, 0) && 0 <= args.indexOf(REPLACEMENT_PARAM, 0);
+    if ( ! hasSourceAndDest && ! hasPatternAndReplacement) {
+      throw new SolrException(SERVER_ERROR, "A combination of either '" + SOURCE_PARAM + "' + '"+
+          DEST_PARAM + "', or '" + REPLACEMENT_PARAM + "' + '" +
+          PATTERN_PARAM + "' init params are mandatory");
+    }
+
+    // Consume the params shared by both syntaxes *before* delegating: the shorthand init helper
+    // rejects every param other than pattern/replacement, so if the mandatory modelFile and
+    // analyzerFieldType were still present at that point the shorthand syntax could never succeed.
+    Object modelParam = args.remove(MODEL_PARAM);
+    if (null == modelParam) {
+      throw new SolrException(SERVER_ERROR, "Missing required init param '" + MODEL_PARAM + "'");
+    }
+    if ( ! (modelParam instanceof CharSequence)) {
+      throw new SolrException(SERVER_ERROR, "Init param '" + MODEL_PARAM + "' must be a <str>");
+    }
+    modelFile = modelParam.toString();
+
+    Object analyzerFieldTypeParam = args.remove(ANALYZER_FIELD_TYPE_PARAM);
+    if (null == analyzerFieldTypeParam) {
+      throw new SolrException(SERVER_ERROR, "Missing required init param '" + ANALYZER_FIELD_TYPE_PARAM + "'");
+    }
+    if ( ! (analyzerFieldTypeParam instanceof CharSequence)) {
+      throw new SolrException(SERVER_ERROR, "Init param '" + ANALYZER_FIELD_TYPE_PARAM + "' must be a <str>");
+    }
+    analyzerFieldType = analyzerFieldTypeParam.toString();
+
+    // Full syntax wins when both pairs are present; its helper reports the mix-up explicitly.
+    if (hasSourceAndDest) {
+      initSourceSelectorSyntax(args);
+    } else {
+      initSimpleRegexReplacement(args);
+    }
+
+    if (0 < args.size()) {
+      throw new SolrException(SERVER_ERROR, "Unexpected init param(s): '" + args.getName(0) + "'");
+    }
+
+    super.init(args);
+  }
+
+  /**
+   * Init helper for the shorthand syntax: a top-level "pattern" and "replacement" with no
+   * "source" or "dest".  The same regex selects the source fields and, together with the
+   * replacement string, produces the destination field name.
+   *
+   * <p>Must only be called when both the "pattern" and "replacement" init params exist.
+   * Both params are removed from <code>args</code>; any param still left in <code>args</code>
+   * afterwards is treated as an error.</p>
+   */
+  @SuppressWarnings("unchecked")
+  private void initSimpleRegexReplacement(NamedList args) {
+    // The caller already established that pattern and replacement are present; look for the
+    // full-syntax params here so that the error is more helpful than a generic "unexpected".
+    final boolean mixedWithFullSyntax =
+        0 <= args.indexOf(SOURCE_PARAM, 0) || 0 <= args.indexOf(DEST_PARAM, 0);
+    if (mixedWithFullSyntax) {
+      throw new SolrException(SERVER_ERROR,"Short hand syntax must not be mixed with full syntax. Found " +
+          PATTERN_PARAM + " and " + REPLACEMENT_PARAM + " but also found " + SOURCE_PARAM + " or " + DEST_PARAM);
+    }
+
+    assert args.indexOf(SOURCE_PARAM, 0) < 0;
+
+    final Object regexParam = args.remove(PATTERN_PARAM);
+    final Object replacementParam = args.remove(REPLACEMENT_PARAM);
+
+    if (null == regexParam || null == replacementParam) {
+      throw new SolrException(SERVER_ERROR, "Init params '" + PATTERN_PARAM + "' and '" +
+          REPLACEMENT_PARAM + "' are both mandatory if '" + SOURCE_PARAM + "' and '"+
+          DEST_PARAM + "' are not both specified");
+    }
+
+    // Nothing else may accompany the shorthand params.
+    if (0 != args.size()) {
+      throw new SolrException(SERVER_ERROR, "Init params '" + REPLACEMENT_PARAM + "' and '" +
+          PATTERN_PARAM + "' must be children of '" + DEST_PARAM +
+          "' to be combined with other options.");
+    }
+
+    if (!(replacementParam instanceof String)) {
+      throw new SolrException(SERVER_ERROR, "Init param '" + REPLACEMENT_PARAM + "' must be a string (i.e. <str>)");
+    }
+    if (!(regexParam instanceof String)) {
+      throw new SolrException(SERVER_ERROR, "Init param '" + PATTERN_PARAM + "' must be a string (i.e. <str>)");
+    }
+
+    dest = replacementParam.toString();
+    try {
+      pattern = Pattern.compile(regexParam.toString());
+    } catch (PatternSyntaxException invalidRegex) {
+      throw new SolrException(SERVER_ERROR, "Init param " + PATTERN_PARAM +
+          " is not a valid regex pattern: " + regexParam, invalidRegex);
+    }
+
+    // The one regex doubles as the source field selector.
+    final SelectorParams inclusions = new SelectorParams();
+    inclusions.fieldRegex = Collections.singletonList(pattern);
+    srcInclusions = inclusions;
+  }
+
+  /**
+   * init helper method that should only be called when we know for certain that both the
+   * "source" and "dest" init params <em>do</em> exist.
+   *
+   * <p>"source" may be one or more strings, or a single list of selector options (optionally
+   * containing "exclude" lists); "dest" may be a string, or a list containing exactly
+   * "pattern" and "replacement".  Both params are removed from <code>args</code>.</p>
+   *
+   * @throws SolrException (SERVER_ERROR) on any malformed or unexpected configuration
+   */
+  @SuppressWarnings("unchecked")
+  private void initSourceSelectorSyntax(NamedList args) {
+    // Full and complete syntax where source and dest are mandatory.
+    //
+    // source may be a single string or a selector.
+    // dest may be a single string or list containing pattern and replacement
+    //
+    //   source != null && dest != null
+
+    // if we got here we know we had source and dest, now check for the other two so that we can give a better
+    // message than "unexpected"
+    if (0 <= args.indexOf(PATTERN_PARAM, 0) || 0 <= args.indexOf(REPLACEMENT_PARAM, 0) ) {
+      throw new SolrException(SERVER_ERROR,"Short hand syntax must not be mixed with full syntax. Found " +
+          SOURCE_PARAM + " and " + DEST_PARAM + " but also found " + PATTERN_PARAM + " or " + REPLACEMENT_PARAM);
+    }
+
+    Object d = args.remove(DEST_PARAM);
+    assert null != d;
+
+    List<Object> sources = args.getAll(SOURCE_PARAM);
+    assert null != sources;
+
+    if (1 == sources.size()) {
+      if (sources.get(0) instanceof NamedList) {
+        // nested set of selector options
+        NamedList selectorConfig = (NamedList) args.remove(SOURCE_PARAM);
+
+        srcInclusions = parseSelectorParams(selectorConfig);
+
+        List<Object> excList = selectorConfig.getAll("exclude");
+
+        for (Object excObj : excList) {
+          if (null == excObj) {
+            throw new SolrException(SERVER_ERROR, "Init param '" + SOURCE_PARAM +
+                "' child 'exclude' can not be null");
+          }
+          if (!(excObj instanceof NamedList)) {
+            throw new SolrException(SERVER_ERROR, "Init param '" + SOURCE_PARAM +
+                "' child 'exclude' must be <lst/>");
+          }
+          NamedList exc = (NamedList) excObj;
+          srcExclusions.add(parseSelectorParams(exc));
+          if (0 < exc.size()) {
+            // report the leftover child of this 'exclude' list, not a child of its parent
+            throw new SolrException(SERVER_ERROR, "Init param '" + SOURCE_PARAM +
+                "' has unexpected 'exclude' sub-param(s): '"
+                + exc.getName(0) + "'");
+          }
+          // call once per instance
+          selectorConfig.remove("exclude");
+        }
+
+        if (0 < selectorConfig.size()) {
+          throw new SolrException(SERVER_ERROR, "Init param '" + SOURCE_PARAM +
+              "' contains unexpected child param(s): '" +
+              selectorConfig.getName(0) + "'");
+        }
+        // consume from the named list so it doesn't interfere with subsequent processing
+        sources.remove(0);
+      }
+    }
+    if (1 <= sources.size()) {
+      // source better be one or more strings
+      srcInclusions.fieldName = new HashSet<>(args.removeConfigArgs("source"));
+    }
+    if (srcInclusions == null) {
+      throw new SolrException(SERVER_ERROR,
+          "Init params do not specify any field from which to extract entities, please supply either "
+          + SOURCE_PARAM + " and " + DEST_PARAM + " or " + PATTERN_PARAM + " and " + REPLACEMENT_PARAM + ". See javadocs " +
+          "for OpenNLPExtractNamedEntitiesUpdateProcessor for further details.");
+    }
+
+    if (d instanceof NamedList) {
+      NamedList destList = (NamedList) d;
+
+      Object patt = destList.remove(PATTERN_PARAM);
+      Object replacement = destList.remove(REPLACEMENT_PARAM);
+
+      if (null == patt || null == replacement) {
+        throw new SolrException(SERVER_ERROR, "Init param '" + DEST_PARAM + "' children '" +
+            PATTERN_PARAM + "' and '" + REPLACEMENT_PARAM +
+            "' are both mandatory and can not be null");
+      }
+      if (! (patt instanceof String && replacement instanceof String)) {
+        throw new SolrException(SERVER_ERROR, "Init param '" + DEST_PARAM + "' children '" +
+            PATTERN_PARAM + "' and '" + REPLACEMENT_PARAM +
+            "' must both be strings (i.e. <str>)");
+      }
+      if (0 != destList.size()) {
+        throw new SolrException(SERVER_ERROR, "Init param '" + DEST_PARAM + "' has unexpected children: '"
+            + destList.getName(0) + "'");
+      }
+
+      try {
+        this.pattern = Pattern.compile(patt.toString());
+      } catch (PatternSyntaxException pe) {
+        throw new SolrException(SERVER_ERROR, "Init param '" + DEST_PARAM + "' child '" + PATTERN_PARAM +
+            "' is not a valid regex pattern: " + patt, pe);
+      }
+      dest = replacement.toString();
+
+    } else if (d instanceof String) {
+      dest = d.toString();
+    } else {
+      throw new SolrException(SERVER_ERROR, "Init param '" + DEST_PARAM + "' must either be a string " +
+          "(i.e. <str>) or a list (i.e. <lst>) containing '" +
+          PATTERN_PARAM + "' and '" + REPLACEMENT_PARAM + "'");
+    }
+
+  }
+
+  /**
+   * Builds the source field selector from the configured inclusions, wraps it once per
+   * configured exclusion, and then asks {@link OpenNLPOpsFactory} for the configured NER model
+   * using the core's resource loader.
+   *
+   * @throws IllegalArgumentException if requesting the NER model fails with an IOException
+   */
+  @Override
+  public void inform(final SolrCore core) {
+
+    srcSelector = FieldMutatingUpdateProcessor.createFieldNameSelector(
+        core.getResourceLoader(), core, srcInclusions, FieldMutatingUpdateProcessor.SELECT_NO_FIELDS);
+
+    for (SelectorParams exclusion : srcExclusions) {
+      final FieldNameSelector excluded = FieldMutatingUpdateProcessor.createFieldNameSelector(
+          core.getResourceLoader(), core, exclusion, FieldMutatingUpdateProcessor.SELECT_NO_FIELDS);
+      srcSelector = FieldMutatingUpdateProcessor.wrap(srcSelector, excluded);
+    }
+
+    try {
+      OpenNLPOpsFactory.getNERTaggerModel(modelFile, core.getResourceLoader());
+    } catch (IOException ioe) {
+      throw new IllegalArgumentException(ioe);
+    }
+  }
+
+  /**
+   * Creates a processor that, for every added document, runs the configured NER model over the
+   * values of each selected source field and adds the extracted entity names to the resolved
+   * destination field(s) before passing the command on to <code>next</code>.
+   *
+   * @throws SolrException if the source selector was never initialized, or if
+   *         <code>analyzerFieldType</code> is not found in the request's schema
+   * @throws IllegalArgumentException if obtaining the NER tagger fails with an IOException
+   */
+  @Override
+  public final UpdateRequestProcessor getInstance
+      (SolrQueryRequest req, SolrQueryResponse rsp, UpdateRequestProcessor next) {
+    final FieldNameSelector srcSelector = getSourceSelector();
+    return new UpdateRequestProcessor(next) {
+      private final NLPNERTaggerOp nerTaggerOp;
+      private Analyzer analyzer = null;
+      {
+        try {
+          nerTaggerOp = OpenNLPOpsFactory.getNERTagger(modelFile);
+          FieldType fieldType = req.getSchema().getFieldTypeByName(analyzerFieldType);
+          if (fieldType == null) {
+            throw new SolrException
+                (SERVER_ERROR, ANALYZER_FIELD_TYPE_PARAM + " '" + analyzerFieldType + "' not found in the schema.");
+          }
+          analyzer = fieldType.getIndexAnalyzer();
+        } catch (IOException e) {
+          throw new IllegalArgumentException(e);
+        }
+      }
+
+      @Override
+      public void processAdd(AddUpdateCommand cmd) throws IOException {
+
+        final SolrInputDocument doc = cmd.getSolrInputDocument();
+
+        // Destination may be regex replace string, or "{EntityType}" replaced by
+        // each entity's type, both of which can cause multiple output fields.
+        Map<String,SolrInputField> destMap = new HashMap<>();
+
+        // preserve initial values
+        for (final String fname : doc.getFieldNames()) {
+          if ( ! srcSelector.shouldMutate(fname)) continue;
+
+          Collection<Object> srcFieldValues = doc.getFieldValues(fname);
+          if (srcFieldValues == null || srcFieldValues.isEmpty()) continue;
+
+          // Destination for this source field; may still contain the "{EntityType}" placeholder
+          String resolvedDest = dest;
+
+          if (pattern != null) {
+            Matcher matcher = pattern.matcher(fname);
+            if (matcher.find()) {
+              resolvedDest = matcher.replaceAll(dest);
+            } else {
+              log.debug("srcSelector.shouldMutate(\"{}\") returned true, " +
+                  "but replacement pattern did not match, field skipped.", fname);
+              continue;
+            }
+          }
+
+          for (Object val : srcFieldValues) {
+            for (Pair<String,String> entity : extractTypedNamedEntities(val)) {
+              String entityName = entity.first();
+              String entityType = entity.second();
+              // Resolve the placeholder into a per-entity name and leave resolvedDest untouched:
+              // overwriting it would erase the placeholder after the first entity, sending every
+              // later entity -- whatever its type -- to the first entity type's field.
+              final String entityDest = resolvedDest.replace(ENTITY_TYPE, entityType);
+              SolrInputField destField;
+              if (doc.containsKey(entityDest)) {
+                destField = doc.getField(entityDest);
+              } else {
+                destField = destMap.get(entityDest);
+                if (destField == null) {
+                  destField = new SolrInputField(entityDest);
+                }
+              }
+              destField.addValue(entityName);
+
+              // put it in map to avoid concurrent modification...
+              destMap.put(entityDest, destField);
+            }
+          }
+        }
+
+        for (Map.Entry<String,SolrInputField> entry : destMap.entrySet()) {
+          doc.put(entry.getKey(), entry.getValue());
+        }
+        super.processAdd(cmd);
+      }
+
+      /** Using configured NER model, extracts (name, type) pairs from the given source field value */
+      private List<Pair<String,String>> extractTypedNamedEntities(Object srcFieldValue) throws IOException {
+        List<Pair<String,String>> entitiesWithType = new ArrayList<>();
+        List<String> terms = new ArrayList<>();
+        List<Integer> startOffsets = new ArrayList<>();
+        List<Integer> endOffsets = new ArrayList<>();
+        String fullText = srcFieldValue.toString();
+        // try-with-resources: the stream must be closed even when analysis fails part-way through
+        try (TokenStream tokenStream = analyzer.tokenStream("", fullText)) {
+          CharTermAttribute termAtt = tokenStream.addAttribute(CharTermAttribute.class);
+          OffsetAttribute offsetAtt = tokenStream.addAttribute(OffsetAttribute.class);
+          FlagsAttribute flagsAtt = tokenStream.addAttribute(FlagsAttribute.class);
+          tokenStream.reset();
+          synchronized (nerTaggerOp) {
+            try {
+              while (tokenStream.incrementToken()) {
+                terms.add(termAtt.toString());
+                startOffsets.add(offsetAtt.startOffset());
+                endOffsets.add(offsetAtt.endOffset());
+                boolean endOfSentence = 0 != (flagsAtt.getFlags() & OpenNLPTokenizer.EOS_FLAG_BIT);
+                if (endOfSentence) {    // extract named entities one sentence at a time
+                  extractEntitiesFromSentence(fullText, terms, startOffsets, endOffsets, entitiesWithType);
+                }
+              }
+              tokenStream.end();
+              if (!terms.isEmpty()) { // In case last token of last sentence isn't properly flagged with EOS_FLAG_BIT
+                extractEntitiesFromSentence(fullText, terms, startOffsets, endOffsets, entitiesWithType);
+              }
+            } finally {
+              nerTaggerOp.reset();    // Forget all adaptive data collected during previous calls
+            }
+          }
+        }
+        return entitiesWithType;
+      }
+
+      /**
+       * Runs the NER tagger over the buffered sentence tokens, appends one (text, type) pair per
+       * returned span to <code>entitiesWithType</code>, then clears the token buffers.  The entity
+       * text is cut out of <code>fullText</code> using the offsets of the span's first and last token.
+       */
+      private void extractEntitiesFromSentence(String fullText, List<String> terms, List<Integer> startOffsets,
+                                               List<Integer> endOffsets, List<Pair<String,String>> entitiesWithType) {
+        for (Span span : nerTaggerOp.getNames(terms.toArray(new String[terms.size()]))) {
+          String text = fullText.substring(startOffsets.get(span.getStart()), endOffsets.get(span.getEnd() - 1));
+          entitiesWithType.add(new Pair<>(text, span.getType()));
+        }
+        terms.clear();
+        startOffsets.clear();
+        endOffsets.clear();
+      }
+    };
+  }
+
+  /**
+   * Shorthand ("macro") for
+   * {@link FieldMutatingUpdateProcessorFactory#parseSelectorParams(NamedList)}; the arguments
+   * and the result are passed through unchanged.
+   */
+  private static SelectorParams parseSelectorParams(NamedList args) {
+    return FieldMutatingUpdateProcessorFactory.parseSelectorParams(args);
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/solr/contrib/analysis-extras/src/java/org/apache/solr/update/processor/package.html
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/java/org/apache/solr/update/processor/package.html b/solr/contrib/analysis-extras/src/java/org/apache/solr/update/processor/package.html
new file mode 100644
index 0000000..1388c29
--- /dev/null
+++ b/solr/contrib/analysis-extras/src/java/org/apache/solr/update/processor/package.html
@@ -0,0 +1,24 @@
+<!doctype html public "-//w3c//dtd html 4.0 transitional//en">
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+<!-- not a package-info.java, because we already defined this package in core/ -->
+<html>
+  <body>
+    Update request processor invoking OpenNLP Named Entity Recognition over configured
+    source field(s), populating configured target field(s) with the results.
+  </body>
+</html>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/en-test-ner-person.bin
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/en-test-ner-person.bin b/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/en-test-ner-person.bin
new file mode 100644
index 0000000..0b40aac
Binary files /dev/null and b/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/en-test-ner-person.bin differ

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/en-test-sent.bin
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/en-test-sent.bin b/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/en-test-sent.bin
new file mode 100644
index 0000000..4252bcb
Binary files /dev/null and b/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/en-test-sent.bin differ

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/en-test-tokenizer.bin
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/en-test-tokenizer.bin b/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/en-test-tokenizer.bin
new file mode 100644
index 0000000..94668c0
Binary files /dev/null and b/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/en-test-tokenizer.bin differ

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/schema-opennlp-extract.xml
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/schema-opennlp-extract.xml b/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/schema-opennlp-extract.xml
new file mode 100644
index 0000000..fc13431
--- /dev/null
+++ b/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/schema-opennlp-extract.xml
@@ -0,0 +1,49 @@
+<?xml version="1.0" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<schema name="test-opennlp-extract" version="1.6">
+  <fieldType name="opennlp-en-tokenization" class="solr.TextField">
+    <analyzer>
+      <tokenizer class="solr.OpenNLPTokenizerFactory"
+                 sentenceModel="en-test-sent.bin"
+                 tokenizerModel="en-test-tokenizer.bin"/>
+    </analyzer>
+  </fieldType>
+
+  <fieldType name="string" class="solr.StrField" sortMissingLast="true"/>
+
+  <fieldType name="text" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
+    <analyzer>
+      <tokenizer class="solr.MockTokenizerFactory"/>
+      <filter class="solr.LowerCaseFilterFactory"/>
+      <filter class="solr.PorterStemFilterFactory"/>
+    </analyzer>
+  </fieldType>
+
+  <field name="id" type="string" indexed="true" stored="true" multiValued="false" required="true"/>
+  <field name="text" type="text" indexed="true" stored="false"/>
+  <field name="subject" type="text" indexed="true" stored="true"/>
+  <field name="title" type="text" indexed="true" stored="true"/>
+  <field name="subtitle" type="text" indexed="true" stored="true"/>
+  <field name="descs" type="text" indexed="true" stored="true"/>
+  <field name="descriptions" type="text" indexed="true" stored="true"/>
+
+  <dynamicField name="*_txt" type="text" indexed="true" stored="true"/>
+  <dynamicField name="*_s" type="string" indexed="true" stored="true" multiValued="true"/>
+  <dynamicField name="*_people" type="string" indexed="true" stored="true" multiValued="true"/>
+</schema>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/solrconfig-opennlp-extract.xml
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/solrconfig-opennlp-extract.xml b/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/solrconfig-opennlp-extract.xml
new file mode 100644
index 0000000..c44c9e1
--- /dev/null
+++ b/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/solrconfig-opennlp-extract.xml
@@ -0,0 +1,206 @@
+<?xml version="1.0" ?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<config>
+  <luceneMatchVersion>${tests.luceneMatchVersion:LATEST}</luceneMatchVersion>
+  <xi:include href="solrconfig.snippet.randomindexconfig.xml" xmlns:xi="http://www.w3.org/2001/XInclude"/>
+  <requestHandler name="/select" class="solr.SearchHandler"></requestHandler>
+  <requestHandler name="/update" class="solr.UpdateRequestHandler"  />
+  <directoryFactory name="DirectoryFactory" class="${solr.directoryFactory:solr.RAMDirectoryFactory}"/>
+  <schemaFactory class="ClassicIndexSchemaFactory"/>
+
+  <updateRequestProcessorChain name="extract-single">
+    <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+      <str name="modelFile">en-test-ner-person.bin</str>
+      <str name="analyzerFieldType">opennlp-en-tokenization</str>
+      <str name="source">source1_s</str>
+      <str name="dest">dest_s</str>
+    </processor>
+  </updateRequestProcessorChain>
+
+  <updateRequestProcessorChain name="extract-single-regex">
+    <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+      <str name="modelFile">en-test-ner-person.bin</str>
+      <str name="analyzerFieldType">opennlp-en-tokenization</str>
+      <str name="source">source1_s</str>
+      <lst name="dest">
+        <str name="pattern">source\d(_s)</str>
+        <str name="replacement">dest$1</str>
+      </lst>
+    </processor>
+  </updateRequestProcessorChain>
+
+  <updateRequestProcessorChain name="extract-multi">
+    <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+      <str name="modelFile">en-test-ner-person.bin</str>
+      <str name="analyzerFieldType">opennlp-en-tokenization</str>
+      <str name="source">source1_s</str>
+      <str name="source">source2_s</str>
+      <str name="dest">dest_s</str>
+    </processor>
+  </updateRequestProcessorChain>
+
+  <updateRequestProcessorChain name="extract-multi-regex">
+    <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+      <str name="modelFile">en-test-ner-person.bin</str>
+      <str name="analyzerFieldType">opennlp-en-tokenization</str>
+      <str name="source">source1_s</str>
+      <str name="source">source2_s</str>
+      <lst name="dest">
+        <str name="pattern">source\d(_s)</str>
+        <str name="replacement">dest$1</str>
+      </lst>
+    </processor>
+  </updateRequestProcessorChain>
+
+  <updateRequestProcessorChain name="extract-array">
+    <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+      <str name="modelFile">en-test-ner-person.bin</str>
+      <str name="analyzerFieldType">opennlp-en-tokenization</str>
+      <arr name="source">
+        <str>source1_s</str>
+        <str>source2_s</str>
+      </arr>
+      <str name="dest">dest_s</str>
+    </processor>
+  </updateRequestProcessorChain>
+
+  <updateRequestProcessorChain name="extract-array-regex">
+    <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+      <str name="modelFile">en-test-ner-person.bin</str>
+      <str name="analyzerFieldType">opennlp-en-tokenization</str>
+      <arr name="source">
+        <str>source1_s</str>
+        <str>source2_s</str>
+      </arr>
+      <lst name="dest">
+        <str name="pattern">source\d(_s)</str>
+        <str name="replacement">dest$1</str>
+      </lst>
+    </processor>
+  </updateRequestProcessorChain>
+
+  <updateRequestProcessorChain name="extract-selector">
+    <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+      <str name="modelFile">en-test-ner-person.bin</str>
+      <str name="analyzerFieldType">opennlp-en-tokenization</str>
+      <lst name="source">
+        <str name="fieldRegex">source\d_.*</str>
+        <lst name="exclude">
+          <str name="fieldRegex">source0_.*</str>
+        </lst>
+      </lst>
+      <str name="dest">dest_s</str>
+    </processor>
+  </updateRequestProcessorChain>
+
+  <updateRequestProcessorChain name="extract-selector-regex">
+    <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+      <str name="modelFile">en-test-ner-person.bin</str>
+      <str name="analyzerFieldType">opennlp-en-tokenization</str>
+      <lst name="source">
+        <str name="fieldRegex">source\d_.*</str>
+        <lst name="exclude">
+          <str name="fieldRegex">source0_.*</str>
+        </lst>
+      </lst>
+      <lst name="dest">
+        <str name="pattern">source\d(_s)</str>
+        <str name="replacement">dest$1</str>
+      </lst>
+    </processor>
+  </updateRequestProcessorChain>
+
+  <updateRequestProcessorChain name="extract-regex-replaceall">
+    <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+      <str name="modelFile">en-test-ner-person.bin</str>
+      <str name="analyzerFieldType">opennlp-en-tokenization</str>
+      <lst name="source">
+        <str name="fieldRegex">foo.*</str>
+      </lst>
+      <lst name="dest">
+        <!-- unanchored pattern that can match, and be replaced, multiple times in a field name -->
+        <str name="pattern">x(\d)</str>
+        <str name="replacement">y$1</str>
+      </lst>
+    </processor>
+  </updateRequestProcessorChain>
+
+  <updateRequestProcessorChain name="extract-regex-replaceall-with-entity-type">
+    <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+      <str name="modelFile">en-test-ner-person.bin</str>
+      <str name="analyzerFieldType">opennlp-en-tokenization</str>
+      <lst name="source">
+        <str name="fieldRegex">foo.*</str>
+      </lst>
+      <lst name="dest">
+        <!-- unanchored pattern that can match, and be replaced, multiple times in a field name -->
+        <str name="pattern">x(\d)</str>
+        <str name="replacement">{EntityType}_y$1</str>
+      </lst>
+    </processor>
+  </updateRequestProcessorChain>
+
+  <!-- example used in OpenNLPExtractNamedEntitiesUpdateProcessorFactory javadocs -->
+  <updateRequestProcessorChain name="multiple-extract">
+    <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+      <str name="modelFile">en-test-ner-person.bin</str>
+      <str name="analyzerFieldType">opennlp-en-tokenization</str>
+      <str name="source">text</str>
+      <str name="dest">people_s</str>
+    </processor>
+    <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+      <str name="modelFile">en-test-ner-person.bin</str>
+      <str name="analyzerFieldType">opennlp-en-tokenization</str>
+      <arr name="source">
+        <str>title</str>
+        <str>subtitle</str>
+      </arr>
+      <str name="dest">titular_people</str>
+    </processor>
+    <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+      <str name="modelFile">en-test-ner-person.bin</str>
+      <str name="analyzerFieldType">opennlp-en-tokenization</str>
+      <lst name="source">
+        <str name="fieldRegex">.*_txt$</str>
+        <lst name="exclude">
+          <str name="fieldName">notes_txt</str>
+        </lst>
+      </lst>
+      <str name="dest">people_s</str>
+    </processor>
+    <processor class="solr.processor.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+      <str name="modelFile">en-test-ner-person.bin</str>
+      <str name="analyzerFieldType">opennlp-en-tokenization</str>
+      <lst name="source">
+        <str name="fieldRegex">^desc(.*)s$</str>
+      </lst>
+      <lst name="dest">
+        <str name="pattern">^desc(.*)s$</str>
+        <str name="replacement">key_desc$1_people</str>
+      </lst>
+    </processor>
+    <processor class="solr.OpenNLPExtractNamedEntitiesUpdateProcessorFactory">
+      <str name="modelFile">en-test-ner-person.bin</str>
+      <str name="analyzerFieldType">opennlp-en-tokenization</str>
+      <str name="source">summary</str>
+      <str name="dest">summary_{EntityType}_s</str>
+    </processor>
+  </updateRequestProcessorChain>
+</config>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/solrconfig.snippet.randomindexconfig.xml
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/solrconfig.snippet.randomindexconfig.xml b/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/solrconfig.snippet.randomindexconfig.xml
new file mode 100644
index 0000000..23516b0
--- /dev/null
+++ b/solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf/solrconfig.snippet.randomindexconfig.xml
@@ -0,0 +1,48 @@
+<?xml version="1.0" ?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!--
+A solrconfig.xml snippet containing indexConfig settings for randomized testing.
+-->
+<indexConfig>
+  <!-- this sys property is not set by SolrTestCaseJ4 because we ideally want to use
+       the RandomMergePolicy in all tests - but some tests expect very specific
+       Merge behavior, so those tests can set it as needed.
+  -->
+  <mergePolicyFactory class="${solr.tests.mergePolicyFactory:org.apache.solr.util.RandomMergePolicyFactory}" />
+
+  <useCompoundFile>${useCompoundFile:false}</useCompoundFile>
+
+  <maxBufferedDocs>${solr.tests.maxBufferedDocs}</maxBufferedDocs>
+  <ramBufferSizeMB>${solr.tests.ramBufferSizeMB}</ramBufferSizeMB>
+
+  <mergeScheduler class="${solr.tests.mergeScheduler}" />
+
+  <writeLockTimeout>1000</writeLockTimeout>
+  <commitLockTimeout>10000</commitLockTimeout>
+
+  <!-- this sys property is not set by SolrTestCaseJ4 because almost all tests should
+       use the single process lockType for speed - but tests that explicitly need
+       to vary the lockType can set it as needed.
+  -->
+  <lockType>${solr.tests.lockType:single}</lockType>
+
+  <infoStream>${solr.tests.infostream:false}</infoStream>
+
+</indexConfig>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/solr/contrib/analysis-extras/src/test/org/apache/solr/update/processor/TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory.java
----------------------------------------------------------------------
diff --git a/solr/contrib/analysis-extras/src/test/org/apache/solr/update/processor/TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory.java b/solr/contrib/analysis-extras/src/test/org/apache/solr/update/processor/TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory.java
new file mode 100644
index 0000000..dad06a8
--- /dev/null
+++ b/solr/contrib/analysis-extras/src/test/org/apache/solr/update/processor/TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory.java
@@ -0,0 +1,192 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.solr.update.processor;
+
+import java.io.File;
+import java.util.Arrays;
+
+import org.apache.commons.io.FileUtils;
+import org.apache.solr.common.SolrInputDocument;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+public class TestOpenNLPExtractNamedEntitiesUpdateProcessorFactory extends UpdateProcessorTestBase {
+
+  @BeforeClass
+  public static void beforeClass() throws Exception {
+    File testHome = createTempDir().toFile();
+    FileUtils.copyDirectory(getFile("analysis-extras/solr"), testHome);
+    initCore("solrconfig-opennlp-extract.xml", "schema-opennlp-extract.xml", testHome.getAbsolutePath());
+  }
+
+  @Test
+  public void testSimpleExtract() throws Exception {
+    SolrInputDocument doc = processAdd("extract-single",
+        doc(f("id", "1"),
+            f("source1_s", "Take this to Mr. Flashman.")));
+    assertEquals("dest_s should have stringValue", "Flashman", doc.getFieldValue("dest_s"));
+  }
+
+  @Test
+  public void testMultiExtract() throws Exception {
+    SolrInputDocument doc = processAdd("extract-multi",
+        doc(f("id", "1"),
+            f("source1_s", "Hello Flashman."),
+            f("source2_s", "Calling Flashman.")));
+
+    assertEquals(Arrays.asList("Flashman", "Flashman"), doc.getFieldValues("dest_s"));
+  }
+
+  @Test
+  public void testArrayExtract() throws Exception {
+    SolrInputDocument doc = processAdd("extract-array",
+        doc(f("id", "1"),
+            f("source1_s", "Currently we have Flashman. Not much else."),
+            f("source2_s", "Flashman. Is. Not. There.")));
+
+    assertEquals(Arrays.asList("Flashman", "Flashman"), doc.getFieldValues("dest_s"));
+  }
+
+  @Test
+  public void testSelectorExtract() throws Exception {
+    SolrInputDocument doc = processAdd("extract-selector",
+        doc(f("id", "1"),
+            f("source0_s", "Flashman. Or not."),
+            f("source1_s", "Serendipitously, he was. I mean, Flashman. And yet."),
+            f("source2_s", "Correct, Flashman.")));
+
+    assertEquals(Arrays.asList("Flashman", "Flashman"), doc.getFieldValues("dest_s"));
+  }
+
+  public void testMultipleExtracts() throws Exception {
+    // test example from the javadocs
+    SolrInputDocument doc = processAdd("multiple-extract",
+        doc(f("id", "1"),
+            f("text", "From Flashman. To Panman."),
+            f("title", "It's Captain Flashman.", "Privately, Flashman."),
+            f("subtitle", "Ineluctably, Flashman."),
+            f("corrolary_txt", "Forsooth thou bringeth Flashman."),
+            f("notes_txt", "Yes Flashman."),
+            f("summary", "Many aspire to be Flashman."),
+            f("descs", "Courage, Flashman.", "Ain't he Flashman."),
+            f("descriptions", "Flashman. Flashman. Flashman.")));
+
+    assertEquals(Arrays.asList("Flashman", "Flashman"), doc.getFieldValues("people_s"));
+    assertEquals(Arrays.asList("Flashman", "Flashman", "Flashman"), doc.getFieldValues("titular_people"));
+    assertEquals(Arrays.asList("Flashman", "Flashman"), doc.getFieldValues("key_desc_people"));
+    assertEquals(Arrays.asList("Flashman", "Flashman", "Flashman"), doc.getFieldValues("key_description_people"));
+    assertEquals("Flashman", doc.getFieldValue("summary_person_s")); // {EntityType} field name interpolation
+  }
+
+  public void testEquivalentExtraction() throws Exception {
+    SolrInputDocument d;
+
+    // regardless of chain, all of these checks should be equivalent
+    for (String chain : Arrays.asList("extract-single", "extract-single-regex",
+        "extract-multi", "extract-multi-regex",
+        "extract-array", "extract-array-regex",
+        "extract-selector", "extract-selector-regex")) {
+
+      // simple extract
+      d = processAdd(chain,
+          doc(f("id", "1111"),
+              f("source0_s", "Totally Flashman."), // not extracted
+              f("source1_s", "One nation under Flashman.", "Good Flashman.")));
+      assertNotNull(chain, d);
+      assertEquals(chain, Arrays.asList("Flashman", "Flashman"), d.getFieldValues("dest_s"));
+
+      // append to existing values
+      d = processAdd(chain,
+          doc(f("id", "1111"),
+              field("dest_s", "orig1", "orig2"),
+              f("source0_s", "Flashman. In totality."), // not extracted
+              f("source1_s", "Two nations under Flashman.", "Meh Flashman.")));
+      assertNotNull(chain, d);
+      assertEquals(chain, Arrays.asList("orig1", "orig2", "Flashman", "Flashman"), d.getFieldValues("dest_s"));
+    }
+
+    // should be equivalent for any chain matching source1_s and source2_s (but not source0_s)
+    for (String chain : Arrays.asList("extract-multi", "extract-multi-regex",
+        "extract-array", "extract-array-regex",
+        "extract-selector", "extract-selector-regex")) {
+
+      // simple extract
+      d = processAdd(chain,
+          doc(f("id", "1111"),
+              f("source0_s", "Not Flashman."), // not extracted
+              f("source1_s", "Could have had a Flashman.", "Bad Flashman."),
+              f("source2_s", "Indubitably Flashman.")));
+      assertNotNull(chain, d);
+      assertEquals(chain, Arrays.asList("Flashman", "Flashman", "Flashman"), d.getFieldValues("dest_s"));
+
+      // append to existing values
+      d = processAdd(chain,
+          doc(f("id", "1111"),
+              field("dest_s", "orig1", "orig2"),
+              f("source0_s", "Never Flashman."), // not extracted
+              f("source1_s", "Seeking Flashman.", "Evil incarnate Flashman."),
+              f("source2_s", "Perfunctorily Flashman.")));
+      assertNotNull(chain, d);
+      assertEquals(chain, Arrays.asList("orig1", "orig2", "Flashman", "Flashman", "Flashman"), d.getFieldValues("dest_s"));
+    }
+
+    // any chain that extracts from source1_s into dest_s should be equivalent for these assertions
+    for (String chain : Arrays.asList("extract-single", "extract-single-regex",
+        "extract-multi", "extract-multi-regex",
+        "extract-array", "extract-array-regex",
+        "extract-selector", "extract-selector-regex")) {
+
+      // simple extract
+      d = processAdd(chain,
+          doc(f("id", "1111"),
+              f("source1_s", "Always Flashman.", "Flashman. Noone else.")));
+      assertNotNull(chain, d);
+      assertEquals(chain, Arrays.asList("Flashman", "Flashman"), d.getFieldValues("dest_s"));
+
+      // append to existing values
+      d = processAdd(chain,
+          doc(f("id", "1111"),
+              field("dest_s", "orig1", "orig2"),
+              f("source1_s", "Flashman.  And, scene.", "Contemporary Flashman. Yeesh.")));
+      assertNotNull(chain, d);
+      assertEquals(chain, Arrays.asList("orig1", "orig2", "Flashman", "Flashman"), d.getFieldValues("dest_s"));
+    }
+  }
+
+  public void testExtractFieldRegexReplaceAll() throws Exception {
+    SolrInputDocument d = processAdd("extract-regex-replaceall",
+        doc(f("id", "1111"),
+            f("foo_x2_s", "Infrequently Flashman.", "In the words of Flashman."),
+            f("foo_x3_x7_s", "Flashman. Whoa.")));
+
+    assertNotNull(d);
+    assertEquals(Arrays.asList("Flashman", "Flashman"), d.getFieldValues("foo_y2_s"));
+    assertEquals("Flashman", d.getFieldValue("foo_y3_y7_s"));
+  }
+
+  public void testExtractFieldRegexReplaceAllWithEntityType() throws Exception {
+    SolrInputDocument d = processAdd("extract-regex-replaceall-with-entity-type",
+        doc(f("id", "1111"),
+            f("foo_x2_s", "Infrequently Flashman.", "In the words of Flashman."),
+            f("foo_x3_x7_s", "Flashman. Whoa.")));
+
+    assertNotNull(d);
+    assertEquals(d.getFieldNames().toString(), Arrays.asList("Flashman", "Flashman"), d.getFieldValues("foo_person_y2_s"));
+    assertEquals(d.getFieldNames().toString(),"Flashman", d.getFieldValue("foo_person_y3_person_y7_s"));
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/solr/core/src/test/org/apache/solr/update/processor/UpdateProcessorTestBase.java
----------------------------------------------------------------------
diff --git a/solr/core/src/test/org/apache/solr/update/processor/UpdateProcessorTestBase.java b/solr/core/src/test/org/apache/solr/update/processor/UpdateProcessorTestBase.java
deleted file mode 100644
index d3aa979..0000000
--- a/solr/core/src/test/org/apache/solr/update/processor/UpdateProcessorTestBase.java
+++ /dev/null
@@ -1,168 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-package org.apache.solr.update.processor;
-
-import org.apache.solr.SolrTestCaseJ4;
-import org.apache.solr.common.params.SolrParams;
-import org.apache.solr.common.util.IOUtils;
-import org.apache.solr.common.params.ModifiableSolrParams;
-import org.apache.solr.common.SolrInputDocument;
-import org.apache.solr.common.SolrInputField;
-import org.apache.solr.core.SolrCore;
-import org.apache.solr.request.SolrRequestInfo;
-import org.apache.solr.request.LocalSolrQueryRequest;
-import org.apache.solr.request.SolrQueryRequest;
-import org.apache.solr.response.SolrQueryResponse;
-import org.apache.solr.update.AddUpdateCommand;
-import org.apache.solr.update.CommitUpdateCommand;
-import org.apache.solr.update.DeleteUpdateCommand;
-
-import java.io.IOException;
-
-public class UpdateProcessorTestBase extends SolrTestCaseJ4 {
-
-  /**
-   * Runs a document through the specified chain, and returns the final
-   * document used when the chain is completed (NOTE: some chains may
-   * modify the document in place
-   */
-  protected SolrInputDocument processAdd(final String chain,
-                                         final SolrInputDocument docIn)
-    throws IOException {
-
-    return processAdd(chain, new ModifiableSolrParams(), docIn);
-  }
-
-  /**
-   * Runs a document through the specified chain, and returns the final
-   * document used when the chain is completed (NOTE: some chains may
-   * modify the document in place
-   */
-  protected SolrInputDocument processAdd(final String chain,
-                                         final SolrParams requestParams,
-                                         final SolrInputDocument docIn)
-    throws IOException {
-
-    SolrCore core = h.getCore();
-    UpdateRequestProcessorChain pc = core.getUpdateProcessingChain(chain);
-    assertNotNull("No Chain named: " + chain, pc);
-
-    SolrQueryResponse rsp = new SolrQueryResponse();
-
-    SolrQueryRequest req = new LocalSolrQueryRequest(core, requestParams);
-    try {
-      SolrRequestInfo.setRequestInfo(new SolrRequestInfo(req, rsp));
-      AddUpdateCommand cmd = new AddUpdateCommand(req);
-      cmd.solrDoc = docIn;
-
-      UpdateRequestProcessor processor = pc.createProcessor(req, rsp);
-      if (null != processor) {
-        // test chain might be empty or short circuited.
-        processor.processAdd(cmd);
-      }
-
-      return cmd.solrDoc;
-    } finally {
-      SolrRequestInfo.clearRequestInfo();
-      req.close();
-    }
-  }
-
-  protected void processCommit(final String chain) throws IOException {
-    SolrCore core = h.getCore();
-    UpdateRequestProcessorChain pc = core.getUpdateProcessingChain(chain);
-    assertNotNull("No Chain named: " + chain, pc);
-
-    SolrQueryResponse rsp = new SolrQueryResponse();
-
-    SolrQueryRequest req = new LocalSolrQueryRequest(core, new ModifiableSolrParams());
-
-    CommitUpdateCommand cmd = new CommitUpdateCommand(req,false);
-    UpdateRequestProcessor processor = pc.createProcessor(req, rsp);
-    try {
-      processor.processCommit(cmd);
-    } finally {
-      req.close();
-    }
-  }
-
-  protected void processDeleteById(final String chain, String id) throws IOException {
-    SolrCore core = h.getCore();
-    UpdateRequestProcessorChain pc = core.getUpdateProcessingChain(chain);
-    assertNotNull("No Chain named: " + chain, pc);
-
-    SolrQueryResponse rsp = new SolrQueryResponse();
-
-    SolrQueryRequest req = new LocalSolrQueryRequest(core, new ModifiableSolrParams());
-
-    DeleteUpdateCommand cmd = new DeleteUpdateCommand(req);
-    cmd.setId(id);
-    UpdateRequestProcessor processor = pc.createProcessor(req, rsp);
-    try {
-      processor.processDelete(cmd);
-    } finally {
-      req.close();
-    }
-  }
-
-  protected void finish(final String chain) throws IOException {
-    SolrCore core = h.getCore();
-    UpdateRequestProcessorChain pc = core.getUpdateProcessingChain(chain);
-    assertNotNull("No Chain named: " + chain, pc);
-
-    SolrQueryResponse rsp = new SolrQueryResponse();
-    SolrQueryRequest req = new LocalSolrQueryRequest(core, new ModifiableSolrParams());
-
-    UpdateRequestProcessor processor = pc.createProcessor(req, rsp);
-    try {
-      processor.finish();
-    } finally {
-      IOUtils.closeQuietly(processor);
-      req.close();
-    }
-  }
-
-
-  /**
-   * Convenience method for building up SolrInputDocuments
-   */
-  final SolrInputDocument doc(SolrInputField... fields) {
-    SolrInputDocument d = new SolrInputDocument();
-    for (SolrInputField f : fields) {
-      d.put(f.getName(), f);
-    }
-    return d;
-  }
-
-  /**
-   * Convenience method for building up SolrInputFields
-   */
-  final SolrInputField field(String name, Object... values) {
-    SolrInputField f = new SolrInputField(name);
-    for (Object v : values) {
-      f.addValue(v);
-    }
-    return f;
-  }
-
-  /**
-   * Convenience method for building up SolrInputFields with default boost
-   */
-  final SolrInputField f(String name, Object... values) {
-    return field(name, values);
-  }
-}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/solr/licenses/opennlp-maxent-3.0.3.jar.sha1
----------------------------------------------------------------------
diff --git a/solr/licenses/opennlp-maxent-3.0.3.jar.sha1 b/solr/licenses/opennlp-maxent-3.0.3.jar.sha1
new file mode 100644
index 0000000..c3c412f
--- /dev/null
+++ b/solr/licenses/opennlp-maxent-3.0.3.jar.sha1
@@ -0,0 +1 @@
+55e39e6b46e71f35229cdd6950e72d8cce3b5fd4


[12/12] lucene-solr:branch_7x: LUCENE-2899: Add OpenNLP Analysis capabilities as a module

Posted by sa...@apache.org.
LUCENE-2899: Add OpenNLP Analysis capabilities as a module


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/b720e1ee
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/b720e1ee
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/b720e1ee

Branch: refs/heads/branch_7x
Commit: b720e1ee3a524034fb8a8a6188b0b23bf17ff1cb
Parents: 7f30646
Author: Steve Rowe <sa...@apache.org>
Authored: Fri Dec 15 11:24:18 2017 -0500
Committer: Steve Rowe <sa...@apache.org>
Committed: Fri Dec 15 11:24:58 2017 -0500

----------------------------------------------------------------------
 dev-tools/idea/.idea/ant.xml                    |    1 +
 dev-tools/idea/.idea/modules.xml                |    1 +
 dev-tools/idea/.idea/workspace.xml              |   83 +-
 .../idea/lucene/analysis/opennlp/opennlp.iml    |   30 +
 .../contrib/analysis-extras/analysis-extras.iml |    1 +
 .../lucene/analysis/opennlp/pom.xml.template    |   78 +
 .../maven/lucene/analysis/pom.xml.template      |    1 +
 lucene/CHANGES.txt                              |    9 +
 lucene/analysis/README.txt                      |    5 +
 lucene/analysis/build.xml                       |    6 +-
 .../miscellaneous/TypeAsSynonymFilter.java      |   80 +
 .../TypeAsSynonymFilterFactory.java             |   55 +
 ...ache.lucene.analysis.util.TokenFilterFactory |    1 +
 .../analysis/minhash/MinHashFilterTest.java     |    6 +-
 .../TestTypeAsSynonymFilterFactory.java         |   50 +
 lucene/analysis/opennlp/build.xml               |  118 +
 lucene/analysis/opennlp/ivy.xml                 |   29 +
 .../analysis/opennlp/OpenNLPChunkerFilter.java  |  108 +
 .../opennlp/OpenNLPChunkerFilterFactory.java    |   81 +
 .../opennlp/OpenNLPLemmatizerFilter.java        |  123 +
 .../opennlp/OpenNLPLemmatizerFilterFactory.java |   89 +
 .../analysis/opennlp/OpenNLPPOSFilter.java      |   96 +
 .../opennlp/OpenNLPPOSFilterFactory.java        |   71 +
 .../opennlp/OpenNLPSentenceBreakIterator.java   |  224 ++
 .../analysis/opennlp/OpenNLPTokenizer.java      |   98 +
 .../opennlp/OpenNLPTokenizerFactory.java        |   79 +
 .../lucene/analysis/opennlp/package-info.java   |   21 +
 .../analysis/opennlp/tools/NLPChunkerOp.java    |   41 +
 .../analysis/opennlp/tools/NLPLemmatizerOp.java |   80 +
 .../analysis/opennlp/tools/NLPNERTaggerOp.java  |   56 +
 .../analysis/opennlp/tools/NLPPOSTaggerOp.java  |   41 +
 .../opennlp/tools/NLPSentenceDetectorOp.java    |   50 +
 .../analysis/opennlp/tools/NLPTokenizerOp.java  |   48 +
 .../opennlp/tools/OpenNLPOpsFactory.java        |  176 +
 .../analysis/opennlp/tools/package-info.java    |   21 +
 lucene/analysis/opennlp/src/java/overview.html  |   61 +
 ...ache.lucene.analysis.util.TokenFilterFactory |   18 +
 ...apache.lucene.analysis.util.TokenizerFactory |   16 +
 .../lucene/analysis/opennlp/en-test-chunker.bin |  Bin 0 -> 89915 bytes
 .../lucene/analysis/opennlp/en-test-lemmas.dict |   12 +
 .../analysis/opennlp/en-test-lemmatizer.bin     |  Bin 0 -> 7370 bytes
 .../analysis/opennlp/en-test-ner-person.bin     |  Bin 0 -> 1700 bytes
 .../analysis/opennlp/en-test-pos-maxent.bin     |  Bin 0 -> 18424 bytes
 .../lucene/analysis/opennlp/en-test-sent.bin    |  Bin 0 -> 1050 bytes
 .../analysis/opennlp/en-test-tokenizer.bin      |  Bin 0 -> 15096 bytes
 .../TestOpenNLPChunkerFilterFactory.java        |   74 +
 .../TestOpenNLPLemmatizerFilterFactory.java     |  169 +
 .../opennlp/TestOpenNLPPOSFilterFactory.java    |   95 +
 .../TestOpenNLPSentenceBreakIterator.java       |  201 +
 .../opennlp/TestOpenNLPTokenizerFactory.java    |   97 +
 .../src/tools/test-model-data/README.txt        |    6 +
 .../src/tools/test-model-data/chunks.txt        | 3566 ++++++++++++++++++
 .../src/tools/test-model-data/lemmas.txt        |  875 +++++
 .../tools/test-model-data/ner_TrainerParams.txt |   21 +
 .../src/tools/test-model-data/ner_flashman.txt  |  143 +
 .../opennlp/src/tools/test-model-data/pos.txt   |   30 +
 .../src/tools/test-model-data/sentences.txt     |  144 +
 .../src/tools/test-model-data/tokenizer.txt     |   69 +
 .../apache/lucene/analysis/TestStopFilter.java  |    9 +-
 lucene/ivy-versions.properties                  |    3 +
 lucene/licenses/opennlp-maxent-3.0.3.jar.sha1   |    1 +
 lucene/licenses/opennlp-maxent-LICENSE-ASL.txt  |  202 +
 lucene/licenses/opennlp-maxent-NOTICE.txt       |    6 +
 lucene/licenses/opennlp-tools-1.8.3.jar.sha1    |    1 +
 lucene/licenses/opennlp-tools-LICENSE-ASL.txt   |  202 +
 lucene/licenses/opennlp-tools-NOTICE.txt        |    6 +
 lucene/module-build.xml                         |   22 +
 .../analysis/BaseTokenStreamTestCase.java       |   32 +-
 solr/CHANGES.txt                                |    7 +
 solr/contrib/analysis-extras/README.txt         |   10 +-
 solr/contrib/analysis-extras/build.xml          |   20 +-
 solr/contrib/analysis-extras/ivy.xml            |    3 +
 ...ractNamedEntitiesUpdateProcessorFactory.java |  571 +++
 .../apache/solr/update/processor/package.html   |   24 +
 .../collection1/conf/en-test-ner-person.bin     |  Bin 0 -> 1700 bytes
 .../solr/collection1/conf/en-test-sent.bin      |  Bin 0 -> 1050 bytes
 .../solr/collection1/conf/en-test-tokenizer.bin |  Bin 0 -> 15096 bytes
 .../collection1/conf/schema-opennlp-extract.xml |   49 +
 .../conf/solrconfig-opennlp-extract.xml         |  206 +
 .../solrconfig.snippet.randomindexconfig.xml    |   48 +
 ...ractNamedEntitiesUpdateProcessorFactory.java |  192 +
 .../processor/UpdateProcessorTestBase.java      |  168 -
 solr/licenses/opennlp-maxent-3.0.3.jar.sha1     |    1 +
 solr/licenses/opennlp-maxent-LICENSE-ASL.txt    |  202 +
 solr/licenses/opennlp-maxent-NOTICE.txt         |    6 +
 solr/licenses/opennlp-tools-1.8.3.jar.sha1      |    1 +
 solr/licenses/opennlp-tools-LICENSE-ASL.txt     |  202 +
 solr/licenses/opennlp-tools-NOTICE.txt          |    6 +
 .../solr-ref-guide/src/filter-descriptions.adoc |   32 +
 solr/solr-ref-guide/src/language-analysis.adoc  |  208 +
 solr/solr-ref-guide/src/tokenizers.adoc         |    4 +
 .../src/update-request-processors.adoc          |    6 +
 .../processor/UpdateProcessorTestBase.java      |  168 +
 93 files changed, 10040 insertions(+), 232 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/dev-tools/idea/.idea/ant.xml
----------------------------------------------------------------------
diff --git a/dev-tools/idea/.idea/ant.xml b/dev-tools/idea/.idea/ant.xml
index 8723e63..6c7bc8c 100644
--- a/dev-tools/idea/.idea/ant.xml
+++ b/dev-tools/idea/.idea/ant.xml
@@ -11,6 +11,7 @@
     <buildFile url="file://$PROJECT_DIR$/lucene/analysis/icu/build.xml" />
     <buildFile url="file://$PROJECT_DIR$/lucene/analysis/kuromoji/build.xml" />
     <buildFile url="file://$PROJECT_DIR$/lucene/analysis/morfologik/build.xml" />
+    <buildFile url="file://$PROJECT_DIR$/lucene/analysis/opennlp/build.xml" />
     <buildFile url="file://$PROJECT_DIR$/lucene/analysis/phonetic/build.xml" />
     <buildFile url="file://$PROJECT_DIR$/lucene/analysis/smartcn/build.xml" />
     <buildFile url="file://$PROJECT_DIR$/lucene/analysis/stempel/build.xml" />

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/dev-tools/idea/.idea/modules.xml
----------------------------------------------------------------------
diff --git a/dev-tools/idea/.idea/modules.xml b/dev-tools/idea/.idea/modules.xml
index 7ad2a78..4df1000 100644
--- a/dev-tools/idea/.idea/modules.xml
+++ b/dev-tools/idea/.idea/modules.xml
@@ -15,6 +15,7 @@
       <module group="Lucene/Analysis" filepath="$PROJECT_DIR$/lucene/analysis/icu/icu.iml" />
       <module group="Lucene/Analysis" filepath="$PROJECT_DIR$/lucene/analysis/kuromoji/kuromoji.iml" />
       <module group="Lucene/Analysis" filepath="$PROJECT_DIR$/lucene/analysis/morfologik/morfologik.iml" />
+      <module group="Lucene/Analysis" filepath="$PROJECT_DIR$/lucene/analysis/opennlp/opennlp.iml" />
       <module group="Lucene/Analysis" filepath="$PROJECT_DIR$/lucene/analysis/phonetic/phonetic.iml" />
       <module group="Lucene/Analysis" filepath="$PROJECT_DIR$/lucene/analysis/smartcn/smartcn.iml" />
       <module group="Lucene/Analysis" filepath="$PROJECT_DIR$/lucene/analysis/stempel/stempel.iml" />

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/dev-tools/idea/.idea/workspace.xml
----------------------------------------------------------------------
diff --git a/dev-tools/idea/.idea/workspace.xml b/dev-tools/idea/.idea/workspace.xml
index e22108f..11794af 100644
--- a/dev-tools/idea/.idea/workspace.xml
+++ b/dev-tools/idea/.idea/workspace.xml
@@ -44,6 +44,14 @@
       <option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
       <patterns><pattern testClass=".*\.Test[^.]*|.*\.[^.]*Test" /></patterns>
     </configuration>
+    <configuration default="false" name="Module analyzers-opennlp" type="JUnit" factoryName="JUnit">
+      <module name="opennlp" />
+      <option name="TEST_OBJECT" value="pattern" />
+      <option name="WORKING_DIRECTORY" value="file://$PROJECT_DIR$/idea-build/lucene/analysis/opennlp" />
+      <option name="VM_PARAMETERS" value="-ea -DtempDir=temp" />
+      <option name="TEST_SEARCH_SCOPE"><value defaultName="singleModule" /></option>
+      <patterns><pattern testClass=".*\.Test[^.]*|.*\.[^.]*Test" /></patterns>
+    </configuration>
     <configuration default="false" name="Module analyzers-phonetic" type="JUnit" factoryName="JUnit">
       <module name="phonetic" />
       <option name="TEST_OBJECT" value="pattern" />
@@ -333,48 +341,49 @@
       <patterns><pattern testClass=".*\.Test[^.]*|.*\.[^.]*Test" /></patterns>
     </configuration>
 
-    <list size="41">
+    <list size="42">
       <item index="0" class="java.lang.String" itemvalue="JUnit.Lucene core" />
       <item index="1" class="java.lang.String" itemvalue="JUnit.Module analyzers-common" />
       <item index="2" class="java.lang.String" itemvalue="JUnit.Module analyzers-icu" />
       <item index="3" class="java.lang.String" itemvalue="JUnit.Module analyzers-kuromoji" />
       <item index="4" class="java.lang.String" itemvalue="JUnit.Module analyzers-morfologik" />
-      <item index="5" class="java.lang.String" itemvalue="JUnit.Module analyzers-phonetic" />
-      <item index="6" class="java.lang.String" itemvalue="JUnit.Module analyzers-smartcn" />
-      <item index="7" class="java.lang.String" itemvalue="JUnit.Module analyzers-stempel" />
-      <item index="8" class="java.lang.String" itemvalue="JUnit.Module analyzers-uima" />
-      <item index="9" class="java.lang.String" itemvalue="JUnit.Module backward-codecs" />
-      <item index="10" class="java.lang.String" itemvalue="JUnit.Module benchmark" />
-      <item index="11" class="java.lang.String" itemvalue="JUnit.Module classification" />
-      <item index="12" class="java.lang.String" itemvalue="JUnit.Module codecs" />
-      <item index="13" class="java.lang.String" itemvalue="JUnit.Module expressions" />
-      <item index="14" class="java.lang.String" itemvalue="JUnit.Module facet" />
-      <item index="15" class="java.lang.String" itemvalue="JUnit.Module grouping" />
-      <item index="16" class="java.lang.String" itemvalue="JUnit.Module highlighter" />
-      <item index="17" class="java.lang.String" itemvalue="JUnit.Module join" />
-      <item index="18" class="java.lang.String" itemvalue="JUnit.Module memory" />
-      <item index="19" class="java.lang.String" itemvalue="JUnit.Module misc" />
-      <item index="20" class="java.lang.String" itemvalue="JUnit.Module queries" />
-      <item index="21" class="java.lang.String" itemvalue="JUnit.Module queryparser" />
-      <item index="22" class="java.lang.String" itemvalue="JUnit.Module replicator" />
-      <item index="23" class="java.lang.String" itemvalue="JUnit.Module sandbox" />
-      <item index="24" class="java.lang.String" itemvalue="JUnit.Module spatial" />
-      <item index="25" class="java.lang.String" itemvalue="JUnit.Module spatial-extras" />
-      <item index="26" class="java.lang.String" itemvalue="JUnit.Module spatial3d" />
-      <item index="27" class="java.lang.String" itemvalue="JUnit.Module suggest" />
-      <item index="28" class="java.lang.String" itemvalue="Application.solrcloud" />
-      <item index="29" class="java.lang.String" itemvalue="JUnit.Solr core" />
-      <item index="30" class="java.lang.String" itemvalue="JUnit.Solrj" />
-      <item index="31" class="java.lang.String" itemvalue="JUnit.Solr analysis-extras contrib" />
-      <item index="32" class="java.lang.String" itemvalue="JUnit.Solr analytics contrib" />
-      <item index="33" class="java.lang.String" itemvalue="JUnit.Solr clustering contrib" />
-      <item index="34" class="java.lang.String" itemvalue="JUnit.Solr dataimporthandler contrib" />
-      <item index="35" class="java.lang.String" itemvalue="JUnit.Solr dataimporthandler-extras contrib" />
-      <item index="36" class="java.lang.String" itemvalue="JUnit.Solr extraction contrib" />
-      <item index="37" class="java.lang.String" itemvalue="JUnit.Solr langid contrib" />
-      <item index="38" class="java.lang.String" itemvalue="JUnit.Solr ltr contrib" />
-      <item index="39" class="java.lang.String" itemvalue="JUnit.Solr uima contrib" />
-      <item index="40" class="java.lang.String" itemvalue="JUnit.Solr velocity contrib" />
+      <item index="5" class="java.lang.String" itemvalue="JUnit.Module analyzers-opennlp" />
+      <item index="6" class="java.lang.String" itemvalue="JUnit.Module analyzers-phonetic" />
+      <item index="7" class="java.lang.String" itemvalue="JUnit.Module analyzers-smartcn" />
+      <item index="8" class="java.lang.String" itemvalue="JUnit.Module analyzers-stempel" />
+      <item index="9" class="java.lang.String" itemvalue="JUnit.Module analyzers-uima" />
+      <item index="10" class="java.lang.String" itemvalue="JUnit.Module backward-codecs" />
+      <item index="11" class="java.lang.String" itemvalue="JUnit.Module benchmark" />
+      <item index="12" class="java.lang.String" itemvalue="JUnit.Module classification" />
+      <item index="13" class="java.lang.String" itemvalue="JUnit.Module codecs" />
+      <item index="14" class="java.lang.String" itemvalue="JUnit.Module expressions" />
+      <item index="15" class="java.lang.String" itemvalue="JUnit.Module facet" />
+      <item index="16" class="java.lang.String" itemvalue="JUnit.Module grouping" />
+      <item index="17" class="java.lang.String" itemvalue="JUnit.Module highlighter" />
+      <item index="18" class="java.lang.String" itemvalue="JUnit.Module join" />
+      <item index="19" class="java.lang.String" itemvalue="JUnit.Module memory" />
+      <item index="20" class="java.lang.String" itemvalue="JUnit.Module misc" />
+      <item index="21" class="java.lang.String" itemvalue="JUnit.Module queries" />
+      <item index="22" class="java.lang.String" itemvalue="JUnit.Module queryparser" />
+      <item index="23" class="java.lang.String" itemvalue="JUnit.Module replicator" />
+      <item index="24" class="java.lang.String" itemvalue="JUnit.Module sandbox" />
+      <item index="25" class="java.lang.String" itemvalue="JUnit.Module spatial" />
+      <item index="26" class="java.lang.String" itemvalue="JUnit.Module spatial-extras" />
+      <item index="27" class="java.lang.String" itemvalue="JUnit.Module spatial3d" />
+      <item index="28" class="java.lang.String" itemvalue="JUnit.Module suggest" />
+      <item index="29" class="java.lang.String" itemvalue="Application.solrcloud" />
+      <item index="30" class="java.lang.String" itemvalue="JUnit.Solr core" />
+      <item index="31" class="java.lang.String" itemvalue="JUnit.Solrj" />
+      <item index="32" class="java.lang.String" itemvalue="JUnit.Solr analysis-extras contrib" />
+      <item index="33" class="java.lang.String" itemvalue="JUnit.Solr analytics contrib" />
+      <item index="34" class="java.lang.String" itemvalue="JUnit.Solr clustering contrib" />
+      <item index="35" class="java.lang.String" itemvalue="JUnit.Solr dataimporthandler contrib" />
+      <item index="36" class="java.lang.String" itemvalue="JUnit.Solr dataimporthandler-extras contrib" />
+      <item index="37" class="java.lang.String" itemvalue="JUnit.Solr extraction contrib" />
+      <item index="38" class="java.lang.String" itemvalue="JUnit.Solr langid contrib" />
+      <item index="39" class="java.lang.String" itemvalue="JUnit.Solr ltr contrib" />
+      <item index="40" class="java.lang.String" itemvalue="JUnit.Solr uima contrib" />
+      <item index="41" class="java.lang.String" itemvalue="JUnit.Solr velocity contrib" />
     </list>
   </component>
 </project>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/dev-tools/idea/lucene/analysis/opennlp/opennlp.iml
----------------------------------------------------------------------
diff --git a/dev-tools/idea/lucene/analysis/opennlp/opennlp.iml b/dev-tools/idea/lucene/analysis/opennlp/opennlp.iml
new file mode 100644
index 0000000..7725065
--- /dev/null
+++ b/dev-tools/idea/lucene/analysis/opennlp/opennlp.iml
@@ -0,0 +1,30 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<module type="JAVA_MODULE" version="4">
+  <component name="NewModuleRootManager" inherit-compiler-output="false">
+    <output url="file://$MODULE_DIR$/../../../idea-build/lucene/analysis/opennlp/classes/java" />
+    <output-test url="file://$MODULE_DIR$/../../../idea-build/lucene/analysis/opennlp/classes/test" />
+    <exclude-output />
+    <content url="file://$MODULE_DIR$">
+      <sourceFolder url="file://$MODULE_DIR$/src/java" isTestSource="false" />
+      <sourceFolder url="file://$MODULE_DIR$/src/test" isTestSource="true" />
+      <sourceFolder url="file://$MODULE_DIR$/src/resources" type="java-resource" />
+      <sourceFolder url="file://$MODULE_DIR$/src/test-files" type="java-test-resource" />
+    </content>
+    <orderEntry type="inheritedJdk" />
+    <orderEntry type="sourceFolder" forTests="false" />
+    <orderEntry type="module-library">
+      <library>
+        <CLASSES>
+          <root url="file://$MODULE_DIR$/lib" />
+        </CLASSES>
+        <JAVADOC />
+        <SOURCES />
+        <jarDirectory url="file://$MODULE_DIR$/lib" recursive="false" />
+      </library>
+    </orderEntry>
+    <orderEntry type="library" scope="TEST" name="JUnit" level="project" />
+    <orderEntry type="module" scope="TEST" module-name="lucene-test-framework" />
+    <orderEntry type="module" module-name="analysis-common" />
+    <orderEntry type="module" module-name="lucene-core" />
+  </component>
+</module>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/dev-tools/idea/solr/contrib/analysis-extras/analysis-extras.iml
----------------------------------------------------------------------
diff --git a/dev-tools/idea/solr/contrib/analysis-extras/analysis-extras.iml b/dev-tools/idea/solr/contrib/analysis-extras/analysis-extras.iml
index 287b46a..7c0c0c1 100644
--- a/dev-tools/idea/solr/contrib/analysis-extras/analysis-extras.iml
+++ b/dev-tools/idea/solr/contrib/analysis-extras/analysis-extras.iml
@@ -37,5 +37,6 @@
     <orderEntry type="module" module-name="lucene-core" />
     <orderEntry type="module" module-name="misc" />
     <orderEntry type="module" module-name="sandbox" />
+    <orderEntry type="module" module-name="opennlp" />
   </component>
 </module>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/dev-tools/maven/lucene/analysis/opennlp/pom.xml.template
----------------------------------------------------------------------
diff --git a/dev-tools/maven/lucene/analysis/opennlp/pom.xml.template b/dev-tools/maven/lucene/analysis/opennlp/pom.xml.template
new file mode 100644
index 0000000..4109a0a
--- /dev/null
+++ b/dev-tools/maven/lucene/analysis/opennlp/pom.xml.template
@@ -0,0 +1,78 @@
+<!--
+  Licensed to the Apache Software Foundation (ASF) under one
+  or more contributor license agreements.  See the NOTICE file
+  distributed with this work for additional information
+  regarding copyright ownership.  The ASF licenses this file
+  to you under the Apache License, Version 2.0 (the
+  "License"); you may not use this file except in compliance
+  with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+  Unless required by applicable law or agreed to in writing,
+  software distributed under the License is distributed on an
+  "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+  KIND, either express or implied.  See the License for the
+  specific language governing permissions and limitations
+  under the License.
+-->
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.lucene</groupId>
+    <artifactId>lucene-parent</artifactId>
+    <version>@version@</version>
+    <relativePath>../../pom.xml</relativePath>
+  </parent>
+  <groupId>org.apache.lucene</groupId>
+  <artifactId>lucene-analyzers-opennlp</artifactId>
+  <packaging>jar</packaging>
+  <name>Lucene OpenNLP integration</name>
+  <description>
+    Lucene OpenNLP integration
+  </description>
+  <properties>
+    <module-directory>lucene/analysis/opennlp</module-directory>
+    <relative-top-level>../../../..</relative-top-level>
+    <module-path>${relative-top-level}/${module-directory}</module-path>
+  </properties>
+  <scm>
+    <connection>scm:git:${vc-anonymous-base-url}</connection>
+    <developerConnection>scm:git:${vc-dev-base-url}</developerConnection>
+    <url>${vc-browse-base-url};f=${module-directory}</url>
+  </scm>
+  <dependencies>
+    <dependency>
+      <!-- lucene-test-framework dependency must be declared before lucene-core -->
+      <groupId>org.apache.lucene</groupId>
+      <artifactId>lucene-test-framework</artifactId>
+      <scope>test</scope>
+    </dependency>
+    @lucene-analyzers-opennlp.internal.dependencies@
+    @lucene-analyzers-opennlp.external.dependencies@
+    @lucene-analyzers-opennlp.internal.test.dependencies@
+    @lucene-analyzers-opennlp.external.test.dependencies@
+  </dependencies>
+  <build>
+    <sourceDirectory>${module-path}/src/java</sourceDirectory>
+    <testSourceDirectory>${module-path}/src/test</testSourceDirectory>
+    <resources>
+      <resource>
+        <directory>${module-path}/src/resources</directory>
+      </resource>
+    </resources>
+    <testResources>
+      <testResource>
+        <directory>${project.build.testSourceDirectory}</directory>
+        <excludes>
+          <exclude>**/*.java</exclude>
+        </excludes>
+      </testResource>
+      <testResource>
+        <directory>${module-path}/src/test-files</directory>
+      </testResource>
+    </testResources>
+  </build>
+</project>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/dev-tools/maven/lucene/analysis/pom.xml.template
----------------------------------------------------------------------
diff --git a/dev-tools/maven/lucene/analysis/pom.xml.template b/dev-tools/maven/lucene/analysis/pom.xml.template
index 9058abf..466ad30 100644
--- a/dev-tools/maven/lucene/analysis/pom.xml.template
+++ b/dev-tools/maven/lucene/analysis/pom.xml.template
@@ -35,6 +35,7 @@
     <module>icu</module>
     <module>kuromoji</module>
     <module>morfologik</module>
+    <module>opennlp</module>
     <module>phonetic</module>
     <module>smartcn</module>
     <module>stempel</module>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/CHANGES.txt
----------------------------------------------------------------------
diff --git a/lucene/CHANGES.txt b/lucene/CHANGES.txt
index 7bd6501..8002b13 100644
--- a/lucene/CHANGES.txt
+++ b/lucene/CHANGES.txt
@@ -10,6 +10,15 @@ API Changes
 * LUCENE-8051: LevensteinDistance renamed to LevenshteinDistance.
   (Pulak Ghosh via Adrien Grand)
 
+New Features
+
+* LUCENE-2899: Add new module analysis/opennlp, with analysis components
+  to perform tokenization, part-of-speech tagging, lemmatization and phrase
+  chunking by invoking the corresponding OpenNLP tools. Named entity
+  recognition is also provided as a Solr update request processor.
+  (Lance Norskog, Grant Ingersoll, Joern Kottmann, Em, Kai Gülzau,
+  Rene Nederhand, Robert Muir, Steven Bower, Steve Rowe)
+
 Improvements
 
 * LUCENE-8081: Allow IndexWriter to opt out of flushing on indexing threads

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/analysis/README.txt
----------------------------------------------------------------------
diff --git a/lucene/analysis/README.txt b/lucene/analysis/README.txt
index 7dc7f53..c68584e 100644
--- a/lucene/analysis/README.txt
+++ b/lucene/analysis/README.txt
@@ -28,6 +28,9 @@ lucene-analyzers-kuromoji-XX.jar
 lucene-analyzers-morfologik-XX.jar
   An analyzer using the Morfologik stemming library.
 
+lucene-analyzers-opennlp-XX.jar
+  An analyzer using the OpenNLP natural-language processing library.
+
 lucene-analyzers-phonetic-XX.jar
   An add-on analysis library that provides phonetic encoders via Apache
   Commons-Codec. Note: this module depends on the commons-codec jar 
@@ -49,6 +52,7 @@ common/src/java
 icu/src/java
 kuromoji/src/java
 morfologik/src/java
+opennlp/src/java
 phonetic/src/java
 smartcn/src/java
 stempel/src/java
@@ -59,6 +63,7 @@ common/src/test
 icu/src/test
 kuromoji/src/test
 morfologik/src/test
+opennlp/src/test
 phonetic/src/test
 smartcn/src/test
 stempel/src/test

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/analysis/build.xml
----------------------------------------------------------------------
diff --git a/lucene/analysis/build.xml b/lucene/analysis/build.xml
index 844f5f3..ed1566c 100644
--- a/lucene/analysis/build.xml
+++ b/lucene/analysis/build.xml
@@ -65,6 +65,10 @@
     <ant dir="morfologik" />
   </target>
 
+  <target name="opennlp">
+    <ant dir="opennlp" />
+  </target>
+
   <target name="phonetic">
     <ant dir="phonetic" />
   </target>
@@ -82,7 +86,7 @@
   </target>
 
   <target name="default" depends="compile"/>
-  <target name="compile" depends="common,icu,kuromoji,morfologik,phonetic,smartcn,stempel,uima" />
+  <target name="compile" depends="common,icu,kuromoji,morfologik,opennlp,phonetic,smartcn,stempel,uima" />
 
   <target name="clean">
     <forall-analyzers target="clean"/>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TypeAsSynonymFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TypeAsSynonymFilter.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TypeAsSynonymFilter.java
new file mode 100644
index 0000000..8269d5d
--- /dev/null
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TypeAsSynonymFilter.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.miscellaneous;
+
+import java.io.IOException;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.PositionIncrementAttribute;
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.apache.lucene.util.AttributeSource;
+
+/**
+ * Adds the {@link TypeAttribute#type()} as a synonym,
+ * i.e. another token at the same position, optionally with a specified prefix prepended.
+ */
+public final class TypeAsSynonymFilter extends TokenFilter {
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
+  private final PositionIncrementAttribute posIncrAtt = addAttribute(PositionIncrementAttribute.class);
+  private final String prefix;
+
+  AttributeSource.State savedToken = null;
+
+
+  public TypeAsSynonymFilter(TokenStream input) {
+    this(input, null);
+  }
+
+  /**
+   * @param input input tokenstream
+   * @param prefix Prepend this string to every token type emitted as token text.
+   *               If null, nothing will be prepended.
+   */
+  public TypeAsSynonymFilter(TokenStream input, String prefix) {
+    super(input);
+    this.prefix = prefix;
+  }
+
+  @Override
+  public boolean incrementToken() throws IOException {
+    if (savedToken != null) {         // Emit last token's type at the same position
+      restoreState(savedToken);
+      savedToken = null;
+      termAtt.setEmpty();
+      if (prefix != null) {
+        termAtt.append(prefix);
+      }
+      termAtt.append(typeAtt.type());
+      posIncrAtt.setPositionIncrement(0);
+      return true;
+    } else if (input.incrementToken()) { // No pending token type to emit
+      savedToken = captureState();
+      return true;
+    }
+    return false;
+  }
+
+  @Override
+  public void reset() throws IOException {
+    super.reset();
+    savedToken = null;
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TypeAsSynonymFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TypeAsSynonymFilterFactory.java b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TypeAsSynonymFilterFactory.java
new file mode 100644
index 0000000..69708b7
--- /dev/null
+++ b/lucene/analysis/common/src/java/org/apache/lucene/analysis/miscellaneous/TypeAsSynonymFilterFactory.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.miscellaneous;
+
+import java.util.Map;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link TypeAsSynonymFilter}.
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_type_as_synonym" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.UAX29URLEmailTokenizerFactory"/&gt;
+ *     &lt;filter class="solr.TypeAsSynonymFilterFactory" prefix="_type_" /&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ *
+ * <p>
+ * If the optional {@code prefix} parameter is used, the specified value will be prepended
+ * to the type, e.g. with prefix="_type_", for a token "example.com" with type "&lt;URL&gt;",
+ * the emitted synonym will have text "_type_&lt;URL&gt;".
+ */
+public class TypeAsSynonymFilterFactory extends TokenFilterFactory {
+  private final String prefix;
+
+  public TypeAsSynonymFilterFactory(Map<String,String> args) {
+    super(args);
+    prefix = get(args, "prefix");  // default value is null
+    if (!args.isEmpty()) {
+      throw new IllegalArgumentException("Unknown parameters: " + args);
+    }
+  }
+
+  @Override
+  public TokenStream create(TokenStream input) {
+    return new TypeAsSynonymFilter(input, prefix);
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory b/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory
index d871ad6..6dcc81c 100644
--- a/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory
+++ b/lucene/analysis/common/src/resources/META-INF/services/org.apache.lucene.analysis.util.TokenFilterFactory
@@ -80,6 +80,7 @@ org.apache.lucene.analysis.miscellaneous.RemoveDuplicatesTokenFilterFactory
 org.apache.lucene.analysis.miscellaneous.StemmerOverrideFilterFactory
 org.apache.lucene.analysis.miscellaneous.TrimFilterFactory
 org.apache.lucene.analysis.miscellaneous.TruncateTokenFilterFactory
+org.apache.lucene.analysis.miscellaneous.TypeAsSynonymFilterFactory
 org.apache.lucene.analysis.miscellaneous.WordDelimiterFilterFactory
 org.apache.lucene.analysis.miscellaneous.WordDelimiterGraphFilterFactory
 org.apache.lucene.analysis.miscellaneous.ScandinavianFoldingFilterFactory

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/analysis/common/src/test/org/apache/lucene/analysis/minhash/MinHashFilterTest.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/minhash/MinHashFilterTest.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/minhash/MinHashFilterTest.java
index a4080fe..1bc6ed7 100644
--- a/lucene/analysis/common/src/test/org/apache/lucene/analysis/minhash/MinHashFilterTest.java
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/minhash/MinHashFilterTest.java
@@ -183,14 +183,14 @@ public class MinHashFilterTest extends BaseTokenStreamTestCase {
     TokenStream ts = createTokenStream(5, "woof woof woof woof woof", 1, 1, 100, false);
     assertTokenStreamContents(ts, hashes, new int[]{0},
         new int[]{24}, new String[]{MinHashFilter.MIN_HASH_TYPE}, new int[]{1}, new int[]{1}, 24, 0, null,
-        true);
+        true, null);
 
     ts = createTokenStream(5, "woof woof woof woof woof", 2, 1, 1, false);
     assertTokenStreamContents(ts, new String[]{new String(new char[]{0, 0, 8449, 54077, 64133, 32857, 8605, 41409}),
             new String(new char[]{0, 1, 16887, 58164, 39536, 14926, 6529, 17276})}, new int[]{0, 0},
         new int[]{24, 24}, new String[]{MinHashFilter.MIN_HASH_TYPE, MinHashFilter.MIN_HASH_TYPE}, new int[]{1, 0},
         new int[]{1, 1}, 24, 0, null,
-        true);
+        true, null);
   }
 
   @Test
@@ -203,7 +203,7 @@ public class MinHashFilterTest extends BaseTokenStreamTestCase {
         false);
     assertTokenStreamContents(ts, hashes, new int[]{0, 0},
         new int[]{49, 49}, new String[]{MinHashFilter.MIN_HASH_TYPE, MinHashFilter.MIN_HASH_TYPE}, new int[]{1, 0},
-        new int[]{1, 1}, 49, 0, null, true);
+        new int[]{1, 1}, 49, 0, null, true, null);
   }
 
   private ArrayList<String> getTokens(TokenStream ts) throws IOException {

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTypeAsSynonymFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTypeAsSynonymFilterFactory.java b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTypeAsSynonymFilterFactory.java
new file mode 100644
index 0000000..6beb139
--- /dev/null
+++ b/lucene/analysis/common/src/test/org/apache/lucene/analysis/miscellaneous/TestTypeAsSynonymFilterFactory.java
@@ -0,0 +1,50 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.miscellaneous;
+
+import org.apache.lucene.analysis.CannedTokenStream;
+import org.apache.lucene.analysis.Token;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.util.BaseTokenStreamFactoryTestCase;
+
+public class TestTypeAsSynonymFilterFactory extends BaseTokenStreamFactoryTestCase {
+
+  private static final Token[] TOKENS =  { token("Visit", "<ALPHANUM>"), token("example.com", "<URL>") };
+
+  public void testBasic() throws Exception {
+    TokenStream stream = new CannedTokenStream(TOKENS);
+    stream = tokenFilterFactory("TypeAsSynonym").create(stream);
+    assertTokenStreamContents(stream, new String[] { "Visit", "<ALPHANUM>", "example.com", "<URL>" },
+        null, null, new String[] { "<ALPHANUM>", "<ALPHANUM>", "<URL>", "<URL>" }, new int[] { 1, 0, 1, 0 });
+  }
+
+  public void testPrefix() throws Exception {
+    TokenStream stream = new CannedTokenStream(TOKENS);
+    stream = tokenFilterFactory("TypeAsSynonym", "prefix", "_type_").create(stream);
+    assertTokenStreamContents(stream, new String[] { "Visit", "_type_<ALPHANUM>", "example.com", "_type_<URL>" },
+        null, null, new String[] { "<ALPHANUM>", "<ALPHANUM>", "<URL>", "<URL>" }, new int[] { 1, 0, 1, 0 });
+  }
+
+  private static Token token(String term, String type) {
+    Token token = new Token();
+    token.setEmpty();
+    token.append(term);
+    token.setType(type);
+    return token;
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/analysis/opennlp/build.xml
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/build.xml b/lucene/analysis/opennlp/build.xml
new file mode 100644
index 0000000..e2cd20a
--- /dev/null
+++ b/lucene/analysis/opennlp/build.xml
@@ -0,0 +1,118 @@
+<?xml version="1.0"?>
+
+<!--
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements.  See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version 2.0
+    (the "License"); you may not use this file except in compliance with
+    the License.  You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+ -->
+
+<project name="analyzers-opennlp" default="default">
+
+  <description>
+    OpenNLP Library Integration
+  </description>
+
+  <path id="opennlpjars">
+    <fileset dir="lib"/>
+  </path>
+
+  <property name="test.model.data.dir" location="src/tools/test-model-data"/>
+  <property name="tests.userdir" location="src/test-files"/>
+  <property name="test.model.dir" location="${tests.userdir}/org/apache/lucene/analysis/opennlp"/>
+
+  <import file="../analysis-module-build.xml"/>
+
+  <property name="analysis-extras.conf.dir"
+            location="${common.dir}/../solr/contrib/analysis-extras/src/test-files/analysis-extras/solr/collection1/conf"/>
+
+  <path id="classpath">
+    <pathelement path="${analyzers-common.jar}"/>
+    <path refid="opennlpjars"/>
+    <path refid="base.classpath"/>
+  </path>
+
+  <path id="test.classpath">
+    <path refid="test.base.classpath"/>
+    <pathelement path="${tests.userdir}"/>
+  </path>
+
+  <target name="compile-core" depends="jar-analyzers-common, common.compile-core" />
+
+  <!--
+    This does not create real NLP models, just small unencumbered ones for the unit tests.
+    All text taken from reuters corpus.
+    Tags applied with online demos at CCG Urbana-Champaign.
+    -->
+  <target name="train-test-models" description="Train all small test models for unit tests" depends="resolve">
+    <mkdir dir="${test.model.dir}"/>
+    <!-- https://opennlp.apache.org/docs/1.8.3/manual/opennlp.html#tools.sentdetect.training -->
+    <trainModel command="SentenceDetectorTrainer" lang="en" data="sentences.txt" model="en-test-sent.bin"/>
+    <copy file="${test.model.dir}/en-test-sent.bin" todir="${analysis-extras.conf.dir}"/>
+
+    <!-- https://opennlp.apache.org/docs/1.8.3/manual/opennlp.html#tools.tokenizer.training -->
+    <trainModel command="TokenizerTrainer" lang="en" data="tokenizer.txt" model="en-test-tokenizer.bin"/>
+    <copy file="${test.model.dir}/en-test-tokenizer.bin" todir="${analysis-extras.conf.dir}"/>
+
+    <!-- https://opennlp.apache.org/docs/1.8.3/manual/opennlp.html#tools.postagger.training -->
+    <trainModel command="POSTaggerTrainer" lang="en" data="pos.txt" model="en-test-pos-maxent.bin"/>
+
+    <!-- https://opennlp.apache.org/docs/1.8.3/manual/opennlp.html#tools.chunker.training -->
+    <trainModel command="ChunkerTrainerME" lang="en" data="chunks.txt" model="en-test-chunker.bin"/>
+
+    <!-- https://opennlp.apache.org/docs/1.8.3/manual/opennlp.html#tools.namefind.training -->
+    <trainModel command="TokenNameFinderTrainer" lang="en" data="ner_flashman.txt" model="en-test-ner-person.bin">
+      <extra-args>
+        <arg value="-params"/>
+        <arg value="ner_TrainerParams.txt"/>
+      </extra-args>
+    </trainModel>
+    <copy file="${test.model.dir}/en-test-ner-person.bin" todir="${analysis-extras.conf.dir}"/>
+
+    <!-- https://opennlp.apache.org/docs/1.8.3/manual/opennlp.html#tools.lemmatizer.training -->
+    <trainModel command="LemmatizerTrainerME" lang="en" data="lemmas.txt" model="en-test-lemmatizer.bin"/>
+  </target>
+
+  <macrodef name="trainModel">
+    <attribute name="command"/>
+    <attribute name="lang"/>
+    <attribute name="data"/>
+    <attribute name="model"/>
+    <element name="extra-args" optional="true"/>
+    <sequential>
+      <java classname="opennlp.tools.cmdline.CLI"
+            dir="${test.model.data.dir}"
+            fork="true"
+            failonerror="true">
+        <classpath>
+          <path refid="opennlpjars"/>
+        </classpath>
+
+        <arg value="@{command}"/>
+
+        <arg value="-lang"/>
+        <arg value="@{lang}"/>
+
+        <arg value="-data"/>
+        <arg value="@{data}"/>
+
+        <arg value="-model"/>
+        <arg value="${test.model.dir}/@{model}"/>
+
+        <extra-args/>
+      </java>
+    </sequential>
+  </macrodef>
+
+  <target name="regenerate" depends="train-test-models"/>
+</project>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/analysis/opennlp/ivy.xml
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/ivy.xml b/lucene/analysis/opennlp/ivy.xml
new file mode 100644
index 0000000..c7b885f
--- /dev/null
+++ b/lucene/analysis/opennlp/ivy.xml
@@ -0,0 +1,29 @@
+<!--
+   Licensed to the Apache Software Foundation (ASF) under one
+   or more contributor license agreements.  See the NOTICE file
+   distributed with this work for additional information
+   regarding copyright ownership.  The ASF licenses this file
+   to you under the Apache License, Version 2.0 (the
+   "License"); you may not use this file except in compliance
+   with the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing,
+   software distributed under the License is distributed on an
+   "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+   KIND, either express or implied.  See the License for the
+   specific language governing permissions and limitations
+   under the License.
+-->
+<ivy-module version="2.0">
+  <info organisation="org.apache.lucene" module="analyzers-opennlp" />
+  <configurations defaultconfmapping="compile->master">
+    <conf name="compile" transitive="false"/>
+  </configurations>
+  <dependencies>
+    <dependency org="org.apache.opennlp" name="opennlp-tools" rev="${/org.apache.opennlp/opennlp-tools}" transitive="false" conf="compile" />
+    <dependency org="org.apache.opennlp" name="opennlp-maxent" rev="${/org.apache.opennlp/opennlp-maxent}" transitive="false" conf="compile" />
+    <exclude org="*" ext="*" matcher="regexp" type="${ivy.exclude.types}" />
+  </dependencies>
+</ivy-module>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPChunkerFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPChunkerFilter.java b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPChunkerFilter.java
new file mode 100644
index 0000000..cfc47e6
--- /dev/null
+++ b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPChunkerFilter.java
@@ -0,0 +1,108 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.opennlp;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.opennlp.tools.NLPChunkerOp;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.apache.lucene.util.AttributeSource;
+
+/**
+ * Run OpenNLP chunker.  Prerequisite: the OpenNLPTokenizer and OpenNLPPOSFilter must precede this filter.
+ * Tags terms in the TypeAttribute, replacing the POS tags previously put there by OpenNLPPOSFilter.
+ */
+public final class OpenNLPChunkerFilter extends TokenFilter {
+
+  private List<AttributeSource> sentenceTokenAttrs = new ArrayList<>();
+  private int tokenNum = 0;
+  private boolean moreTokensAvailable = true;
+  private String[] sentenceTerms = null;
+  private String[] sentenceTermPOSTags = null;
+
+  private final NLPChunkerOp chunkerOp;
+  private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
+  private final FlagsAttribute flagsAtt = addAttribute(FlagsAttribute.class);
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+
+  public OpenNLPChunkerFilter(TokenStream input, NLPChunkerOp chunkerOp) {
+    super(input);
+    this.chunkerOp = chunkerOp;
+  }
+
+  @Override
+  public final boolean incrementToken() throws IOException {
+    if ( ! moreTokensAvailable) {
+      clear();
+      return false;
+    }
+    if (tokenNum == sentenceTokenAttrs.size()) {
+      nextSentence();
+      if (sentenceTerms == null) {
+        clear();
+        return false;
+      }
+      assignTokenTypes(chunkerOp.getChunks(sentenceTerms, sentenceTermPOSTags, null));
+      tokenNum = 0;
+    }
+    clearAttributes();
+    sentenceTokenAttrs.get(tokenNum++).copyTo(this);
+    return true;
+  }
+
+  private void nextSentence() throws IOException {
+    List<String> termList = new ArrayList<>();
+    List<String> posTagList = new ArrayList<>();
+    sentenceTokenAttrs.clear();
+    boolean endOfSentence = false;
+    while ( ! endOfSentence && (moreTokensAvailable = input.incrementToken())) {
+      termList.add(termAtt.toString());
+      posTagList.add(typeAtt.type());
+      endOfSentence = 0 != (flagsAtt.getFlags() & OpenNLPTokenizer.EOS_FLAG_BIT);
+      sentenceTokenAttrs.add(input.cloneAttributes());
+    }
+    sentenceTerms = termList.size() > 0 ? termList.toArray(new String[termList.size()]) : null;
+    sentenceTermPOSTags = posTagList.size() > 0 ? posTagList.toArray(new String[posTagList.size()]) : null;
+  }
+
+  private void assignTokenTypes(String[] tags) {
+    for (int i = 0 ; i < tags.length ; ++i) {
+      sentenceTokenAttrs.get(i).getAttribute(TypeAttribute.class).setType(tags[i]);
+    }
+  }
+
+  @Override
+  public void reset() throws IOException {
+    super.reset();
+    moreTokensAvailable = true;
+    clear();
+  }
+
+  private void clear() {
+    sentenceTokenAttrs.clear();
+    sentenceTerms = null;
+    sentenceTermPOSTags = null;
+    tokenNum = 0;
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPChunkerFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPChunkerFilterFactory.java b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPChunkerFilterFactory.java
new file mode 100644
index 0000000..96eb672
--- /dev/null
+++ b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPChunkerFilterFactory.java
@@ -0,0 +1,81 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.opennlp;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.opennlp.tools.NLPChunkerOp;
+import org.apache.lucene.analysis.opennlp.tools.OpenNLPOpsFactory;
+import org.apache.lucene.analysis.util.ResourceLoader;
+import org.apache.lucene.analysis.util.ResourceLoaderAware;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link OpenNLPChunkerFilter}.
+ *
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_opennlp_chunked" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.OpenNLPTokenizerFactory" sentenceModel="filename" tokenizerModel="filename"/&gt;
+ *     &lt;filter class="solr.OpenNLPPOSFilterFactory" posTaggerModel="filename"/&gt;
+ *     &lt;filter class="solr.OpenNLPChunkerFilterFactory" chunkerModel="filename"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ * @since 7.3.0
+ */
+public class OpenNLPChunkerFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
+  public static final String CHUNKER_MODEL = "chunkerModel";
+
+  private final String chunkerModelFile;
+
+  public OpenNLPChunkerFilterFactory(Map<String,String> args) {
+    super(args);
+    chunkerModelFile = get(args, CHUNKER_MODEL);
+    if (!args.isEmpty()) {
+      throw new IllegalArgumentException("Unknown parameters: " + args);
+    }
+  }
+
+  @Override
+  public OpenNLPChunkerFilter create(TokenStream in) {
+    try {
+      NLPChunkerOp chunkerOp = null; // NOTE(review): stays null when no chunkerModel is configured, yet OpenNLPChunkerFilter calls chunkerOp.getChunks() without a null check - confirm
+
+      if (chunkerModelFile != null) {
+        chunkerOp = OpenNLPOpsFactory.getChunker(chunkerModelFile);
+      }
+      return new OpenNLPChunkerFilter(in, chunkerOp);
+    } catch (IOException e) {
+      throw new IllegalArgumentException(e);
+    }
+  }
+
+  @Override
+  public void inform(ResourceLoader loader) {
+    try {
+      // load and register read-only models in cache with file/resource names
+      if (chunkerModelFile != null) {
+        OpenNLPOpsFactory.getChunkerModel(chunkerModelFile, loader);
+      }
+    } catch (IOException e) {
+      throw new IllegalArgumentException(e);
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPLemmatizerFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPLemmatizerFilter.java b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPLemmatizerFilter.java
new file mode 100644
index 0000000..4c484b9
--- /dev/null
+++ b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPLemmatizerFilter.java
@@ -0,0 +1,123 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.opennlp;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Iterator;
+import java.util.List;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.opennlp.tools.NLPLemmatizerOp;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
+import org.apache.lucene.analysis.tokenattributes.KeywordAttribute;
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.apache.lucene.util.AttributeSource;
+
+/**
+ * <p>Runs OpenNLP dictionary-based and/or MaxEnt lemmatizers.</p>
+ * <p>
+ *   Both a dictionary-based lemmatizer and a MaxEnt lemmatizer are supported,
+ *   via the "dictionary" and "lemmatizerModel" params, respectively.
+ *   If both are configured, the dictionary-based lemmatizer is tried first,
+ *   and then the MaxEnt lemmatizer is consulted for out-of-vocabulary tokens.
+ * </p>
+ * <p>
+ *   The dictionary file must be encoded as UTF-8, with one entry per line,
+ *   in the form <tt>word[tab]lemma[tab]part-of-speech</tt>
+ * </p>
+ */
+public class OpenNLPLemmatizerFilter extends TokenFilter {
+  private final NLPLemmatizerOp lemmatizerOp;
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
+  private final KeywordAttribute keywordAtt = addAttribute(KeywordAttribute.class);
+  private final FlagsAttribute flagsAtt = addAttribute(FlagsAttribute.class);
+  private List<AttributeSource> sentenceTokenAttrs = new ArrayList<>();
+  private Iterator<AttributeSource> sentenceTokenAttrsIter = null;
+  private boolean moreTokensAvailable = true;
+  private String[] sentenceTokens = null;     // non-keyword tokens
+  private String[] sentenceTokenTypes = null; // types for non-keyword tokens
+  private String[] lemmas = null;             // lemmas for non-keyword tokens
+  private int lemmaNum = 0;                   // lemma counter
+
+  public OpenNLPLemmatizerFilter(TokenStream input, NLPLemmatizerOp lemmatizerOp) {
+    super(input);
+    this.lemmatizerOp = lemmatizerOp;
+  }
+
+  @Override
+  public final boolean incrementToken() throws IOException {
+    if ( ! moreTokensAvailable) {
+      clear();
+      return false;
+    }
+    if (sentenceTokenAttrsIter == null || ! sentenceTokenAttrsIter.hasNext()) {
+      nextSentence();
+      if (sentenceTokens == null) { // zero non-keyword tokens; NOTE(review): also true for a sentence holding only keyword tokens, whose buffered tokens are then dropped and the stream ends - confirm intended
+        clear();
+        return false;
+      }
+      lemmas = lemmatizerOp.lemmatize(sentenceTokens, sentenceTokenTypes);
+      lemmaNum = 0;
+      sentenceTokenAttrsIter = sentenceTokenAttrs.iterator();
+    }
+    clearAttributes();
+    sentenceTokenAttrsIter.next().copyTo(this);
+    if ( ! keywordAtt.isKeyword()) {
+      termAtt.setEmpty().append(lemmas[lemmaNum++]);
+    }
+    return true;
+
+  }
+
+  private void nextSentence() throws IOException {
+    List<String> tokenList = new ArrayList<>();
+    List<String> typeList = new ArrayList<>();
+    sentenceTokenAttrs.clear();
+    boolean endOfSentence = false;
+    while ( ! endOfSentence && (moreTokensAvailable = input.incrementToken())) {
+      if ( ! keywordAtt.isKeyword()) {
+        tokenList.add(termAtt.toString());
+        typeList.add(typeAtt.type());
+      }
+      endOfSentence = 0 != (flagsAtt.getFlags() & OpenNLPTokenizer.EOS_FLAG_BIT);
+      sentenceTokenAttrs.add(input.cloneAttributes());
+    }
+    sentenceTokens = tokenList.size() > 0 ? tokenList.toArray(new String[tokenList.size()]) : null;
+    sentenceTokenTypes = typeList.size() > 0 ? typeList.toArray(new String[typeList.size()]) : null;
+  }
+
+  @Override
+  public void reset() throws IOException {
+    super.reset();
+    moreTokensAvailable = true;
+    clear();
+  }
+
+  private void clear() {
+    sentenceTokenAttrs.clear();
+    sentenceTokenAttrsIter = null;
+    sentenceTokens = null;
+    sentenceTokenTypes = null;
+    lemmas = null;
+    lemmaNum = 0;
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPLemmatizerFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPLemmatizerFilterFactory.java b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPLemmatizerFilterFactory.java
new file mode 100644
index 0000000..90a0e43
--- /dev/null
+++ b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPLemmatizerFilterFactory.java
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.opennlp;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.opennlp.tools.NLPLemmatizerOp;
+import org.apache.lucene.analysis.opennlp.tools.OpenNLPOpsFactory;
+import org.apache.lucene.analysis.util.ResourceLoader;
+import org.apache.lucene.analysis.util.ResourceLoaderAware;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link OpenNLPLemmatizerFilter}.
+ *
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_opennlp_lemma" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.OpenNLPTokenizerFactory"
+ *                sentenceModel="filename"
+ *                tokenizerModel="filename"
+ *     /&gt;
+ *     &lt;filter class="solr.OpenNLPLemmatizerFilterFactory"
+ *             dictionary="filename"
+ *             lemmatizerModel="filename"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ * @since 7.3.0
+ */
+public class OpenNLPLemmatizerFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
+  /** Argument name for the lemmatizer dictionary file/resource. */
+  public static final String DICTIONARY = "dictionary";
+  /** Argument name for the lemmatizer model file/resource. */
+  public static final String LEMMATIZER_MODEL = "lemmatizerModel";
+
+  private final String dictionaryFile;
+  private final String lemmatizerModelFile;
+
+  /**
+   * Creates the factory from its configuration arguments.
+   *
+   * @param args configuration; at least one of {@link #DICTIONARY} and {@link #LEMMATIZER_MODEL}
+   *             must be present
+   * @throws IllegalArgumentException if neither argument is supplied, or if {@code args} is not
+   *             empty once the known arguments have been read
+   */
+  public OpenNLPLemmatizerFilterFactory(Map<String,String> args) {
+    super(args);
+    dictionaryFile = get(args, DICTIONARY);
+    lemmatizerModelFile = get(args, LEMMATIZER_MODEL);
+
+    final boolean nothingConfigured = null == dictionaryFile && null == lemmatizerModelFile;
+    if (nothingConfigured) {
+      throw new IllegalArgumentException("Configuration Error: missing parameter: at least one of '"
+          + DICTIONARY + "' and '" + LEMMATIZER_MODEL + "' must be provided.");
+    }
+
+    final boolean hasLeftoverArgs = ! args.isEmpty();
+    if (hasLeftoverArgs) {
+      throw new IllegalArgumentException("Unknown parameters: " + args);
+    }
+  }
+
+  /**
+   * Wraps {@code in} with a lemmatizer filter built from the configured dictionary and/or model.
+   * An {@link IOException} raised while obtaining the lemmatizer is rethrown as a RuntimeException.
+   */
+  @Override
+  public OpenNLPLemmatizerFilter create(TokenStream in) {
+    try {
+      return new OpenNLPLemmatizerFilter(in, OpenNLPOpsFactory.getLemmatizer(dictionaryFile, lemmatizerModelFile));
+    } catch (IOException ioe) {
+      throw new RuntimeException(ioe);
+    }
+  }
+
+  /** Registers whichever of the dictionary and model were configured, using {@code loader}. */
+  @Override
+  public void inform(ResourceLoader loader) throws IOException {
+    // register models in cache with file/resource names
+    if (null != dictionaryFile) {
+      OpenNLPOpsFactory.getLemmatizerDictionary(dictionaryFile, loader);
+    }
+    if (null != lemmatizerModelFile) {
+      OpenNLPOpsFactory.getLemmatizerModel(lemmatizerModelFile, loader);
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPPOSFilter.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPPOSFilter.java b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPPOSFilter.java
new file mode 100644
index 0000000..a5bea28
--- /dev/null
+++ b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPPOSFilter.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.opennlp;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.lucene.analysis.TokenFilter;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.opennlp.tools.NLPPOSTaggerOp;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
+import org.apache.lucene.analysis.tokenattributes.TypeAttribute;
+import org.apache.lucene.util.AttributeSource;
+
+/**
+ * Run OpenNLP POS tagger.  Tags all terms in the TypeAttribute.
+ * <p>
+ * Tokens are buffered one sentence at a time: input is consumed up to and including the token
+ * flagged with {@link OpenNLPTokenizer#EOS_FLAG_BIT} (or to the end of the input), the buffered
+ * terms are tagged in a single call, and the tokens are then replayed with the tag as their type.
+ */
+public final class OpenNLPPOSFilter extends TokenFilter {
+
+  /** Attribute snapshots of the tokens of the sentence currently being replayed. */
+  private final List<AttributeSource> sentenceTokenAttrs = new ArrayList<>();
+  /** POS tags for the buffered sentence, parallel to {@link #sentenceTokenAttrs}. */
+  String[] tags = null;
+  /** Index of the next buffered token to emit. */
+  private int tokenNum = 0;
+  /** False once the input has reported that it has no more tokens. */
+  private boolean moreTokensAvailable = true;
+
+  private final NLPPOSTaggerOp posTaggerOp;
+  private final TypeAttribute typeAtt = addAttribute(TypeAttribute.class);
+  private final FlagsAttribute flagsAtt = addAttribute(FlagsAttribute.class);
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+
+  /**
+   * @param input the token stream to tag
+   * @param posTaggerOp the tagger used to obtain one tag per token of each sentence
+   */
+  public OpenNLPPOSFilter(TokenStream input, NLPPOSTaggerOp posTaggerOp) {
+    super(input);
+    this.posTaggerOp = posTaggerOp;
+  }
+
+  /**
+   * Emits the next buffered token with its POS tag as the type, buffering and tagging the next
+   * sentence first when the current one has been fully emitted.
+   *
+   * @return false once the input is exhausted and every buffered token has been emitted
+   */
+  @Override
+  public final boolean incrementToken() throws IOException {
+    if (tokenNum == sentenceTokenAttrs.size()) { // beginning of stream, or previous sentence exhausted
+      // Only give up once the buffer is drained: the input may end without an EOS-flagged token,
+      // in which case the final sentence is still waiting in the buffer when the input runs dry.
+      if ( ! moreTokensAvailable) {
+        clear();
+        return false;
+      }
+      String[] sentenceTokens = nextSentence();
+      if (sentenceTokens == null) {
+        clear();
+        return false;
+      }
+      tags = posTaggerOp.getPOSTags(sentenceTokens);
+      tokenNum = 0;
+    }
+    clearAttributes();
+    sentenceTokenAttrs.get(tokenNum).copyTo(this);
+    typeAtt.setType(tags[tokenNum++]);
+    return true;
+  }
+
+  /**
+   * Buffers the attributes of the next sentence's tokens in {@link #sentenceTokenAttrs}.
+   *
+   * @return the terms of the buffered tokens, or null if the input produced no further token
+   */
+  private String[] nextSentence() throws IOException {
+    List<String> termList = new ArrayList<>();
+    sentenceTokenAttrs.clear();
+    boolean endOfSentence = false;
+    while ( ! endOfSentence && (moreTokensAvailable = input.incrementToken())) {
+      termList.add(termAtt.toString());
+      endOfSentence = 0 != (flagsAtt.getFlags() & OpenNLPTokenizer.EOS_FLAG_BIT);
+      sentenceTokenAttrs.add(input.cloneAttributes());
+    }
+    return termList.isEmpty() ? null : termList.toArray(new String[termList.size()]);
+  }
+
+  /** Resets the input and discards anything buffered from a previous use of this filter. */
+  @Override
+  public void reset() throws IOException {
+    super.reset();
+    moreTokensAvailable = true;
+    clear(); // a stream abandoned mid-sentence must not leak its tokens into the next one
+  }
+
+  /** Drops the buffered sentence and its tags. */
+  private void clear() {
+    sentenceTokenAttrs.clear();
+    tags = null;
+    tokenNum = 0;
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPPOSFilterFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPPOSFilterFactory.java b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPPOSFilterFactory.java
new file mode 100644
index 0000000..952218f
--- /dev/null
+++ b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPPOSFilterFactory.java
@@ -0,0 +1,71 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.opennlp;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.opennlp.tools.OpenNLPOpsFactory;
+import org.apache.lucene.analysis.util.ResourceLoader;
+import org.apache.lucene.analysis.util.ResourceLoaderAware;
+import org.apache.lucene.analysis.util.TokenFilterFactory;
+
+/**
+ * Factory for {@link OpenNLPPOSFilter}.
+ *
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_opennlp_pos" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.OpenNLPTokenizerFactory" sentenceModel="filename" tokenizerModel="filename"/&gt;
+ *     &lt;filter class="solr.OpenNLPPOSFilterFactory" posTaggerModel="filename"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ * @since 7.3.0
+ */
+public class OpenNLPPOSFilterFactory extends TokenFilterFactory implements ResourceLoaderAware {
+  /** Argument name for the (required) POS tagger model file/resource. */
+  public static final String POS_TAGGER_MODEL = "posTaggerModel";
+
+  private final String posTaggerModelFile;
+
+  /**
+   * Creates the factory from its configuration arguments.
+   *
+   * @param args configuration; {@link #POS_TAGGER_MODEL} is read with {@code require}
+   * @throws IllegalArgumentException if {@code args} is not empty once the model name has been read
+   */
+  public OpenNLPPOSFilterFactory(Map<String,String> args) {
+    super(args);
+    posTaggerModelFile = require(args, POS_TAGGER_MODEL);
+    final boolean hasLeftoverArgs = ! args.isEmpty();
+    if (hasLeftoverArgs) {
+      throw new IllegalArgumentException("Unknown parameters: " + args);
+    }
+  }
+
+  /**
+   * Wraps {@code in} with a POS filter using the configured model.
+   * An {@link IOException} raised while obtaining the tagger is rethrown as an IllegalArgumentException.
+   */
+  @Override
+  public OpenNLPPOSFilter create(TokenStream in) {
+    try {
+      return new OpenNLPPOSFilter(in, OpenNLPOpsFactory.getPOSTagger(posTaggerModelFile));
+    } catch (IOException ioe) {
+      throw new IllegalArgumentException(ioe);
+    }
+  }
+
+  /**
+   * Loads the POS tagger model through {@code loader}.
+   * An {@link IOException} raised while loading is rethrown as an IllegalArgumentException.
+   */
+  @Override
+  public void inform(ResourceLoader loader) {
+    // load and register the read-only model in cache with file/resource name
+    try {
+      OpenNLPOpsFactory.getPOSTaggerModel(posTaggerModelFile, loader);
+    } catch (IOException ioe) {
+      throw new IllegalArgumentException(ioe);
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPSentenceBreakIterator.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPSentenceBreakIterator.java b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPSentenceBreakIterator.java
new file mode 100644
index 0000000..f69fbc6
--- /dev/null
+++ b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPSentenceBreakIterator.java
@@ -0,0 +1,224 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.opennlp;
+
+import java.text.BreakIterator;
+import java.text.CharacterIterator;
+
+import opennlp.tools.util.Span;
+import org.apache.lucene.analysis.opennlp.tools.NLPSentenceDetectorOp;
+import org.apache.lucene.analysis.util.CharArrayIterator;
+
+/**
+ * A {@link BreakIterator} that splits sentences using an OpenNLP sentence chunking model.
+ * <p>
+ * Sentence start offsets are computed once per {@link #setText(CharacterIterator)} call; the
+ * navigation methods then move the text's index between those starts and the text's begin/end
+ * index. {@code text} and {@code sentenceStarts} are only assigned in {@code setText}, so the
+ * navigation methods must not be called before it.
+ */
+public final class OpenNLPSentenceBreakIterator extends BreakIterator {
+
+  private CharacterIterator text;
+  // index into sentenceStarts of the sentence the iterator is currently positioned in
+  private int currentSentence;
+  // start offset of every detected sentence, already shifted by text.getBeginIndex()
+  private int[] sentenceStarts;
+  private NLPSentenceDetectorOp sentenceOp;
+
+  /**
+   * @param sentenceOp the sentence detector whose {@code splitSentences} is applied to each new text
+   */
+  public OpenNLPSentenceBreakIterator(NLPSentenceDetectorOp sentenceOp) {
+    this.sentenceOp = sentenceOp;
+  }
+
+  /** Returns the current index of the underlying text. */
+  @Override
+  public int current() {
+    return text.getIndex();
+  }
+
+  /** Moves to the text's begin index (sentence 0) and returns it. */
+  @Override
+  public int first() {
+    currentSentence = 0;
+    text.setIndex(text.getBeginIndex());
+    return current();
+  }
+
+  /**
+   * Moves to the text's end index, with the last sentence as the current one; when there are no
+   * sentences, moves to the begin index instead. Returns the resulting index.
+   */
+  @Override
+  public int last() {
+    if (sentenceStarts.length > 0) {
+      currentSentence = sentenceStarts.length - 1;
+      text.setIndex(text.getEndIndex());
+    } else { // there are no sentences; both the first and last positions are the begin index
+      currentSentence = 0;
+      text.setIndex(text.getBeginIndex());
+    }
+    return current();
+  }
+
+  /**
+   * Advances to the start of the next sentence, or to the end index when the current sentence is
+   * the last one. Returns {@link #DONE} when already at the end index or when there are no sentences.
+   */
+  @Override
+  public int next() {
+    if (text.getIndex() == text.getEndIndex() || 0 == sentenceStarts.length) {
+      return DONE;
+    } else if (currentSentence < sentenceStarts.length - 1) {
+      text.setIndex(sentenceStarts[++currentSentence]);
+      return current();
+    } else {
+      return last();
+    }
+  }
+
+  /**
+   * Moves to the start of the sentence after the one containing {@code pos}.
+   *
+   * @return the new index, or {@link #DONE} when there are no sentences or {@code pos} is at or
+   *         after the start of the last sentence
+   * @throws IllegalArgumentException if {@code pos} is outside the text's begin/end index range
+   */
+  @Override
+  public int following(int pos) {
+    if (pos < text.getBeginIndex() || pos > text.getEndIndex()) {
+      throw new IllegalArgumentException("offset out of bounds");
+    } else if (0 == sentenceStarts.length) {
+      text.setIndex(text.getBeginIndex());
+      return DONE;
+    } else if (pos >= sentenceStarts[sentenceStarts.length - 1]) {
+      // this conflicts with the javadocs, but matches actual behavior (Oracle has a bug in something)
+      // https://bugs.openjdk.java.net/browse/JDK-8015110
+      text.setIndex(text.getEndIndex());
+      currentSentence = sentenceStarts.length - 1;
+      return DONE;
+    } else { // there are at least two sentences
+      // NOTE(review): this branch is also reached with a single sentence when pos lies before
+      // sentenceStarts[0] (possible if the detector's first span does not start at offset 0);
+      // the search below is then called with maxSentence == -1 -- confirm that cannot happen.
+      currentSentence = (sentenceStarts.length - 1) / 2; // start search from the middle
+      moveToSentenceAt(pos, 0, sentenceStarts.length - 2);
+      text.setIndex(sentenceStarts[++currentSentence]);
+      return current();
+    }
+  }
+
+  /**
+   * Binary search over sentences: recursively adjusts {@code currentSentence} until {@code pos}
+   * is neither before its start nor at/after the start of the following sentence, or until the
+   * {@code [minSentence, maxSentence]} window has shrunk to a single sentence.
+   */
+  private void moveToSentenceAt(int pos, int minSentence, int maxSentence) {
+    if (minSentence != maxSentence) {
+      if (pos < sentenceStarts[currentSentence]) {
+        // pos is in an earlier sentence: continue in the lower part of the window
+        int newMaxSentence = currentSentence - 1;
+        currentSentence = minSentence + (currentSentence - minSentence) / 2;
+        moveToSentenceAt(pos, minSentence, newMaxSentence);
+      } else if (pos >= sentenceStarts[currentSentence + 1]) {
+        // pos is in a later sentence: continue in the upper part of the window
+        int newMinSentence = currentSentence + 1;
+        currentSentence = maxSentence - (maxSentence - currentSentence) / 2;
+        moveToSentenceAt(pos, newMinSentence, maxSentence);
+      }
+    } else {
+      // window of one sentence: only sanity checks remain (active with -ea)
+      assert currentSentence == minSentence;
+      assert pos >= sentenceStarts[currentSentence];
+      assert (currentSentence == sentenceStarts.length - 1 && pos <= text.getEndIndex())
+          || pos < sentenceStarts[currentSentence + 1];
+    }
+    // we have arrived - nothing to do
+  }
+
+  /**
+   * Moves back one boundary: from the end index to the start of the current (last) sentence,
+   * otherwise to the start of the preceding sentence. Returns {@link #DONE} when already at the
+   * begin index or when there are no sentences.
+   */
+  @Override
+  public int previous() {
+    if (text.getIndex() == text.getBeginIndex()) {
+      return DONE;
+    } else {
+      if (0 == sentenceStarts.length) {
+        text.setIndex(text.getBeginIndex());
+        return DONE;
+      }
+      if (text.getIndex() == text.getEndIndex()) {
+        text.setIndex(sentenceStarts[currentSentence]);
+      } else {
+        // NOTE(review): if currentSentence is 0 here (index at sentenceStarts[0] but past the
+        // begin index), --currentSentence becomes -1 -- confirm sentenceStarts[0] always equals
+        // the begin index.
+        text.setIndex(sentenceStarts[--currentSentence]);
+      }
+      return current();
+    }
+  }
+
+  /**
+   * Moves to the start of the sentence before the one containing {@code pos}.
+   *
+   * @return the new index, or {@link #DONE} when there are no sentences, {@code pos} is before
+   *         the first sentence start, or {@code pos} lies in the first sentence
+   * @throws IllegalArgumentException if {@code pos} is outside the text's begin/end index range
+   */
+  @Override
+  public int preceding(int pos) {
+    if (pos < text.getBeginIndex() || pos > text.getEndIndex()) {
+      throw new IllegalArgumentException("offset out of bounds");
+    } else if (0 == sentenceStarts.length) {
+      text.setIndex(text.getBeginIndex());
+      currentSentence = 0;
+      return DONE;
+    } else if (pos < sentenceStarts[0]) {
+      // this conflicts with the javadocs, but matches actual behavior (Oracle has a bug in something)
+      // https://bugs.openjdk.java.net/browse/JDK-8015110
+      text.setIndex(text.getBeginIndex());
+      currentSentence = 0;
+      return DONE;
+    } else {
+      currentSentence = sentenceStarts.length / 2; // start search from the middle
+      moveToSentenceAt(pos, 0, sentenceStarts.length - 1);
+      if (0 == currentSentence) {
+        text.setIndex(text.getBeginIndex());
+        return DONE;
+      } else {
+        text.setIndex(sentenceStarts[--currentSentence]);
+        return current();
+      }
+    }
+  }
+
+  /**
+   * Moves {@code n} sentences forward (positive) or backward (negative); {@code n == 0} just
+   * returns the current index. Moving past either end clamps the position to the begin/end index
+   * and returns {@link #DONE}.
+   */
+  @Override
+  public int next(int n) {
+    currentSentence += n;
+    if (n < 0) {
+      // at the end index the current sentence is still the last one, so stepping back by one
+      // only returns to that sentence's start: undo one step of the decrement
+      if (text.getIndex() == text.getEndIndex()) {
+        ++currentSentence;
+      }
+      if (currentSentence < 0) {
+        currentSentence = 0;
+        text.setIndex(text.getBeginIndex());
+        return DONE;
+      } else {
+        text.setIndex(sentenceStarts[currentSentence]);
+      }
+    } else if (n > 0) {
+      if (currentSentence >= sentenceStarts.length) {
+        currentSentence = sentenceStarts.length - 1;
+        text.setIndex(text.getEndIndex());
+        return DONE;
+      } else {
+        text.setIndex(sentenceStarts[currentSentence]);
+      }
+    }
+    return current();
+  }
+
+  /** Returns the text most recently passed to {@link #setText(CharacterIterator)}. */
+  @Override
+  public CharacterIterator getText() {
+    return text;
+  }
+
+  /**
+   * Installs a new text, positions the iterator at its begin index, and runs the sentence
+   * detector over the full text to compute the sentence start offsets.
+   */
+  @Override
+  public void setText(CharacterIterator newText) {
+    text = newText;
+    text.setIndex(text.getBeginIndex());
+    currentSentence = 0;
+    Span[] spans = sentenceOp.splitSentences(characterIteratorToString());
+    sentenceStarts = new int[spans.length];
+    for (int i = 0; i < spans.length; ++i) {
+      // Adjust start positions to match those of the passed-in CharacterIterator
+      sentenceStarts[i] = spans[i].getStart() + text.getBeginIndex();
+    }
+  }
+
+  /**
+   * Extracts the whole text as a String: directly from the backing array for a
+   * {@link CharArrayIterator}, otherwise by walking the iterator and then restoring its index to
+   * the begin index.
+   */
+  private String characterIteratorToString() {
+    String fullText;
+    if (text instanceof CharArrayIterator) {
+      CharArrayIterator charArrayIterator = (CharArrayIterator)text;
+      fullText = new String(charArrayIterator.getText(), charArrayIterator.getStart(), charArrayIterator.getLength());
+    } else {
+      // TODO: is there a better way to extract full text from arbitrary CharacterIterators?
+      // note: the loop stops at the first CharacterIterator.DONE character it reads
+      StringBuilder builder = new StringBuilder();
+      for (char ch = text.first(); ch != CharacterIterator.DONE; ch = text.next()) {
+        builder.append(ch);
+      }
+      fullText = builder.toString();
+      text.setIndex(text.getBeginIndex());
+    }
+    return fullText;
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPTokenizer.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPTokenizer.java b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPTokenizer.java
new file mode 100644
index 0000000..75a3b81
--- /dev/null
+++ b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPTokenizer.java
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.opennlp;
+
+import java.io.IOException;
+
+import opennlp.tools.util.Span;
+
+import org.apache.lucene.analysis.opennlp.tools.NLPSentenceDetectorOp;
+import org.apache.lucene.analysis.opennlp.tools.NLPTokenizerOp;
+import org.apache.lucene.analysis.tokenattributes.FlagsAttribute;
+import org.apache.lucene.analysis.tokenattributes.OffsetAttribute;
+import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
+import org.apache.lucene.analysis.util.SegmentingTokenizerBase;
+import org.apache.lucene.util.AttributeFactory;
+
+/**
+ * Run OpenNLP SentenceDetector and Tokenizer.
+ * The last token in each sentence is marked by setting the {@link #EOS_FLAG_BIT} in the FlagsAttribute;
+ * following filters can use this information to apply operations to tokens one sentence at a time.
+ */
+public final class OpenNLPTokenizer extends SegmentingTokenizerBase {
+  /** Bit set in the FlagsAttribute of the last token of each sentence. */
+  public static final int EOS_FLAG_BIT = 1;
+
+  private final CharTermAttribute termAtt = addAttribute(CharTermAttribute.class);
+  private final FlagsAttribute flagsAtt = addAttribute(FlagsAttribute.class);
+  private final OffsetAttribute offsetAtt = addAttribute(OffsetAttribute.class);
+
+  /** Token spans of the current sentence, relative to the sentence start; null before the first sentence. */
+  private Span[] termSpans = null;
+  /** Index into {@link #termSpans} of the next token to emit. */
+  private int termNum = 0;
+  /** Start of the current sentence, as passed to {@link #setNextSentence(int, int)}. */
+  private int sentenceStart = 0;
+
+  private final NLPTokenizerOp tokenizerOp;
+
+  /**
+   * @param factory attribute factory handed to the superclass
+   * @param sentenceOp sentence detector; used to build the sentence break iterator
+   * @param tokenizerOp tokenizer applied to each sentence
+   * @throws IllegalArgumentException if either operation is null
+   */
+  public OpenNLPTokenizer(AttributeFactory factory, NLPSentenceDetectorOp sentenceOp, NLPTokenizerOp tokenizerOp) throws IOException {
+    super(factory, new OpenNLPSentenceBreakIterator(sentenceOp));
+    if (sentenceOp == null || tokenizerOp == null) {
+      throw new IllegalArgumentException("OpenNLPTokenizer: both a Sentence Detector and a Tokenizer are required");
+    }
+    // sentenceOp is only needed by the break iterator created above, so it is not stored here
+    this.tokenizerOp = tokenizerOp;
+  }
+
+  /** Closes the tokenizer and forgets the current sentence. */
+  @Override
+  public void close() throws IOException {
+    super.close();
+    termSpans = null;
+    termNum = sentenceStart = 0;
+  }
+
+  /** Tokenizes the sentence at {@code [sentenceStart, sentenceEnd)} and rewinds the token cursor. */
+  @Override
+  protected void setNextSentence(int sentenceStart, int sentenceEnd) {
+    this.sentenceStart = sentenceStart;
+    String sentenceText = new String(buffer, sentenceStart, sentenceEnd - sentenceStart);
+    termSpans = tokenizerOp.getTerms(sentenceText);
+    termNum = 0;
+  }
+
+  /**
+   * Emits the next token of the current sentence, setting its term and offsets and, on the last
+   * token of the sentence, the {@link #EOS_FLAG_BIT} flag.
+   *
+   * @return false when the current sentence has no more tokens
+   */
+  @Override
+  protected boolean incrementWord() {
+    if (termSpans == null || termNum == termSpans.length) {
+      return false;
+    }
+    clearAttributes();
+    Span term = termSpans[termNum];
+    // spans are relative to the sentence text, so shift them by the sentence start
+    termAtt.copyBuffer(buffer, sentenceStart + term.getStart(), term.length());
+    offsetAtt.setOffset(correctOffset(offset + sentenceStart + term.getStart()),
+                        correctOffset(offset + sentenceStart + term.getEnd()));
+    if (termNum == termSpans.length - 1) {
+      flagsAtt.setFlags(flagsAtt.getFlags() | EOS_FLAG_BIT); // mark the last token in the sentence with EOS_FLAG_BIT
+    }
+    ++termNum;
+    return true;
+  }
+
+  /** Resets the tokenizer and forgets the current sentence. */
+  @Override
+  public void reset() throws IOException {
+    super.reset();
+    termSpans = null;
+    termNum = sentenceStart = 0;
+  }
+}

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/b720e1ee/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPTokenizerFactory.java
----------------------------------------------------------------------
diff --git a/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPTokenizerFactory.java b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPTokenizerFactory.java
new file mode 100644
index 0000000..a60f23f
--- /dev/null
+++ b/lucene/analysis/opennlp/src/java/org/apache/lucene/analysis/opennlp/OpenNLPTokenizerFactory.java
@@ -0,0 +1,79 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.lucene.analysis.opennlp;
+
+import java.io.IOException;
+import java.util.Map;
+
+import org.apache.lucene.analysis.opennlp.tools.NLPSentenceDetectorOp;
+import org.apache.lucene.analysis.opennlp.tools.NLPTokenizerOp;
+import org.apache.lucene.analysis.opennlp.tools.OpenNLPOpsFactory;
+import org.apache.lucene.analysis.util.ResourceLoader;
+import org.apache.lucene.analysis.util.ResourceLoaderAware;
+import org.apache.lucene.analysis.util.TokenizerFactory;
+import org.apache.lucene.util.AttributeFactory;
+
+/**
+ * Factory for {@link OpenNLPTokenizer}.
+ *
+ * <pre class="prettyprint">
+ * &lt;fieldType name="text_opennlp" class="solr.TextField" positionIncrementGap="100"&gt;
+ *   &lt;analyzer&gt;
+ *     &lt;tokenizer class="solr.OpenNLPTokenizerFactory" sentenceModel="filename" tokenizerModel="filename"/&gt;
+ *   &lt;/analyzer&gt;
+ * &lt;/fieldType&gt;</pre>
+ * @since 7.3.0
+ */
+public class OpenNLPTokenizerFactory extends TokenizerFactory implements ResourceLoaderAware {
+  /** Argument name for the (required) sentence detector model file/resource. */
+  public static final String SENTENCE_MODEL = "sentenceModel";
+  /** Argument name for the (required) tokenizer model file/resource. */
+  public static final String TOKENIZER_MODEL = "tokenizerModel";
+
+  private final String sentenceModelFile;
+  private final String tokenizerModelFile;
+
+  /**
+   * Creates the factory from its configuration arguments.
+   *
+   * @param args configuration; {@link #SENTENCE_MODEL} and {@link #TOKENIZER_MODEL} are both
+   *             read with {@code require}
+   * @throws IllegalArgumentException if {@code args} is not empty once both model names have been read
+   */
+  public OpenNLPTokenizerFactory(Map<String,String> args) {
+    super(args);
+    sentenceModelFile = require(args, SENTENCE_MODEL);
+    tokenizerModelFile = require(args, TOKENIZER_MODEL);
+    if ( ! args.isEmpty()) {
+      throw new IllegalArgumentException("Unknown parameters: " + args);
+    }
+  }
+
+  /**
+   * Creates a tokenizer using the configured sentence and tokenizer models.
+   * An {@link IOException} raised while obtaining either operation is rethrown as a RuntimeException.
+   */
+  @Override
+  public OpenNLPTokenizer create(AttributeFactory factory) {
+    try {
+      NLPSentenceDetectorOp sentenceOp = OpenNLPOpsFactory.getSentenceDetector(sentenceModelFile);
+      NLPTokenizerOp tokenizerOp = OpenNLPOpsFactory.getTokenizer(tokenizerModelFile);
+      return new OpenNLPTokenizer(factory, sentenceOp, tokenizerOp);
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  /** Loads both models through {@code loader}. */
+  @Override
+  public void inform(ResourceLoader loader) throws IOException {
+    // register models in cache with file/resource names; both names come from require(...) in
+    // the constructor, so neither needs a null check here
+    OpenNLPOpsFactory.getSentenceModel(sentenceModelFile, loader);
+    OpenNLPOpsFactory.getTokenizerModel(tokenizerModelFile, loader);
+  }
+}