You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by cp...@apache.org on 2017/04/04 15:32:58 UTC

[16/36] lucene-solr:jira/solr-6203: SOLR-7383: Replace DIH 'rss' example with 'atom' rss example was broken for multiple reasons. atom example showcases the same - and more - features and uses the smallest config file needed to make it work.

SOLR-7383: Replace DIH 'rss' example with 'atom'
rss example was broken for multiple reasons.
atom example showcases the same - and more - features
and uses the smallest config file needed to make it work.


Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/580f6e98
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/580f6e98
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/580f6e98

Branch: refs/heads/jira/solr-6203
Commit: 580f6e98fb033dbbb8e0921fc3175021714ce956
Parents: 35aac1d
Author: Alexandre Rafalovitch <ar...@apache.org>
Authored: Sat Apr 1 13:42:23 2017 -0400
Committer: Alexandre Rafalovitch <ar...@apache.org>
Committed: Sat Apr 1 13:42:23 2017 -0400

----------------------------------------------------------------------
 solr/CHANGES.txt                                |    2 +
 solr/README.txt                                 |    2 +-
 solr/example/README.txt                         |    2 +-
 solr/example/example-DIH/README.txt             |   11 +-
 .../solr/atom/conf/atom-data-config.xml         |   35 +
 .../solr/atom/conf/lang/stopwords_en.txt        |   54 +
 .../example-DIH/solr/atom/conf/managed-schema   |  106 +
 .../example-DIH/solr/atom/conf/protwords.txt    |   17 +
 .../example-DIH/solr/atom/conf/solrconfig.xml   |   61 +
 .../example-DIH/solr/atom/conf/synonyms.txt     |   29 +
 .../example-DIH/solr/atom/conf/url_types.txt    |    1 +
 .../example-DIH/solr/atom/core.properties       |    0
 .../example-DIH/solr/rss/conf/admin-extra.html  |   24 -
 .../solr/rss/conf/admin-extra.menu-bottom.html  |   25 -
 .../solr/rss/conf/admin-extra.menu-top.html     |   25 -
 .../clustering/carrot2/kmeans-attributes.xml    |   19 -
 .../clustering/carrot2/lingo-attributes.xml     |   24 -
 .../conf/clustering/carrot2/stc-attributes.xml  |   19 -
 .../example-DIH/solr/rss/conf/currency.xml      |   67 -
 .../example-DIH/solr/rss/conf/elevate.xml       |   42 -
 .../solr/rss/conf/lang/contractions_ca.txt      |    8 -
 .../solr/rss/conf/lang/contractions_fr.txt      |   15 -
 .../solr/rss/conf/lang/contractions_ga.txt      |    5 -
 .../solr/rss/conf/lang/contractions_it.txt      |   23 -
 .../solr/rss/conf/lang/hyphenations_ga.txt      |    5 -
 .../solr/rss/conf/lang/stemdict_nl.txt          |    6 -
 .../solr/rss/conf/lang/stoptags_ja.txt          |  420 --
 .../solr/rss/conf/lang/stopwords_ar.txt         |  125 -
 .../solr/rss/conf/lang/stopwords_bg.txt         |  193 -
 .../solr/rss/conf/lang/stopwords_ca.txt         |  220 -
 .../solr/rss/conf/lang/stopwords_ckb.txt        |  136 -
 .../solr/rss/conf/lang/stopwords_cz.txt         |  172 -
 .../solr/rss/conf/lang/stopwords_da.txt         |  110 -
 .../solr/rss/conf/lang/stopwords_de.txt         |  294 --
 .../solr/rss/conf/lang/stopwords_el.txt         |   78 -
 .../solr/rss/conf/lang/stopwords_en.txt         |   54 -
 .../solr/rss/conf/lang/stopwords_es.txt         |  356 --
 .../solr/rss/conf/lang/stopwords_eu.txt         |   99 -
 .../solr/rss/conf/lang/stopwords_fa.txt         |  313 --
 .../solr/rss/conf/lang/stopwords_fi.txt         |   97 -
 .../solr/rss/conf/lang/stopwords_fr.txt         |  186 -
 .../solr/rss/conf/lang/stopwords_ga.txt         |  110 -
 .../solr/rss/conf/lang/stopwords_gl.txt         |  161 -
 .../solr/rss/conf/lang/stopwords_hi.txt         |  235 --
 .../solr/rss/conf/lang/stopwords_hu.txt         |  211 -
 .../solr/rss/conf/lang/stopwords_hy.txt         |   46 -
 .../solr/rss/conf/lang/stopwords_id.txt         |  359 --
 .../solr/rss/conf/lang/stopwords_it.txt         |  303 --
 .../solr/rss/conf/lang/stopwords_ja.txt         |  127 -
 .../solr/rss/conf/lang/stopwords_lv.txt         |  172 -
 .../solr/rss/conf/lang/stopwords_nl.txt         |  119 -
 .../solr/rss/conf/lang/stopwords_no.txt         |  194 -
 .../solr/rss/conf/lang/stopwords_pt.txt         |  253 --
 .../solr/rss/conf/lang/stopwords_ro.txt         |  233 --
 .../solr/rss/conf/lang/stopwords_ru.txt         |  243 --
 .../solr/rss/conf/lang/stopwords_sv.txt         |  133 -
 .../solr/rss/conf/lang/stopwords_th.txt         |  119 -
 .../solr/rss/conf/lang/stopwords_tr.txt         |  212 -
 .../solr/rss/conf/lang/userdict_ja.txt          |   29 -
 .../example-DIH/solr/rss/conf/managed-schema    | 1096 -----
 .../solr/rss/conf/mapping-FoldToASCII.txt       | 3813 ------------------
 .../solr/rss/conf/mapping-ISOLatin1Accent.txt   |  246 --
 .../example-DIH/solr/rss/conf/protwords.txt     |   21 -
 .../solr/rss/conf/rss-data-config.xml           |   26 -
 .../example-DIH/solr/rss/conf/solrconfig.xml    | 1396 -------
 .../example-DIH/solr/rss/conf/spellings.txt     |    2 -
 .../example-DIH/solr/rss/conf/stopwords.txt     |   14 -
 .../example-DIH/solr/rss/conf/synonyms.txt      |   29 -
 .../example-DIH/solr/rss/conf/update-script.js  |   53 -
 .../example-DIH/solr/rss/conf/xslt/example.xsl  |  132 -
 .../solr/rss/conf/xslt/example_atom.xsl         |   67 -
 .../solr/rss/conf/xslt/example_rss.xsl          |   66 -
 .../example-DIH/solr/rss/conf/xslt/luke.xsl     |  337 --
 .../solr/rss/conf/xslt/updateXml.xsl            |   70 -
 .../example-DIH/solr/rss/core.properties        |    0
 75 files changed, 312 insertions(+), 13795 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 1efefd0..3187dc3 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -180,6 +180,8 @@ Other Changes
 
 * SOLR-10399: Generalize some internal facet logic to simplify points/non-points field handling (Adrien Grand, hossman)
 
+* SOLR-7383: New DataImportHandler 'atom' example, replacing broken 'rss' example (Alexandre Rafalovitch)
+
 ==================  6.5.0 ==================
 
 Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/README.txt
----------------------------------------------------------------------
diff --git a/solr/README.txt b/solr/README.txt
index 4ef5eac..6af0cc6 100644
--- a/solr/README.txt
+++ b/solr/README.txt
@@ -67,7 +67,7 @@ Solr includes a few examples to help you get started. To run a specific example,
   bin/solr -e <EXAMPLE> where <EXAMPLE> is one of:
 
     cloud        : SolrCloud example
-    dih          : Data Import Handler (rdbms, mail, rss, tika)
+    dih          : Data Import Handler (rdbms, mail, atom, tika)
     schemaless   : Schema-less example (schema is inferred from data during indexing)
     techproducts : Kitchen sink example providing comprehensive examples of Solr features
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/README.txt
----------------------------------------------------------------------
diff --git a/solr/example/README.txt b/solr/example/README.txt
index d8402eb..4c8cca1 100644
--- a/solr/example/README.txt
+++ b/solr/example/README.txt
@@ -22,7 +22,7 @@ separate directory. To run a specific example, do:
   bin/solr -e <EXAMPLE> where <EXAMPLE> is one of:
   
     cloud        : SolrCloud example
-    dih          : Data Import Handler (rdbms, mail, rss, tika)
+    dih          : Data Import Handler (rdbms, mail, atom, tika)
     schemaless   : Schema-less example (schema is inferred from data during indexing)
     techproducts : Kitchen sink example providing comprehensive examples of Solr features
 

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/README.txt
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/README.txt b/solr/example/example-DIH/README.txt
index 0926bb6..ea8d28f 100644
--- a/solr/example/example-DIH/README.txt
+++ b/solr/example/example-DIH/README.txt
@@ -16,7 +16,7 @@
 Solr DataImportHandler example configuration
 --------------------------------------------
 
-To run this example, use the "-e" option of the bin/solr script:
+To run this multi-core example, use the "-e" option of the bin/solr script:
 
 > bin/solr -e dih
 
@@ -28,9 +28,9 @@ When Solr is started connect to:
 
   http://localhost:8983/solr/db/dataimport?command=full-import
 
-* To import data from an RSS feed, connect to:
+* To import data from an ATOM feed, connect to:
 
-  http://localhost:8983/solr/rss/dataimport?command=full-import
+  http://localhost:8983/solr/atom/dataimport?command=full-import
 
 * To import data from your IMAP server:
 
@@ -45,6 +45,5 @@ When Solr is started connect to:
 
   http://localhost:8983/solr/tika/dataimport?command=full-import
 
-See also README.txt in the solr subdirectory, and check
-http://wiki.apache.org/solr/DataImportHandler for detailed
-usage guide and tutorial.
+See also the Solr Reference Guide for a detailed usage guide:
+https://cwiki.apache.org/confluence/display/solr/Uploading+Structured+Data+Store+Data+with+the+Data+Import+Handler

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/atom/conf/atom-data-config.xml
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/atom/conf/atom-data-config.xml b/solr/example/example-DIH/solr/atom/conf/atom-data-config.xml
new file mode 100644
index 0000000..53b5060
--- /dev/null
+++ b/solr/example/example-DIH/solr/atom/conf/atom-data-config.xml
@@ -0,0 +1,35 @@
+<dataConfig>
+  <dataSource type="URLDataSource"/>
+  <document>
+
+    <entity name="stackoverflow"
+            url="http://stackoverflow.com/feeds/tag/solr"
+            processor="XPathEntityProcessor"
+            forEach="/feed|/feed/entry"
+            transformer="HTMLStripTransformer,RegexTransformer">
+
+      <!-- Pick this value up from the feed level and apply to all documents -->
+      <field column="lastchecked_dt" xpath="/feed/updated" commonField="true"/>
+
+      <!-- Keep only the final numeric part of the URL -->
+      <field column="id" xpath="/feed/entry/id" regex=".*/" replaceWith=""/>
+
+      <field column="title"    xpath="/feed/entry/title"/>
+      <field column="author"   xpath="/feed/entry/author/name"/>
+      <field column="category" xpath="/feed/entry/category/@term"/>
+      <field column="link"     xpath="/feed/entry/link[@rel='alternate']/@href"/>
+
+      <!-- Use transformers to convert HTML into plain text.
+        There is also an UpdateRequestProcessor to trim remaining spaces.
+      -->
+      <field column="summary" xpath="/feed/entry/summary" stripHTML="true" regex="( |\n)+" replaceWith=" "/>
+
+      <!-- Ignore namespaces when matching XPath -->
+      <field column="rank" xpath="/feed/entry/rank"/>
+
+      <field column="published_dt" xpath="/feed/entry/published"/>
+      <field column="updated_dt" xpath="/feed/entry/updated"/>
+    </entity>
+
+  </document>
+</dataConfig>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/atom/conf/lang/stopwords_en.txt
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/atom/conf/lang/stopwords_en.txt b/solr/example/example-DIH/solr/atom/conf/lang/stopwords_en.txt
new file mode 100644
index 0000000..2c164c0
--- /dev/null
+++ b/solr/example/example-DIH/solr/atom/conf/lang/stopwords_en.txt
@@ -0,0 +1,54 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# a couple of test stopwords to test that the words are really being
+# configured from this file:
+stopworda
+stopwordb
+
+# Standard English stop words taken from Lucene's StopAnalyzer
+a
+an
+and
+are
+as
+at
+be
+but
+by
+for
+if
+in
+into
+is
+it
+no
+not
+of
+on
+or
+such
+that
+the
+their
+then
+there
+these
+they
+this
+to
+was
+will
+with

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/atom/conf/managed-schema
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/atom/conf/managed-schema b/solr/example/example-DIH/solr/atom/conf/managed-schema
new file mode 100644
index 0000000..5875152
--- /dev/null
+++ b/solr/example/example-DIH/solr/atom/conf/managed-schema
@@ -0,0 +1,106 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<schema name="example-DIH-atom" version="1.6">
+  <uniqueKey>id</uniqueKey>
+
+  <field name="id" type="string" indexed="true" stored="true" required="true"/>
+  <field name="title" type="text_en_splitting" indexed="true" stored="true"/>
+  <field name="author" type="string" indexed="true" stored="true"/>
+  <field name="category" type="string" indexed="true" stored="true" multiValued="true"/>
+  <field name="link" type="string" indexed="true" stored="true"/>
+  <field name="summary" type="text_en_splitting" indexed="true" stored="true"/>
+  <field name="rank" type="pint" indexed="true" stored="true"/>
+
+  <dynamicField name="*_dt" type="pdate" indexed="true" stored="true"/>
+
+  <!-- Catch-all field, aggregating all "useful to search as text" fields via the copyField instructions -->
+  <field name="text" type="text_en_splitting" indexed="true" stored="false" multiValued="true"/>
+
+  <field name="urls" type="url_only" indexed="true" stored="false"/>
+
+
+  <copyField source="id" dest="text"/>
+  <copyField source="title" dest="text"/>
+  <copyField source="author" dest="text"/>
+  <copyField source="category" dest="text"/>
+  <copyField source="summary" dest="text"/>
+
+  <!-- extract URLs from summary for faceting -->
+  <copyField source="summary" dest="urls"/>
+
+  <fieldType name="string" class="solr.StrField" sortMissingLast="true" docValues="true"/>
+  <fieldType name="pint" class="solr.IntPointField" docValues="true"/>
+  <fieldType name="pdate" class="solr.DatePointField" docValues="true"/>
+
+
+  <!-- A text field with defaults appropriate for English, plus
+   aggressive word-splitting and autophrase features enabled.
+   This field is just like text_en, except it adds
+   WordDelimiterFilter to enable splitting and matching of
+   words on case-change, alpha numeric boundaries, and
+   non-alphanumeric chars.  This means certain compound word
+   cases will work, for example query "wi fi" will match
+   document "WiFi" or "wi-fi".
+  -->
+  <fieldType name="text_en_splitting" class="solr.TextField"
+             positionIncrementGap="100" autoGeneratePhraseQueries="true">
+    <analyzer type="index">
+      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+      <!-- in this example, we will only use synonyms at query time
+      <filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
+      -->
+      <!-- Case insensitive stop word removal. -->
+      <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
+      <filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1"
+              catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
+      <filter class="solr.LowerCaseFilterFactory"/>
+      <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+      <filter class="solr.PorterStemFilterFactory"/>
+      <filter class="solr.FlattenGraphFilterFactory"/>
+    </analyzer>
+    <analyzer type="query">
+      <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+      <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
+      <filter class="solr.StopFilterFactory"
+              ignoreCase="true"
+              words="lang/stopwords_en.txt"
+      />
+      <filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1"
+              catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
+      <filter class="solr.LowerCaseFilterFactory"/>
+      <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+      <filter class="solr.PorterStemFilterFactory"/>
+    </analyzer>
+  </fieldType>
+
+  <!-- Field type that extracts URLs from the text.
+   As the stored representation is not changed, it is only useful for faceting.
+   It is not terribly useful for searching URLs either, as there are too many special symbols.
+  -->
+  <fieldType name="url_only" class="solr.TextField" positionIncrementGap="100">
+    <analyzer type="index">
+      <tokenizer class="solr.UAX29URLEmailTokenizerFactory" maxTokenLength="255"/>
+      <filter class="solr.TypeTokenFilterFactory" types="url_types.txt" useWhitelist="true"/>
+    </analyzer>
+    <analyzer type="query">
+      <tokenizer class="solr.KeywordTokenizerFactory"/>
+    </analyzer>
+  </fieldType>
+
+</schema>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/atom/conf/protwords.txt
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/atom/conf/protwords.txt b/solr/example/example-DIH/solr/atom/conf/protwords.txt
new file mode 100644
index 0000000..1303e42
--- /dev/null
+++ b/solr/example/example-DIH/solr/atom/conf/protwords.txt
@@ -0,0 +1,17 @@
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#-----------------------------------------------------------------------
+# Use a protected word file to protect against the stemmer reducing two
+# unrelated words to the same base word.
+
+lucene

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/atom/conf/solrconfig.xml
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/atom/conf/solrconfig.xml b/solr/example/example-DIH/solr/atom/conf/solrconfig.xml
new file mode 100644
index 0000000..22005dd
--- /dev/null
+++ b/solr/example/example-DIH/solr/atom/conf/solrconfig.xml
@@ -0,0 +1,61 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- 
+ This is a DEMO configuration, highlighting elements 
+ specifically needed to get this example running
+ such as libraries and request handler specifics.
+
+ It uses defaults for, or does not define, most production-level settings
+ such as various caches or auto-commit policies.
+
+ See the Solr Reference Guide and other examples for 
+ more details on a well configured solrconfig.xml
+ https://cwiki.apache.org/confluence/display/solr/The+Well-Configured+Solr+Instance
+-->
+<config>
+
+  <!-- Controls what version of Lucene various components of Solr
+    adhere to.  Generally, you want to use the latest version to
+    get all bug fixes and improvements. It is highly recommended
+    that you fully re-index after changing this setting as it can
+    affect both how text is indexed and queried.
+  -->
+  <luceneMatchVersion>7.0.0</luceneMatchVersion>
+
+  <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-dataimporthandler-.*\.jar"/>
+
+  <requestHandler name="/select" class="solr.SearchHandler">
+    <lst name="defaults">
+      <str name="echoParams">explicit</str>
+      <str name="df">text</str>
+    </lst>
+  </requestHandler>
+
+  <requestHandler name="/dataimport" class="solr.DataImportHandler">
+    <lst name="defaults">
+      <str name="config">atom-data-config.xml</str>
+      <str name="processor">trim_text</str>
+    </lst>
+  </requestHandler>
+
+  <updateProcessor class="solr.processor.TrimFieldUpdateProcessorFactory" name="trim_text">
+    <str name="typeName">text_en_splitting</str>
+  </updateProcessor>
+
+</config>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/atom/conf/synonyms.txt
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/atom/conf/synonyms.txt b/solr/example/example-DIH/solr/atom/conf/synonyms.txt
new file mode 100644
index 0000000..eab4ee8
--- /dev/null
+++ b/solr/example/example-DIH/solr/atom/conf/synonyms.txt
@@ -0,0 +1,29 @@
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#-----------------------------------------------------------------------
+#some test synonym mappings unlikely to appear in real input text
+aaafoo => aaabar
+bbbfoo => bbbfoo bbbbar
+cccfoo => cccbar cccbaz
+fooaaa,baraaa,bazaaa
+
+# Some synonym groups specific to this example
+GB,gib,gigabyte,gigabytes
+MB,mib,megabyte,megabytes
+Television, Televisions, TV, TVs
+#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming
+#after us won't split it into two words.
+
+# Synonym mappings can be used for spelling correction too
+pixima => pixma
+

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/atom/conf/url_types.txt
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/atom/conf/url_types.txt b/solr/example/example-DIH/solr/atom/conf/url_types.txt
new file mode 100644
index 0000000..808f313
--- /dev/null
+++ b/solr/example/example-DIH/solr/atom/conf/url_types.txt
@@ -0,0 +1 @@
+<URL>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/atom/core.properties
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/atom/core.properties b/solr/example/example-DIH/solr/atom/core.properties
new file mode 100644
index 0000000..e69de29

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/rss/conf/admin-extra.html
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/rss/conf/admin-extra.html b/solr/example/example-DIH/solr/rss/conf/admin-extra.html
deleted file mode 100644
index fecab20..0000000
--- a/solr/example/example-DIH/solr/rss/conf/admin-extra.html
+++ /dev/null
@@ -1,24 +0,0 @@
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements.  See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License.  You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<!-- The content of this page will be statically included into the top-
-right box of the cores overview page. Uncomment this as an example to 
-see there the content will show up.
-
-<img src="img/ico/construction.png"> This line will appear at the top-
-right box on collection1's Overview
--->

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/rss/conf/admin-extra.menu-bottom.html
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/rss/conf/admin-extra.menu-bottom.html b/solr/example/example-DIH/solr/rss/conf/admin-extra.menu-bottom.html
deleted file mode 100644
index 3359a46..0000000
--- a/solr/example/example-DIH/solr/rss/conf/admin-extra.menu-bottom.html
+++ /dev/null
@@ -1,25 +0,0 @@
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements.  See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License.  You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<!-- admin-extra.menu-bottom.html -->
-<!--
-<li>
-  <a href="#" style="background-image: url(img/ico/construction.png);">
-    LAST ITEM
-  </a>
-</li>
--->

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/rss/conf/admin-extra.menu-top.html
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/rss/conf/admin-extra.menu-top.html b/solr/example/example-DIH/solr/rss/conf/admin-extra.menu-top.html
deleted file mode 100644
index 0886cee..0000000
--- a/solr/example/example-DIH/solr/rss/conf/admin-extra.menu-top.html
+++ /dev/null
@@ -1,25 +0,0 @@
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements.  See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License.  You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<!-- admin-extra.menu-top.html -->
-<!--
-<li>
-  <a href="#" style="background-image: url(img/ico/construction.png);">
-    FIRST ITEM
-  </a>
-</li>
--->

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/rss/conf/clustering/carrot2/kmeans-attributes.xml
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/rss/conf/clustering/carrot2/kmeans-attributes.xml b/solr/example/example-DIH/solr/rss/conf/clustering/carrot2/kmeans-attributes.xml
deleted file mode 100644
index d802465..0000000
--- a/solr/example/example-DIH/solr/rss/conf/clustering/carrot2/kmeans-attributes.xml
+++ /dev/null
@@ -1,19 +0,0 @@
-<!-- 
-  Default configuration for the bisecting k-means clustering algorithm.
-  
-  This file can be loaded (and saved) by Carrot2 Workbench.
-  http://project.carrot2.org/download.html
--->
-<attribute-sets default="attributes">
-    <attribute-set id="attributes">
-      <value-set>
-        <label>attributes</label>
-          <attribute key="MultilingualClustering.defaultLanguage">
-            <value type="org.carrot2.core.LanguageCode" value="ENGLISH"/>
-          </attribute>
-          <attribute key="MultilingualClustering.languageAggregationStrategy">
-            <value type="org.carrot2.text.clustering.MultilingualClustering$LanguageAggregationStrategy" value="FLATTEN_MAJOR_LANGUAGE"/>
-          </attribute>
-      </value-set>
-  </attribute-set>
-</attribute-sets>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/rss/conf/clustering/carrot2/lingo-attributes.xml
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/rss/conf/clustering/carrot2/lingo-attributes.xml b/solr/example/example-DIH/solr/rss/conf/clustering/carrot2/lingo-attributes.xml
deleted file mode 100644
index 4bf1360..0000000
--- a/solr/example/example-DIH/solr/rss/conf/clustering/carrot2/lingo-attributes.xml
+++ /dev/null
@@ -1,24 +0,0 @@
-<!-- 
-  Default configuration for the Lingo clustering algorithm.
-
-  This file can be loaded (and saved) by Carrot2 Workbench.
-  http://project.carrot2.org/download.html
--->
-<attribute-sets default="attributes">
-    <attribute-set id="attributes">
-      <value-set>
-        <label>attributes</label>
-          <!-- 
-          The language to assume for clustered documents.
-          For a list of allowed values, see: 
-          http://download.carrot2.org/stable/manual/#section.attribute.lingo.MultilingualClustering.defaultLanguage
-          -->
-          <attribute key="MultilingualClustering.defaultLanguage">
-            <value type="org.carrot2.core.LanguageCode" value="ENGLISH"/>
-          </attribute>
-          <attribute key="LingoClusteringAlgorithm.desiredClusterCountBase">
-            <value type="java.lang.Integer" value="20"/>
-          </attribute>
-      </value-set>
-  </attribute-set>
-</attribute-sets>
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/rss/conf/clustering/carrot2/stc-attributes.xml
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/rss/conf/clustering/carrot2/stc-attributes.xml b/solr/example/example-DIH/solr/rss/conf/clustering/carrot2/stc-attributes.xml
deleted file mode 100644
index c1bf110..0000000
--- a/solr/example/example-DIH/solr/rss/conf/clustering/carrot2/stc-attributes.xml
+++ /dev/null
@@ -1,19 +0,0 @@
-<!-- 
-  Default configuration for the STC clustering algorithm.
-
-  This file can be loaded (and saved) by Carrot2 Workbench.
-  http://project.carrot2.org/download.html
--->
-<attribute-sets default="attributes">
-    <attribute-set id="attributes">
-      <value-set>
-        <label>attributes</label>
-          <attribute key="MultilingualClustering.defaultLanguage">
-            <value type="org.carrot2.core.LanguageCode" value="ENGLISH"/>
-          </attribute>
-          <attribute key="MultilingualClustering.languageAggregationStrategy">
-            <value type="org.carrot2.text.clustering.MultilingualClustering$LanguageAggregationStrategy" value="FLATTEN_MAJOR_LANGUAGE"/>
-          </attribute>
-      </value-set>
-  </attribute-set>
-</attribute-sets>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/rss/conf/currency.xml
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/rss/conf/currency.xml b/solr/example/example-DIH/solr/rss/conf/currency.xml
deleted file mode 100644
index 3a9c58a..0000000
--- a/solr/example/example-DIH/solr/rss/conf/currency.xml
+++ /dev/null
@@ -1,67 +0,0 @@
-<?xml version="1.0" ?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements.  See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License.  You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<!-- Example exchange rates file for CurrencyField type named "currency" in example schema -->
-
-<currencyConfig version="1.0">
-  <rates>
-    <!-- Updated from http://www.exchangerate.com/ at 2011-09-27 -->
-    <rate from="USD" to="ARS" rate="4.333871" comment="ARGENTINA Peso" />
-    <rate from="USD" to="AUD" rate="1.025768" comment="AUSTRALIA Dollar" />
-    <rate from="USD" to="EUR" rate="0.743676" comment="European Euro" />
-    <rate from="USD" to="BRL" rate="1.881093" comment="BRAZIL Real" />
-    <rate from="USD" to="CAD" rate="1.030815" comment="CANADA Dollar" />
-    <rate from="USD" to="CLP" rate="519.0996" comment="CHILE Peso" />
-    <rate from="USD" to="CNY" rate="6.387310" comment="CHINA Yuan" />
-    <rate from="USD" to="CZK" rate="18.47134" comment="CZECH REP. Koruna" />
-    <rate from="USD" to="DKK" rate="5.515436" comment="DENMARK Krone" />
-    <rate from="USD" to="HKD" rate="7.801922" comment="HONG KONG Dollar" />
-    <rate from="USD" to="HUF" rate="215.6169" comment="HUNGARY Forint" />
-    <rate from="USD" to="ISK" rate="118.1280" comment="ICELAND Krona" />
-    <rate from="USD" to="INR" rate="49.49088" comment="INDIA Rupee" />
-    <rate from="USD" to="XDR" rate="0.641358" comment="INTNL MON. FUND SDR" />
-    <rate from="USD" to="ILS" rate="3.709739" comment="ISRAEL Sheqel" />
-    <rate from="USD" to="JPY" rate="76.32419" comment="JAPAN Yen" />
-    <rate from="USD" to="KRW" rate="1169.173" comment="KOREA (SOUTH) Won" />
-    <rate from="USD" to="KWD" rate="0.275142" comment="KUWAIT Dinar" />
-    <rate from="USD" to="MXN" rate="13.85895" comment="MEXICO Peso" />
-    <rate from="USD" to="NZD" rate="1.285159" comment="NEW ZEALAND Dollar" />
-    <rate from="USD" to="NOK" rate="5.859035" comment="NORWAY Krone" />
-    <rate from="USD" to="PKR" rate="87.57007" comment="PAKISTAN Rupee" />
-    <rate from="USD" to="PEN" rate="2.730683" comment="PERU Sol" />
-    <rate from="USD" to="PHP" rate="43.62039" comment="PHILIPPINES Peso" />
-    <rate from="USD" to="PLN" rate="3.310139" comment="POLAND Zloty" />
-    <rate from="USD" to="RON" rate="3.100932" comment="ROMANIA Leu" />
-    <rate from="USD" to="RUB" rate="32.14663" comment="RUSSIA Ruble" />
-    <rate from="USD" to="SAR" rate="3.750465" comment="SAUDI ARABIA Riyal" />
-    <rate from="USD" to="SGD" rate="1.299352" comment="SINGAPORE Dollar" />
-    <rate from="USD" to="ZAR" rate="8.329761" comment="SOUTH AFRICA Rand" />
-    <rate from="USD" to="SEK" rate="6.883442" comment="SWEDEN Krona" />
-    <rate from="USD" to="CHF" rate="0.906035" comment="SWITZERLAND Franc" />
-    <rate from="USD" to="TWD" rate="30.40283" comment="TAIWAN Dollar" />
-    <rate from="USD" to="THB" rate="30.89487" comment="THAILAND Baht" />
-    <rate from="USD" to="AED" rate="3.672955" comment="U.A.E. Dirham" />
-    <rate from="USD" to="UAH" rate="7.988582" comment="UKRAINE Hryvnia" />
-    <rate from="USD" to="GBP" rate="0.647910" comment="UNITED KINGDOM Pound" />
-    
-    <!-- Cross-rates for some common currencies -->
-    <rate from="EUR" to="GBP" rate="0.869914" />  
-    <rate from="EUR" to="NOK" rate="7.800095" />  
-    <rate from="GBP" to="NOK" rate="8.966508" />  
-  </rates>
-</currencyConfig>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/rss/conf/elevate.xml
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/rss/conf/elevate.xml b/solr/example/example-DIH/solr/rss/conf/elevate.xml
deleted file mode 100644
index 2c09ebe..0000000
--- a/solr/example/example-DIH/solr/rss/conf/elevate.xml
+++ /dev/null
@@ -1,42 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" ?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements.  See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License.  You may obtain a copy of the License at
-
-     http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<!-- If this file is found in the config directory, it will only be
-     loaded once at startup.  If it is found in Solr's data
-     directory, it will be re-loaded every commit.
-
-   See http://wiki.apache.org/solr/QueryElevationComponent for more info
-
--->
-<elevate>
- <!-- Query elevation examples
-  <query text="foo bar">
-    <doc id="1" />
-    <doc id="2" />
-    <doc id="3" />
-  </query>
-
-for use with techproducts example
- 
-  <query text="ipod">
-    <doc id="MA147LL/A" />  put the actual ipod at the top 
-    <doc id="IW-02" exclude="true" /> exclude this cable
-  </query>
--->
-
-</elevate>

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/rss/conf/lang/contractions_ca.txt
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/rss/conf/lang/contractions_ca.txt b/solr/example/example-DIH/solr/rss/conf/lang/contractions_ca.txt
deleted file mode 100644
index 307a85f..0000000
--- a/solr/example/example-DIH/solr/rss/conf/lang/contractions_ca.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-# Set of Catalan contractions for ElisionFilter
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-d
-l
-m
-n
-s
-t

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/rss/conf/lang/contractions_fr.txt
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/rss/conf/lang/contractions_fr.txt b/solr/example/example-DIH/solr/rss/conf/lang/contractions_fr.txt
deleted file mode 100644
index f1bba51..0000000
--- a/solr/example/example-DIH/solr/rss/conf/lang/contractions_fr.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-# Set of French contractions for ElisionFilter
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-l
-m
-t
-qu
-n
-s
-j
-d
-c
-jusqu
-quoiqu
-lorsqu
-puisqu

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/rss/conf/lang/contractions_ga.txt
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/rss/conf/lang/contractions_ga.txt b/solr/example/example-DIH/solr/rss/conf/lang/contractions_ga.txt
deleted file mode 100644
index 9ebe7fa..0000000
--- a/solr/example/example-DIH/solr/rss/conf/lang/contractions_ga.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-# Set of Irish contractions for ElisionFilter
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-d
-m
-b

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/rss/conf/lang/contractions_it.txt
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/rss/conf/lang/contractions_it.txt b/solr/example/example-DIH/solr/rss/conf/lang/contractions_it.txt
deleted file mode 100644
index cac0409..0000000
--- a/solr/example/example-DIH/solr/rss/conf/lang/contractions_it.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-# Set of Italian contractions for ElisionFilter
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-c
-l 
-all 
-dall 
-dell 
-nell 
-sull 
-coll 
-pell 
-gl 
-agl 
-dagl 
-degl 
-negl 
-sugl 
-un 
-m 
-t 
-s 
-v 
-d

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/rss/conf/lang/hyphenations_ga.txt
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/rss/conf/lang/hyphenations_ga.txt b/solr/example/example-DIH/solr/rss/conf/lang/hyphenations_ga.txt
deleted file mode 100644
index 4d2642c..0000000
--- a/solr/example/example-DIH/solr/rss/conf/lang/hyphenations_ga.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-# Set of Irish hyphenations for StopFilter
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-h
-n
-t

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/rss/conf/lang/stemdict_nl.txt
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/rss/conf/lang/stemdict_nl.txt b/solr/example/example-DIH/solr/rss/conf/lang/stemdict_nl.txt
deleted file mode 100644
index 4410729..0000000
--- a/solr/example/example-DIH/solr/rss/conf/lang/stemdict_nl.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-# Set of overrides for the Dutch stemmer
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-fiets	fiets
-bromfiets	bromfiets
-ei	eier
-kind	kinder

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/rss/conf/lang/stoptags_ja.txt
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/rss/conf/lang/stoptags_ja.txt b/solr/example/example-DIH/solr/rss/conf/lang/stoptags_ja.txt
deleted file mode 100644
index 71b7508..0000000
--- a/solr/example/example-DIH/solr/rss/conf/lang/stoptags_ja.txt
+++ /dev/null
@@ -1,420 +0,0 @@
-#
-# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter.
-#
-# Any token with a part-of-speech tag that exactly matches one defined in this
-# file is removed from the token stream.
-#
-# Set your own stoptags by uncommenting the lines below.  Note that comments are
-# not allowed on the same line as a stoptag.  See LUCENE-3745 for frequency lists,
-# etc. that can be useful for building your own stoptag set.
-#
-# The entire possible tagset is provided below for convenience.
-#
-#####
-#  noun: unclassified nouns
-#\u540d\u8a5e
-#
-#  noun-common: Common nouns or nouns where the sub-classification is undefined
-#\u540d\u8a5e-\u4e00\u822c
-#
-#  noun-proper: Proper nouns where the sub-classification is undefined 
-#\u540d\u8a5e-\u56fa\u6709\u540d\u8a5e
-#
-#  noun-proper-misc: miscellaneous proper nouns
-#\u540d\u8a5e-\u56fa\u6709\u540d\u8a5e-\u4e00\u822c
-#
-#  noun-proper-person: Personal names where the sub-classification is undefined
-#\u540d\u8a5e-\u56fa\u6709\u540d\u8a5e-\u4eba\u540d
-#
-#  noun-proper-person-misc: names that cannot be divided into surname and 
-#  given name; foreign names; names where the surname or given name is unknown.
-#  e.g. \u304a\u5e02\u306e\u65b9
-#\u540d\u8a5e-\u56fa\u6709\u540d\u8a5e-\u4eba\u540d-\u4e00\u822c
-#
-#  noun-proper-person-surname: Mainly Japanese surnames.
-#  e.g. \u5c71\u7530
-#\u540d\u8a5e-\u56fa\u6709\u540d\u8a5e-\u4eba\u540d-\u59d3
-#
-#  noun-proper-person-given_name: Mainly Japanese given names.
-#  e.g. \u592a\u90ce
-#\u540d\u8a5e-\u56fa\u6709\u540d\u8a5e-\u4eba\u540d-\u540d
-#
-#  noun-proper-organization: Names representing organizations.
-#  e.g. \u901a\u7523\u7701, NHK
-#\u540d\u8a5e-\u56fa\u6709\u540d\u8a5e-\u7d44\u7e54
-#
-#  noun-proper-place: Place names where the sub-classification is undefined
-#\u540d\u8a5e-\u56fa\u6709\u540d\u8a5e-\u5730\u57df
-#
-#  noun-proper-place-misc: Place names excluding countries.
-#  e.g. \u30a2\u30b8\u30a2, \u30d0\u30eb\u30bb\u30ed\u30ca, \u4eac\u90fd
-#\u540d\u8a5e-\u56fa\u6709\u540d\u8a5e-\u5730\u57df-\u4e00\u822c
-#
-#  noun-proper-place-country: Country names. 
-#  e.g. \u65e5\u672c, \u30aa\u30fc\u30b9\u30c8\u30e9\u30ea\u30a2
-#\u540d\u8a5e-\u56fa\u6709\u540d\u8a5e-\u5730\u57df-\u56fd
-#
-#  noun-pronoun: Pronouns where the sub-classification is undefined
-#\u540d\u8a5e-\u4ee3\u540d\u8a5e
-#
-#  noun-pronoun-misc: miscellaneous pronouns: 
-#  e.g. \u305d\u308c, \u3053\u3053, \u3042\u3044\u3064, \u3042\u306a\u305f, \u3042\u3061\u3053\u3061, \u3044\u304f\u3064, \u3069\u3053\u304b, \u306a\u306b, \u307f\u306a\u3055\u3093, \u307f\u3093\u306a, \u308f\u305f\u304f\u3057, \u308f\u308c\u308f\u308c
-#\u540d\u8a5e-\u4ee3\u540d\u8a5e-\u4e00\u822c
-#
-#  noun-pronoun-contraction: Spoken language contraction made by combining a 
-#  pronoun and the particle 'wa'.
-#  e.g. \u3042\u308a\u3083, \u3053\u308a\u3083, \u3053\u308a\u3083\u3042, \u305d\u308a\u3083, \u305d\u308a\u3083\u3042 
-#\u540d\u8a5e-\u4ee3\u540d\u8a5e-\u7e2e\u7d04
-#
-#  noun-adverbial: Temporal nouns such as names of days or months that behave 
-#  like adverbs. Nouns that represent amount or ratios and can be used adverbially,
-#  e.g. \u91d1\u66dc, \u4e00\u6708, \u5348\u5f8c, \u5c11\u91cf
-#\u540d\u8a5e-\u526f\u8a5e\u53ef\u80fd
-#
-#  noun-verbal: Nouns that take arguments with case and can appear followed by 
-#  'suru' and related verbs (\u3059\u308b, \u3067\u304d\u308b, \u306a\u3055\u308b, \u304f\u3060\u3055\u308b)
-#  e.g. \u30a4\u30f3\u30d7\u30c3\u30c8, \u611b\u7740, \u60aa\u5316, \u60aa\u6226\u82e6\u95d8, \u4e00\u5b89\u5fc3, \u4e0b\u53d6\u308a
-#\u540d\u8a5e-\u30b5\u5909\u63a5\u7d9a
-#
-#  noun-adjective-base: The base form of adjectives, words that appear before \u306a ("na")
-#  e.g. \u5065\u5eb7, \u5b89\u6613, \u99c4\u76ee, \u3060\u3081
-#\u540d\u8a5e-\u5f62\u5bb9\u52d5\u8a5e\u8a9e\u5e79
-#
-#  noun-numeric: Arabic numbers, Chinese numerals, and counters like \u4f55 (\u56de), \u6570.
-#  e.g. 0, 1, 2, \u4f55, \u6570, \u5e7e
-#\u540d\u8a5e-\u6570
-#
-#  noun-affix: noun affixes where the sub-classification is undefined
-#\u540d\u8a5e-\u975e\u81ea\u7acb
-#
-#  noun-affix-misc: Of adnominalizers, the case-marker \u306e ("no"), and words that 
-#  attach to the base form of inflectional words, words that cannot be classified 
-#  into any of the other categories below. This category includes indefinite nouns.
-#  e.g. \u3042\u304b\u3064\u304d, \u6681, \u304b\u3044, \u7532\u6590, \u6c17, \u304d\u3089\u3044, \u5acc\u3044, \u304f\u305b, \u7656, \u3053\u3068, \u4e8b, \u3054\u3068, \u6bce, \u3057\u3060\u3044, \u6b21\u7b2c, 
-#       \u9806, \u305b\u3044, \u6240\u70ba, \u3064\u3044\u3067, \u5e8f\u3067, \u3064\u3082\u308a, \u7a4d\u3082\u308a, \u70b9, \u3069\u3053\u308d, \u306e, \u306f\u305a, \u7b48, \u306f\u305a\u307f, \u5f3e\u307f, 
-#       \u62cd\u5b50, \u3075\u3046, \u3075\u308a, \u632f\u308a, \u307b\u3046, \u65b9, \u65e8, \u3082\u306e, \u7269, \u8005, \u3086\u3048, \u6545, \u3086\u3048\u3093, \u6240\u4ee5, \u308f\u3051, \u8a33,
-#       \u308f\u308a, \u5272\u308a, \u5272, \u3093-\u53e3\u8a9e/, \u3082\u3093-\u53e3\u8a9e/
-#\u540d\u8a5e-\u975e\u81ea\u7acb-\u4e00\u822c
-#
-#  noun-affix-adverbial: noun affixes that can behave as adverbs.
-#  e.g. \u3042\u3044\u3060, \u9593, \u3042\u3052\u304f, \u6319\u3052\u53e5, \u3042\u3068, \u5f8c, \u4f59\u308a, \u4ee5\u5916, \u4ee5\u964d, \u4ee5\u5f8c, \u4ee5\u4e0a, \u4ee5\u524d, \u4e00\u65b9, \u3046\u3048, 
-#       \u4e0a, \u3046\u3061, \u5185, \u304a\u308a, \u6298\u308a, \u304b\u304e\u308a, \u9650\u308a, \u304d\u308a, \u3063\u304d\u308a, \u7d50\u679c, \u3053\u308d, \u9803, \u3055\u3044, \u969b, \u6700\u4e2d, \u3055\u306a\u304b, 
-#       \u6700\u4e2d, \u3058\u305f\u3044, \u81ea\u4f53, \u305f\u3073, \u5ea6, \u305f\u3081, \u70ba, \u3064\u3069, \u90fd\u5ea6, \u3068\u304a\u308a, \u901a\u308a, \u3068\u304d, \u6642, \u3068\u3053\u308d, \u6240, 
-#       \u3068\u305f\u3093, \u9014\u7aef, \u306a\u304b, \u4e2d, \u306e\u3061, \u5f8c, \u3070\u3042\u3044, \u5834\u5408, \u65e5, \u3076\u3093, \u5206, \u307b\u304b, \u4ed6, \u307e\u3048, \u524d, \u307e\u307e, 
-#       \u5118, \u4fad, \u307f\u304e\u308a, \u77e2\u5148
-#\u540d\u8a5e-\u975e\u81ea\u7acb-\u526f\u8a5e\u53ef\u80fd
-#
-#  noun-affix-aux: noun affixes treated as \u52a9\u52d5\u8a5e ("auxiliary verb") in school grammars 
-#  with the stem \u3088\u3046(\u3060) ("you(da)").
-#  e.g.  \u3088\u3046, \u3084\u3046, \u69d8 (\u3088\u3046)
-#\u540d\u8a5e-\u975e\u81ea\u7acb-\u52a9\u52d5\u8a5e\u8a9e\u5e79
-#  
-#  noun-affix-adjective-base: noun affixes that can connect to the indeclinable
-#  connection form \u306a (aux "da").
-#  e.g. \u307f\u305f\u3044, \u3075\u3046
-#\u540d\u8a5e-\u975e\u81ea\u7acb-\u5f62\u5bb9\u52d5\u8a5e\u8a9e\u5e79
-#
-#  noun-special: special nouns where the sub-classification is undefined.
-#\u540d\u8a5e-\u7279\u6b8a
-#
-#  noun-special-aux: The \u305d\u3046\u3060 ("souda") stem form that is used for reporting news, is 
-#  treated as \u52a9\u52d5\u8a5e ("auxiliary verb") in school grammars, and attaches to the base 
-#  form of inflectional words.
-#  e.g. \u305d\u3046
-#\u540d\u8a5e-\u7279\u6b8a-\u52a9\u52d5\u8a5e\u8a9e\u5e79
-#
-#  noun-suffix: noun suffixes where the sub-classification is undefined.
-#\u540d\u8a5e-\u63a5\u5c3e
-#
-#  noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect 
-#  to \u30ac\u30eb or \u30bf\u30a4 and can combine into compound nouns, words that cannot be classified into
-#  any of the other categories below. In general, this category is more inclusive than 
-#  \u63a5\u5c3e\u8a9e ("suffix") and is usually the last element in a compound noun.
-#  e.g. \u304a\u304d, \u304b\u305f, \u65b9, \u7532\u6590 (\u304c\u3044), \u304c\u304b\u308a, \u304e\u307f, \u6c17\u5473, \u3050\u308b\u307f, (\uff5e\u3057\u305f) \u3055, \u6b21\u7b2c, \u6e08 (\u305a) \u307f,
-#       \u3088\u3046, (\u3067\u304d)\u3063\u3053, \u611f, \u89b3, \u6027, \u5b66, \u985e, \u9762, \u7528
-#\u540d\u8a5e-\u63a5\u5c3e-\u4e00\u822c
-#
-#  noun-suffix-person: Suffixes that form nouns and attach to person names more often
-#  than other nouns.
-#  e.g. \u541b, \u69d8, \u8457
-#\u540d\u8a5e-\u63a5\u5c3e-\u4eba\u540d
-#
-#  noun-suffix-place: Suffixes that form nouns and attach to place names more often 
-#  than other nouns.
-#  e.g. \u753a, \u5e02, \u770c
-#\u540d\u8a5e-\u63a5\u5c3e-\u5730\u57df
-#
-#  noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that 
-#  can appear before \u30b9\u30eb ("suru").
-#  e.g. \u5316, \u8996, \u5206\u3051, \u5165\u308a, \u843d\u3061, \u8cb7\u3044
-#\u540d\u8a5e-\u63a5\u5c3e-\u30b5\u5909\u63a5\u7d9a
-#
-#  noun-suffix-aux: The stem form of \u305d\u3046\u3060 (\u69d8\u614b) that is used to indicate conditions, 
-#  is treated as \u52a9\u52d5\u8a5e ("auxiliary verb") in school grammars, and attaches to the 
-#  conjunctive form of inflectional words.
-#  e.g. \u305d\u3046
-#\u540d\u8a5e-\u63a5\u5c3e-\u52a9\u52d5\u8a5e\u8a9e\u5e79
-#
-#  noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive 
-#  form of inflectional words and appear before the copula \u3060 ("da").
-#  e.g. \u7684, \u3052, \u304c\u3061
-#\u540d\u8a5e-\u63a5\u5c3e-\u5f62\u5bb9\u52d5\u8a5e\u8a9e\u5e79
-#
-#  noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs.
-#  e.g. \u5f8c (\u3054), \u4ee5\u5f8c, \u4ee5\u964d, \u4ee5\u524d, \u524d\u5f8c, \u4e2d, \u672b, \u4e0a, \u6642 (\u3058)
-#\u540d\u8a5e-\u63a5\u5c3e-\u526f\u8a5e\u53ef\u80fd
-#
-#  noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category 
-#  is more inclusive than \u52a9\u6570\u8a5e ("classifier") and includes common nouns that attach 
-#  to numbers.
-#  e.g. \u500b, \u3064, \u672c, \u518a, \u30d1\u30fc\u30bb\u30f3\u30c8, cm, kg, \u30ab\u6708, \u304b\u56fd, \u533a\u753b, \u6642\u9593, \u6642\u534a
-#\u540d\u8a5e-\u63a5\u5c3e-\u52a9\u6570\u8a5e
-#
-#  noun-suffix-special: Special suffixes that mainly attach to inflecting words.
-#  e.g. (\u697d\u3057) \u3055, (\u8003\u3048) \u65b9
-#\u540d\u8a5e-\u63a5\u5c3e-\u7279\u6b8a
-#
-#  noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words 
-#  together.
-#  e.g. (\u65e5\u672c) \u5bfe (\u30a2\u30e1\u30ea\u30ab), \u5bfe (\u30a2\u30e1\u30ea\u30ab), (3) \u5bfe (5), (\u5973\u512a) \u517c (\u4e3b\u5a66)
-#\u540d\u8a5e-\u63a5\u7d9a\u8a5e\u7684
-#
-#  noun-verbal_aux: Nouns that attach to the conjunctive particle \u3066 ("te") and are 
-#  semantically verb-like.
-#  e.g. \u3054\u3089\u3093, \u3054\u89a7, \u5fa1\u89a7, \u9802\u6234
-#\u540d\u8a5e-\u52d5\u8a5e\u975e\u81ea\u7acb\u7684
-#
-#  noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry, 
-#  dialects, English, etc. Currently, the only entry for \u540d\u8a5e \u5f15\u7528\u6587\u5b57\u5217 ("noun quotation") 
-#  is \u3044\u308f\u304f ("iwaku").
-#\u540d\u8a5e-\u5f15\u7528\u6587\u5b57\u5217
-#
-#  noun-nai_adjective: Words that appear before the auxiliary verb \u306a\u3044 ("nai") and
-#  behave like an adjective.
-#  e.g. \u7533\u3057\u8a33, \u4ed5\u65b9, \u3068\u3093\u3067\u3082, \u9055\u3044
-#\u540d\u8a5e-\u30ca\u30a4\u5f62\u5bb9\u8a5e\u8a9e\u5e79
-#
-#####
-#  prefix: unclassified prefixes
-#\u63a5\u982d\u8a5e
-#
-#  prefix-nominal: Prefixes that attach to nouns (including adjective stem forms) 
-#  excluding numerical expressions.
-#  e.g. \u304a (\u6c34), \u67d0 (\u6c0f), \u540c (\u793e), \u6545 (\uff5e\u6c0f), \u9ad8 (\u54c1\u8cea), \u304a (\u898b\u4e8b), \u3054 (\u7acb\u6d3e)
-#\u63a5\u982d\u8a5e-\u540d\u8a5e\u63a5\u7d9a
-#
-#  prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb
-#  in conjunctive form followed by \u306a\u308b/\u306a\u3055\u308b/\u304f\u3060\u3055\u308b.
-#  e.g. \u304a (\u8aad\u307f\u306a\u3055\u3044), \u304a (\u5ea7\u308a)
-#\u63a5\u982d\u8a5e-\u52d5\u8a5e\u63a5\u7d9a
-#
-#  prefix-adjectival: Prefixes that attach to adjectives.
-#  e.g. \u304a (\u5bd2\u3044\u3067\u3059\u306d\u3048), \u30d0\u30ab (\u3067\u304b\u3044)
-#\u63a5\u982d\u8a5e-\u5f62\u5bb9\u8a5e\u63a5\u7d9a
-#
-#  prefix-numerical: Prefixes that attach to numerical expressions.
-#  e.g. \u7d04, \u304a\u3088\u305d, \u6bce\u6642
-#\u63a5\u982d\u8a5e-\u6570\u63a5\u7d9a
-#
-#####
-#  verb: unclassified verbs
-#\u52d5\u8a5e
-#
-#  verb-main:
-#\u52d5\u8a5e-\u81ea\u7acb
-#
-#  verb-auxiliary:
-#\u52d5\u8a5e-\u975e\u81ea\u7acb
-#
-#  verb-suffix:
-#\u52d5\u8a5e-\u63a5\u5c3e
-#
-#####
-#  adjective: unclassified adjectives
-#\u5f62\u5bb9\u8a5e
-#
-#  adjective-main:
-#\u5f62\u5bb9\u8a5e-\u81ea\u7acb
-#
-#  adjective-auxiliary:
-#\u5f62\u5bb9\u8a5e-\u975e\u81ea\u7acb
-#
-#  adjective-suffix:
-#\u5f62\u5bb9\u8a5e-\u63a5\u5c3e
-#
-#####
-#  adverb: unclassified adverbs
-#\u526f\u8a5e
-#
-#  adverb-misc: Words that can be segmented into one unit and where adnominal 
-#  modification is not possible.
-#  e.g. \u3042\u3044\u304b\u308f\u3089\u305a, \u591a\u5206
-#\u526f\u8a5e-\u4e00\u822c
-#
-#  adverb-particle_conjunction: Adverbs that can be followed by \u306e, \u306f, \u306b, 
-#  \u306a, \u3059\u308b, \u3060, etc.
-#  e.g. \u3053\u3093\u306a\u306b, \u305d\u3093\u306a\u306b, \u3042\u3093\u306a\u306b, \u306a\u306b\u304b, \u306a\u3093\u3067\u3082
-#\u526f\u8a5e-\u52a9\u8a5e\u985e\u63a5\u7d9a
-#
-#####
-#  adnominal: Words that only have noun-modifying forms.
-#  e.g. \u3053\u306e, \u305d\u306e, \u3042\u306e, \u3069\u306e, \u3044\u308f\u3086\u308b, \u306a\u3093\u3089\u304b\u306e, \u4f55\u3089\u304b\u306e, \u3044\u308d\u3093\u306a, \u3053\u3046\u3044\u3046, \u305d\u3046\u3044\u3046, \u3042\u3042\u3044\u3046, 
-#       \u3069\u3046\u3044\u3046, \u3053\u3093\u306a, \u305d\u3093\u306a, \u3042\u3093\u306a, \u3069\u3093\u306a, \u5927\u304d\u306a, \u5c0f\u3055\u306a, \u304a\u304b\u3057\u306a, \u307b\u3093\u306e, \u305f\u3044\u3057\u305f, 
-#       \u300c(, \u3082) \u3055\u308b (\u3053\u3068\u306a\u304c\u3089)\u300d, \u5fae\u3005\u305f\u308b, \u5802\u3005\u305f\u308b, \u5358\u306a\u308b, \u3044\u304b\u306a\u308b, \u6211\u304c\u300d\u300c\u540c\u3058, \u4ea1\u304d
-#\u9023\u4f53\u8a5e
-#
-#####
-#  conjunction: Conjunctions that can occur independently.
-#  e.g. \u304c, \u3051\u308c\u3069\u3082, \u305d\u3057\u3066, \u3058\u3083\u3042, \u305d\u308c\u3069\u3053\u308d\u304b
-\u63a5\u7d9a\u8a5e
-#
-#####
-#  particle: unclassified particles.
-\u52a9\u8a5e
-#
-#  particle-case: case particles where the subclassification is undefined.
-\u52a9\u8a5e-\u683c\u52a9\u8a5e
-#
-#  particle-case-misc: Case particles.
-#  e.g. \u304b\u3089, \u304c, \u3067, \u3068, \u306b, \u3078, \u3088\u308a, \u3092, \u306e, \u306b\u3066
-\u52a9\u8a5e-\u683c\u52a9\u8a5e-\u4e00\u822c
-#
-#  particle-case-quote: the "to" that appears after nouns, a person\u2019s speech, 
-#  quotation marks, expressions of decisions from a meeting, reasons, judgements,
-#  conjectures, etc.
-#  e.g. ( \u3060) \u3068 (\u8ff0\u3079\u305f.), ( \u3067\u3042\u308b) \u3068 (\u3057\u3066\u57f7\u884c\u7336\u4e88...)
-\u52a9\u8a5e-\u683c\u52a9\u8a5e-\u5f15\u7528
-#
-#  particle-case-compound: Compounds of particles and verbs that mainly behave 
-#  like case particles.
-#  e.g. \u3068\u3044\u3046, \u3068\u3044\u3063\u305f, \u3068\u304b\u3044\u3046, \u3068\u3057\u3066, \u3068\u3068\u3082\u306b, \u3068\u5171\u306b, \u3067\u3082\u3063\u3066, \u306b\u3042\u305f\u3063\u3066, \u306b\u5f53\u305f\u3063\u3066, \u306b\u5f53\u3063\u3066,
-#       \u306b\u3042\u305f\u308a, \u306b\u5f53\u305f\u308a, \u306b\u5f53\u308a, \u306b\u5f53\u305f\u308b, \u306b\u3042\u305f\u308b, \u306b\u304a\u3044\u3066, \u306b\u65bc\u3044\u3066,\u306b\u65bc\u3066, \u306b\u304a\u3051\u308b, \u306b\u65bc\u3051\u308b, 
-#       \u306b\u304b\u3051, \u306b\u304b\u3051\u3066, \u306b\u304b\u3093\u3057, \u306b\u95a2\u3057, \u306b\u304b\u3093\u3057\u3066, \u306b\u95a2\u3057\u3066, \u306b\u304b\u3093\u3059\u308b, \u306b\u95a2\u3059\u308b, \u306b\u969b\u3057, 
-#       \u306b\u969b\u3057\u3066, \u306b\u3057\u305f\u304c\u3044, \u306b\u5f93\u3044, \u306b\u5f93\u3046, \u306b\u3057\u305f\u304c\u3063\u3066, \u306b\u5f93\u3063\u3066, \u306b\u305f\u3044\u3057, \u306b\u5bfe\u3057, \u306b\u305f\u3044\u3057\u3066, 
-#       \u306b\u5bfe\u3057\u3066, \u306b\u305f\u3044\u3059\u308b, \u306b\u5bfe\u3059\u308b, \u306b\u3064\u3044\u3066, \u306b\u3064\u304d, \u306b\u3064\u3051, \u306b\u3064\u3051\u3066, \u306b\u3064\u308c, \u306b\u3064\u308c\u3066, \u306b\u3068\u3063\u3066,
-#       \u306b\u3068\u308a, \u306b\u307e\u3064\u308f\u308b, \u306b\u3088\u3063\u3066, \u306b\u4f9d\u3063\u3066, \u306b\u56e0\u3063\u3066, \u306b\u3088\u308a, \u306b\u4f9d\u308a, \u306b\u56e0\u308a, \u306b\u3088\u308b, \u306b\u4f9d\u308b, \u306b\u56e0\u308b, 
-#       \u306b\u308f\u305f\u3063\u3066, \u306b\u308f\u305f\u308b, \u3092\u3082\u3063\u3066, \u3092\u4ee5\u3063\u3066, \u3092\u901a\u3058, \u3092\u901a\u3058\u3066, \u3092\u901a\u3057\u3066, \u3092\u3081\u3050\u3063\u3066, \u3092\u3081\u3050\u308a, \u3092\u3081\u3050\u308b,
-#       \u3063\u3066-\u53e3\u8a9e/, \u3061\u3085\u3046-\u95a2\u897f\u5f01\u300c\u3068\u3044\u3046\u300d/, (\u4f55) \u3066\u3044\u3046 (\u4eba)-\u53e3\u8a9e/, \u3063\u3066\u3044\u3046-\u53e3\u8a9e/, \u3068\u3044\u3075, \u3068\u304b\u3044\u3075
-\u52a9\u8a5e-\u683c\u52a9\u8a5e-\u9023\u8a9e
-#
-#  particle-conjunctive:
-#  e.g. \u304b\u3089, \u304b\u3089\u306b\u306f, \u304c, \u3051\u308c\u3069, \u3051\u308c\u3069\u3082, \u3051\u3069, \u3057, \u3064\u3064, \u3066, \u3067, \u3068, \u3068\u3053\u308d\u304c, \u3069\u3053\u308d\u304b, \u3068\u3082, \u3069\u3082, 
-#       \u306a\u304c\u3089, \u306a\u308a, \u306e\u3067, \u306e\u306b, \u3070, \u3082\u306e\u306e, \u3084 ( \u3057\u305f), \u3084\u3044\u306a\u3084, (\u3053\u308d\u3093) \u3058\u3083(\u3044\u3051\u306a\u3044)-\u53e3\u8a9e/, 
-#       (\u884c\u3063) \u3061\u3083(\u3044\u3051\u306a\u3044)-\u53e3\u8a9e/, (\u8a00\u3063) \u305f\u3063\u3066 (\u3057\u304b\u305f\u304c\u306a\u3044)-\u53e3\u8a9e/, (\u305d\u308c\u304c\u306a\u304f)\u3063\u305f\u3063\u3066 (\u5e73\u6c17)-\u53e3\u8a9e/
-\u52a9\u8a5e-\u63a5\u7d9a\u52a9\u8a5e
-#
-#  particle-dependency:
-#  e.g. \u3053\u305d, \u3055\u3048, \u3057\u304b, \u3059\u3089, \u306f, \u3082, \u305e
-\u52a9\u8a5e-\u4fc2\u52a9\u8a5e
-#
-#  particle-adverbial:
-#  e.g. \u304c\u3066\u3089, \u304b\u3082, \u304f\u3089\u3044, \u4f4d, \u3050\u3089\u3044, \u3057\u3082, (\u5b66\u6821) \u3058\u3083(\u3053\u308c\u304c\u6d41\u884c\u3063\u3066\u3044\u308b)-\u53e3\u8a9e/, 
-#       (\u305d\u308c)\u3058\u3083\u3042 (\u3088\u304f\u306a\u3044)-\u53e3\u8a9e/, \u305a\u3064, (\u79c1) \u306a\u305e, \u306a\u3069, (\u79c1) \u306a\u308a (\u306b), (\u5148\u751f) \u306a\u3093\u304b (\u5927\u5acc\u3044)-\u53e3\u8a9e/,
-#       (\u79c1) \u306a\u3093\u305e, (\u5148\u751f) \u306a\u3093\u3066 (\u5927\u5acc\u3044)-\u53e3\u8a9e/, \u306e\u307f, \u3060\u3051, (\u79c1) \u3060\u3063\u3066-\u53e3\u8a9e/, \u3060\u306b, 
-#       (\u5f7c)\u3063\u305f\u3089-\u53e3\u8a9e/, (\u304a\u8336) \u3067\u3082 (\u3044\u304b\u304c), \u7b49 (\u3068\u3046), (\u4eca\u5f8c) \u3068\u3082, \u3070\u304b\u308a, \u3070\u3063\u304b-\u53e3\u8a9e/, \u3070\u3063\u304b\u308a-\u53e3\u8a9e/,
-#       \u307b\u3069, \u7a0b, \u307e\u3067, \u8fc4, (\u8ab0) \u3082 (\u304c)([\u52a9\u8a5e-\u683c\u52a9\u8a5e] \u304a\u3088\u3073 [\u52a9\u8a5e-\u4fc2\u52a9\u8a5e] \u306e\u524d\u306b\u4f4d\u7f6e\u3059\u308b\u300c\u3082\u300d)
-\u52a9\u8a5e-\u526f\u52a9\u8a5e
-#
-#  particle-interjective: particles with interjective grammatical roles.
-#  e.g. (\u677e\u5cf6) \u3084
-\u52a9\u8a5e-\u9593\u6295\u52a9\u8a5e
-#
-#  particle-coordinate:
-#  e.g. \u3068, \u305f\u308a, \u3060\u306e, \u3060\u308a, \u3068\u304b, \u306a\u308a, \u3084, \u3084\u3089
-\u52a9\u8a5e-\u4e26\u7acb\u52a9\u8a5e
-#
-#  particle-final:
-#  e.g. \u304b\u3044, \u304b\u3057\u3089, \u3055, \u305c, (\u3060)\u3063\u3051-\u53e3\u8a9e/, (\u3068\u307e\u3063\u3066\u308b) \u3067-\u65b9\u8a00/, \u306a, \u30ca, \u306a\u3042-\u53e3\u8a9e/, \u305e, \u306d, \u30cd, 
-#       \u306d\u3047-\u53e3\u8a9e/, \u306d\u3048-\u53e3\u8a9e/, \u306d\u3093-\u65b9\u8a00/, \u306e, \u306e\u3046-\u53e3\u8a9e/, \u3084, \u3088, \u30e8, \u3088\u3049-\u53e3\u8a9e/, \u308f, \u308f\u3044-\u53e3\u8a9e/
-\u52a9\u8a5e-\u7d42\u52a9\u8a5e
-#
-#  particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is 
-#  adverbial, conjunctive, or sentence final. For example:
-#       (a) \u300cA \u304b B \u304b\u300d. Ex:\u300c(\u56fd\u5185\u3067\u904b\u7528\u3059\u308b) \u304b,(\u6d77\u5916\u3067\u904b\u7528\u3059\u308b) \u304b (.)\u300d
-#       (b) Inside an adverb phrase. Ex:\u300c(\u5e78\u3044\u3068\u3044\u3046) \u304b (, \u6b7b\u8005\u306f\u3044\u306a\u304b\u3063\u305f.)\u300d
-#           \u300c(\u7948\u308a\u304c\u5c4a\u3044\u305f\u305b\u3044) \u304b (, \u8a66\u9a13\u306b\u5408\u683c\u3057\u305f.)\u300d
-#       (c) \u300c\u304b\u306e\u3088\u3046\u306b\u300d. Ex:\u300c(\u4f55\u3082\u306a\u304b\u3063\u305f) \u304b (\u306e\u3088\u3046\u306b\u632f\u308b\u821e\u3063\u305f.)\u300d
-#  e.g. \u304b
-\u52a9\u8a5e-\u526f\u52a9\u8a5e\uff0f\u4e26\u7acb\u52a9\u8a5e\uff0f\u7d42\u52a9\u8a5e
-#
-#  particle-adnominalizer: The "no" that attaches to nouns and modifies 
-#  non-inflectional words.
-\u52a9\u8a5e-\u9023\u4f53\u5316
-#
-#  particle-adverbializer: The "ni" and "to" that appear following nouns and adverbs 
-#  that are giongo, giseigo, or gitaigo.
-#  e.g. \u306b, \u3068
-\u52a9\u8a5e-\u526f\u8a5e\u5316
-#
-#  particle-special: A particle that does not fit into one of the above classifications. 
-#  This includes particles that are used in Tanka, Haiku, and other poetry.
-#  e.g. \u304b\u306a, \u3051\u3080, ( \u3057\u305f\u3060\u308d\u3046) \u306b, (\u3042\u3093\u305f) \u306b\u3083(\u308f\u304b\u3089\u3093), (\u4ffa) \u3093 (\u5bb6)
-\u52a9\u8a5e-\u7279\u6b8a
-#
-#####
-#  auxiliary-verb:
-\u52a9\u52d5\u8a5e
-#
-#####
-#  interjection: Greetings and other exclamations.
-#  e.g. \u304a\u306f\u3088\u3046, \u304a\u306f\u3088\u3046\u3054\u3056\u3044\u307e\u3059, \u3053\u3093\u306b\u3061\u306f, \u3053\u3093\u3070\u3093\u306f, \u3042\u308a\u304c\u3068\u3046, \u3069\u3046\u3082\u3042\u308a\u304c\u3068\u3046, \u3042\u308a\u304c\u3068\u3046\u3054\u3056\u3044\u307e\u3059, 
-#       \u3044\u305f\u3060\u304d\u307e\u3059, \u3054\u3061\u305d\u3046\u3055\u307e, \u3055\u3088\u306a\u3089, \u3055\u3088\u3046\u306a\u3089, \u306f\u3044, \u3044\u3044\u3048, \u3054\u3081\u3093, \u3054\u3081\u3093\u306a\u3055\u3044
-#\u611f\u52d5\u8a5e
-#
-#####
-#  symbol: unclassified Symbols.
-\u8a18\u53f7
-#
-#  symbol-misc: A general symbol not in one of the categories below.
-#  e.g. [\u25cb\u25ce@$\u3012\u2192+]
-\u8a18\u53f7-\u4e00\u822c
-#
-#  symbol-comma: Commas
-#  e.g. [,\u3001]
-\u8a18\u53f7-\u8aad\u70b9
-#
-#  symbol-period: Periods and full stops.
-#  e.g. [.\uff0e\u3002]
-\u8a18\u53f7-\u53e5\u70b9
-#
-#  symbol-space: Full-width whitespace.
-\u8a18\u53f7-\u7a7a\u767d
-#
-#  symbol-open_bracket:
-#  e.g. [({\u2018\u201c\u300e\u3010]
-\u8a18\u53f7-\u62ec\u5f27\u958b
-#
-#  symbol-close_bracket:
-#  e.g. [)}\u2019\u201d\u300f\u300d\u3011]
-\u8a18\u53f7-\u62ec\u5f27\u9589
-#
-#  symbol-alphabetic:
-#\u8a18\u53f7-\u30a2\u30eb\u30d5\u30a1\u30d9\u30c3\u30c8
-#
-#####
-#  other: unclassified other
-#\u305d\u306e\u4ed6
-#
-#  other-interjection: Words that are hard to classify as noun-suffixes or 
-#  sentence-final particles.
-#  e.g. (\u3060)\u30a1
-\u305d\u306e\u4ed6-\u9593\u6295
-#
-#####
-#  filler: Aizuchi that occurs during a conversation or sounds inserted as filler.
-#  e.g. \u3042\u306e, \u3046\u3093\u3068, \u3048\u3068
-\u30d5\u30a3\u30e9\u30fc
-#
-#####
-#  non-verbal: non-verbal sound.
-\u975e\u8a00\u8a9e\u97f3
-#
-#####
-#  fragment:
-#\u8a9e\u65ad\u7247
-#
-#####
-#  unknown: unknown part of speech.
-#\u672a\u77e5\u8a9e
-#
-##### End of file

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/rss/conf/lang/stopwords_ar.txt
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/rss/conf/lang/stopwords_ar.txt b/solr/example/example-DIH/solr/rss/conf/lang/stopwords_ar.txt
deleted file mode 100644
index 046829d..0000000
--- a/solr/example/example-DIH/solr/rss/conf/lang/stopwords_ar.txt
+++ /dev/null
@@ -1,125 +0,0 @@
-# This file was created by Jacques Savoy and is distributed under the BSD license.
-# See http://members.unine.ch/jacques.savoy/clef/index.html.
-# Also see http://www.opensource.org/licenses/bsd-license.html
-# Cleaned on October 11, 2009 (not normalized, so use before normalization)
-# This means that when modifying this list, you might need to add some 
-# redundant entries, for example containing forms with both \u0623 and \u0627
-\u0645\u0646
-\u0648\u0645\u0646
-\u0645\u0646\u0647\u0627
-\u0645\u0646\u0647
-\u0641\u064a
-\u0648\u0641\u064a
-\u0641\u064a\u0647\u0627
-\u0641\u064a\u0647
-\u0648
-\u0641
-\u062b\u0645
-\u0627\u0648
-\u0623\u0648
-\u0628
-\u0628\u0647\u0627
-\u0628\u0647
-\u0627
-\u0623
-\u0627\u0649
-\u0627\u064a
-\u0623\u064a
-\u0623\u0649
-\u0644\u0627
-\u0648\u0644\u0627
-\u0627\u0644\u0627
-\u0623\u0644\u0627
-\u0625\u0644\u0627
-\u0644\u0643\u0646
-\u0645\u0627
-\u0648\u0645\u0627
-\u0643\u0645\u0627
-\u0641\u0645\u0627
-\u0639\u0646
-\u0645\u0639
-\u0627\u0630\u0627
-\u0625\u0630\u0627
-\u0627\u0646
-\u0623\u0646
-\u0625\u0646
-\u0627\u0646\u0647\u0627
-\u0623\u0646\u0647\u0627
-\u0625\u0646\u0647\u0627
-\u0627\u0646\u0647
-\u0623\u0646\u0647
-\u0625\u0646\u0647
-\u0628\u0627\u0646
-\u0628\u0623\u0646
-\u0641\u0627\u0646
-\u0641\u0623\u0646
-\u0648\u0627\u0646
-\u0648\u0623\u0646
-\u0648\u0625\u0646
-\u0627\u0644\u062a\u0649
-\u0627\u0644\u062a\u064a
-\u0627\u0644\u0630\u0649
-\u0627\u0644\u0630\u064a
-\u0627\u0644\u0630\u064a\u0646
-\u0627\u0644\u0649
-\u0627\u0644\u064a
-\u0625\u0644\u0649
-\u0625\u0644\u064a
-\u0639\u0644\u0649
-\u0639\u0644\u064a\u0647\u0627
-\u0639\u0644\u064a\u0647
-\u0627\u0645\u0627
-\u0623\u0645\u0627
-\u0625\u0645\u0627
-\u0627\u064a\u0636\u0627
-\u0623\u064a\u0636\u0627
-\u0643\u0644
-\u0648\u0643\u0644
-\u0644\u0645
-\u0648\u0644\u0645
-\u0644\u0646
-\u0648\u0644\u0646
-\u0647\u0649
-\u0647\u064a
-\u0647\u0648
-\u0648\u0647\u0649
-\u0648\u0647\u064a
-\u0648\u0647\u0648
-\u0641\u0647\u0649
-\u0641\u0647\u064a
-\u0641\u0647\u0648
-\u0627\u0646\u062a
-\u0623\u0646\u062a
-\u0644\u0643
-\u0644\u0647\u0627
-\u0644\u0647
-\u0647\u0630\u0647
-\u0647\u0630\u0627
-\u062a\u0644\u0643
-\u0630\u0644\u0643
-\u0647\u0646\u0627\u0643
-\u0643\u0627\u0646\u062a
-\u0643\u0627\u0646
-\u064a\u0643\u0648\u0646
-\u062a\u0643\u0648\u0646
-\u0648\u0643\u0627\u0646\u062a
-\u0648\u0643\u0627\u0646
-\u063a\u064a\u0631
-\u0628\u0639\u0636
-\u0642\u062f
-\u0646\u062d\u0648
-\u0628\u064a\u0646
-\u0628\u064a\u0646\u0645\u0627
-\u0645\u0646\u0630
-\u0636\u0645\u0646
-\u062d\u064a\u062b
-\u0627\u0644\u0627\u0646
-\u0627\u0644\u0622\u0646
-\u062e\u0644\u0627\u0644
-\u0628\u0639\u062f
-\u0642\u0628\u0644
-\u062d\u062a\u0649
-\u0639\u0646\u062f
-\u0639\u0646\u062f\u0645\u0627
-\u0644\u062f\u0649
-\u062c\u0645\u064a\u0639

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/rss/conf/lang/stopwords_bg.txt
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/rss/conf/lang/stopwords_bg.txt b/solr/example/example-DIH/solr/rss/conf/lang/stopwords_bg.txt
deleted file mode 100644
index 1ae4ba2..0000000
--- a/solr/example/example-DIH/solr/rss/conf/lang/stopwords_bg.txt
+++ /dev/null
@@ -1,193 +0,0 @@
-# This file was created by Jacques Savoy and is distributed under the BSD license.
-# See http://members.unine.ch/jacques.savoy/clef/index.html.
-# Also see http://www.opensource.org/licenses/bsd-license.html
-\u0430
-\u0430\u0437
-\u0430\u043a\u043e
-\u0430\u043b\u0430
-\u0431\u0435
-\u0431\u0435\u0437
-\u0431\u0435\u0448\u0435
-\u0431\u0438
-\u0431\u0438\u043b
-\u0431\u0438\u043b\u0430
-\u0431\u0438\u043b\u0438
-\u0431\u0438\u043b\u043e
-\u0431\u043b\u0438\u0437\u043e
-\u0431\u044a\u0434\u0430\u0442
-\u0431\u044a\u0434\u0435
-\u0431\u044f\u0445\u0430
-\u0432
-\u0432\u0430\u0441
-\u0432\u0430\u0448
-\u0432\u0430\u0448\u0430
-\u0432\u0435\u0440\u043e\u044f\u0442\u043d\u043e
-\u0432\u0435\u0447\u0435
-\u0432\u0437\u0435\u043c\u0430
-\u0432\u0438
-\u0432\u0438\u0435
-\u0432\u0438\u043d\u0430\u0433\u0438
-\u0432\u0441\u0435
-\u0432\u0441\u0435\u043a\u0438
-\u0432\u0441\u0438\u0447\u043a\u0438
-\u0432\u0441\u0438\u0447\u043a\u043e
-\u0432\u0441\u044f\u043a\u0430
-\u0432\u044a\u0432
-\u0432\u044a\u043f\u0440\u0435\u043a\u0438
-\u0432\u044a\u0440\u0445\u0443
-\u0433
-\u0433\u0438
-\u0433\u043b\u0430\u0432\u043d\u043e
-\u0433\u043e
-\u0434
-\u0434\u0430
-\u0434\u0430\u043b\u0438
-\u0434\u043e
-\u0434\u043e\u043a\u0430\u0442\u043e
-\u0434\u043e\u043a\u043e\u0433\u0430
-\u0434\u043e\u0440\u0438
-\u0434\u043e\u0441\u0435\u0433\u0430
-\u0434\u043e\u0441\u0442\u0430
-\u0435
-\u0435\u0434\u0432\u0430
-\u0435\u0434\u0438\u043d
-\u0435\u0442\u043e
-\u0437\u0430
-\u0437\u0430\u0434
-\u0437\u0430\u0435\u0434\u043d\u043e
-\u0437\u0430\u0440\u0430\u0434\u0438
-\u0437\u0430\u0441\u0435\u0433\u0430
-\u0437\u0430\u0442\u043e\u0432\u0430
-\u0437\u0430\u0449\u043e
-\u0437\u0430\u0449\u043e\u0442\u043e
-\u0438
-\u0438\u0437
-\u0438\u043b\u0438
-\u0438\u043c
-\u0438\u043c\u0430
-\u0438\u043c\u0430\u0442
-\u0438\u0441\u043a\u0430
-\u0439
-\u043a\u0430\u0437\u0430
-\u043a\u0430\u043a
-\u043a\u0430\u043a\u0432\u0430
-\u043a\u0430\u043a\u0432\u043e
-\u043a\u0430\u043a\u0442\u043e
-\u043a\u0430\u043a\u044a\u0432
-\u043a\u0430\u0442\u043e
-\u043a\u043e\u0433\u0430
-\u043a\u043e\u0433\u0430\u0442\u043e
-\u043a\u043e\u0435\u0442\u043e
-\u043a\u043e\u0438\u0442\u043e
-\u043a\u043e\u0439
-\u043a\u043e\u0439\u0442\u043e
-\u043a\u043e\u043b\u043a\u043e
-\u043a\u043e\u044f\u0442\u043e
-\u043a\u044a\u0434\u0435
-\u043a\u044a\u0434\u0435\u0442\u043e
-\u043a\u044a\u043c
-\u043b\u0438
-\u043c
-\u043c\u0435
-\u043c\u0435\u0436\u0434\u0443
-\u043c\u0435\u043d
-\u043c\u0438
-\u043c\u043d\u043e\u0437\u0438\u043d\u0430
-\u043c\u043e\u0433\u0430
-\u043c\u043e\u0433\u0430\u0442
-\u043c\u043e\u0436\u0435
-\u043c\u043e\u043b\u044f
-\u043c\u043e\u043c\u0435\u043d\u0442\u0430
-\u043c\u0443
-\u043d
-\u043d\u0430
-\u043d\u0430\u0434
-\u043d\u0430\u0437\u0430\u0434
-\u043d\u0430\u0439
-\u043d\u0430\u043f\u0440\u0430\u0432\u0438
-\u043d\u0430\u043f\u0440\u0435\u0434
-\u043d\u0430\u043f\u0440\u0438\u043c\u0435\u0440
-\u043d\u0430\u0441
-\u043d\u0435
-\u043d\u0435\u0433\u043e
-\u043d\u0435\u044f
-\u043d\u0438
-\u043d\u0438\u0435
-\u043d\u0438\u043a\u043e\u0439
-\u043d\u0438\u0442\u043e
-\u043d\u043e
-\u043d\u044f\u043a\u043e\u0438
-\u043d\u044f\u043a\u043e\u0439
-\u043d\u044f\u043c\u0430
-\u043e\u0431\u0430\u0447\u0435
-\u043e\u043a\u043e\u043b\u043e
-\u043e\u0441\u0432\u0435\u043d
-\u043e\u0441\u043e\u0431\u0435\u043d\u043e
-\u043e\u0442
-\u043e\u0442\u0433\u043e\u0440\u0435
-\u043e\u0442\u043d\u043e\u0432\u043e
-\u043e\u0449\u0435
-\u043f\u0430\u043a
-\u043f\u043e
-\u043f\u043e\u0432\u0435\u0447\u0435
-\u043f\u043e\u0432\u0435\u0447\u0435\u0442\u043e
-\u043f\u043e\u0434
-\u043f\u043e\u043d\u0435
-\u043f\u043e\u0440\u0430\u0434\u0438
-\u043f\u043e\u0441\u043b\u0435
-\u043f\u043e\u0447\u0442\u0438
-\u043f\u0440\u0430\u0432\u0438
-\u043f\u0440\u0435\u0434
-\u043f\u0440\u0435\u0434\u0438
-\u043f\u0440\u0435\u0437
-\u043f\u0440\u0438
-\u043f\u044a\u043a
-\u043f\u044a\u0440\u0432\u043e
-\u0441
-\u0441\u0430
-\u0441\u0430\u043c\u043e
-\u0441\u0435
-\u0441\u0435\u0433\u0430
-\u0441\u0438
-\u0441\u043a\u043e\u0440\u043e
-\u0441\u043b\u0435\u0434
-\u0441\u043c\u0435
-\u0441\u043f\u043e\u0440\u0435\u0434
-\u0441\u0440\u0435\u0434
-\u0441\u0440\u0435\u0449\u0443
-\u0441\u0442\u0435
-\u0441\u044a\u043c
-\u0441\u044a\u0441
-\u0441\u044a\u0449\u043e
-\u0442
-\u0442\u0430\u0437\u0438
-\u0442\u0430\u043a\u0430
-\u0442\u0430\u043a\u0438\u0432\u0430
-\u0442\u0430\u043a\u044a\u0432
-\u0442\u0430\u043c
-\u0442\u0432\u043e\u0439
-\u0442\u0435
-\u0442\u0435\u0437\u0438
-\u0442\u0438
-\u0442\u043d
-\u0442\u043e
-\u0442\u043e\u0432\u0430
-\u0442\u043e\u0433\u0430\u0432\u0430
-\u0442\u043e\u0437\u0438
-\u0442\u043e\u0439
-\u0442\u043e\u043b\u043a\u043e\u0432\u0430
-\u0442\u043e\u0447\u043d\u043e
-\u0442\u0440\u044f\u0431\u0432\u0430
-\u0442\u0443\u043a
-\u0442\u044a\u0439
-\u0442\u044f
-\u0442\u044f\u0445
-\u0443
-\u0445\u0430\u0440\u0435\u0441\u0432\u0430
-\u0447
-\u0447\u0435
-\u0447\u0435\u0441\u0442\u043e
-\u0447\u0440\u0435\u0437
-\u0449\u0435
-\u0449\u043e\u043c
-\u044f

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/rss/conf/lang/stopwords_ca.txt
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/rss/conf/lang/stopwords_ca.txt b/solr/example/example-DIH/solr/rss/conf/lang/stopwords_ca.txt
deleted file mode 100644
index 3da65de..0000000
--- a/solr/example/example-DIH/solr/rss/conf/lang/stopwords_ca.txt
+++ /dev/null
@@ -1,220 +0,0 @@
-# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed)
-a
-abans
-ací
-ah
-així
-això
-al
-als
-aleshores
-algun
-alguna
-algunes
-alguns
-alhora
-allà
-allí
-allò
-altra
-altre
-altres
-amb
-ambdós
-ambdues
-apa
-aquell
-aquella
-aquelles
-aquells
-aquest
-aquesta
-aquestes
-aquests
-aquí
-baix
-cada
-cadascú
-cadascuna
-cadascunes
-cadascuns
-com
-contra
-d'un
-d'una
-d'unes
-d'uns
-dalt
-de
-del
-dels
-des
-després
-dins
-dintre
-donat
-doncs
-durant
-e
-eh
-el
-els
-em
-en
-encara
-ens
-entre
-érem
-eren
-éreu
-es
-és
-esta
-està
-estàvem
-estaven
-estàveu
-esteu
-et
-etc
-ets
-fins
-fora
-gairebé
-ha
-han
-has
-havia
-he
-hem
-heu
-hi 
-ho
-i
-igual
-iguals
-ja
-l'hi
-la
-les
-li
-li'n
-llavors
-m'he
-ma
-mal
-malgrat
-mateix
-mateixa
-mateixes
-mateixos
-me
-mentre
-més
-meu
-meus
-meva
-meves
-molt
-molta
-moltes
-molts
-mon
-mons
-n'he
-n'hi
-ne
-ni
-no
-nogensmenys
-només
-nosaltres
-nostra
-nostre
-nostres
-o
-oh
-oi
-on
-pas
-pel
-pels
-per
-però
-perquè
-poc 
-poca
-pocs
-poques
-potser
-propi
-qual
-quals
-quan
-quant 
-que
-què
-quelcom
-qui
-quin
-quina
-quines
-quins
-s'ha
-s'han
-sa
-semblant
-semblants
-ses
-seu 
-seus
-seva
-seva
-seves
-si
-sobre
-sobretot
-sóc
-solament
-sols
-son 
-són
-sons 
-sota
-sou
-t'ha
-t'han
-t'he
-ta
-tal
-també
-tampoc
-tan
-tant
-tanta
-tantes
-teu
-teus
-teva
-teves
-ton
-tons
-tot
-tota
-totes
-tots
-un
-una
-unes
-uns
-us
-va
-vaig
-vam
-van
-vas
-veu
-vosaltres
-vostra
-vostre
-vostres

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/rss/conf/lang/stopwords_ckb.txt
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/rss/conf/lang/stopwords_ckb.txt b/solr/example/example-DIH/solr/rss/conf/lang/stopwords_ckb.txt
deleted file mode 100644
index 87abf11..0000000
--- a/solr/example/example-DIH/solr/rss/conf/lang/stopwords_ckb.txt
+++ /dev/null
@@ -1,136 +0,0 @@
-# set of kurdish stopwords
-# note these have been normalized with our scheme (e represented with U+06D5, etc)
-# constructed from:
-# * Fig 5 of "Building A Test Collection For Sorani Kurdish" (Esmaili et al)
-# * "Sorani Kurdish: A Reference Grammar with selected readings" (Thackston)
-# * Corpus-based analysis of 77M word Sorani collection: wikipedia, news, blogs, etc
-
-# and
-\u0648
-# which
-\u06a9\u06d5
-# of
-\u06cc
-# made/did
-\u06a9\u0631\u062f
-# that/which
-\u0626\u06d5\u0648\u06d5\u06cc
-# on/head
-\u0633\u06d5\u0631
-# two
-\u062f\u0648\u0648
-# also
-\u0647\u06d5\u0631\u0648\u06d5\u0647\u0627
-# from/that
-\u0644\u06d5\u0648
-# makes/does
-\u062f\u06d5\u06a9\u0627\u062a
-# some
-\u0686\u06d5\u0646\u062f
-# every
-\u0647\u06d5\u0631
-
-# demonstratives
-# that
-\u0626\u06d5\u0648
-# this
-\u0626\u06d5\u0645
-
-# personal pronouns
-# I
-\u0645\u0646
-# we
-\u0626\u06ce\u0645\u06d5
-# you
-\u062a\u06c6
-# you
-\u0626\u06ce\u0648\u06d5
-# he/she/it
-\u0626\u06d5\u0648
-# they
-\u0626\u06d5\u0648\u0627\u0646
-
-# prepositions
-# to/with/by
-\u0628\u06d5
-\u067e\u06ce
-# without
-\u0628\u06d5\u0628\u06ce
-# along with/while/during
-\u0628\u06d5\u062f\u06d5\u0645
-# in the opinion of
-\u0628\u06d5\u0644\u0627\u06cc
-# according to
-\u0628\u06d5\u067e\u06ce\u06cc
-# before
-\u0628\u06d5\u0631\u0644\u06d5
-# in the direction of
-\u0628\u06d5\u0631\u06d5\u0648\u06cc
-# in front of/toward
-\u0628\u06d5\u0631\u06d5\u0648\u06d5
-# before/in the face of
-\u0628\u06d5\u0631\u062f\u06d5\u0645
-# without
-\u0628\u06ce
-# except for
-\u0628\u06ce\u062c\u06af\u06d5
-# for
-\u0628\u06c6
-# on/in
-\u062f\u06d5
-\u062a\u06ce
-# with
-\u062f\u06d5\u06af\u06d5\u06b5
-# after
-\u062f\u0648\u0627\u06cc
-# except for/aside from
-\u062c\u06af\u06d5
-# in/from
-\u0644\u06d5
-\u0644\u06ce
-# in front of/before/because of
-\u0644\u06d5\u0628\u06d5\u0631
-# between/among
-\u0644\u06d5\u0628\u06d5\u06cc\u0646\u06cc
-# concerning/about
-\u0644\u06d5\u0628\u0627\u0628\u06d5\u062a
-# concerning
-\u0644\u06d5\u0628\u0627\u0631\u06d5\u06cc
-# instead of
-\u0644\u06d5\u0628\u0627\u062a\u06cc
-# beside
-\u0644\u06d5\u0628\u0646
-# instead of
-\u0644\u06d5\u0628\u0631\u06ce\u062a\u06cc
-# behind
-\u0644\u06d5\u062f\u06d5\u0645
-# with/together with
-\u0644\u06d5\u06af\u06d5\u06b5
-# by
-\u0644\u06d5\u0644\u0627\u06cc\u06d5\u0646
-# within
-\u0644\u06d5\u0646\u0627\u0648
-# between/among
-\u0644\u06d5\u0646\u06ce\u0648
-# for the sake of
-\u0644\u06d5\u067e\u06ce\u0646\u0627\u0648\u06cc
-# with respect to
-\u0644\u06d5\u0631\u06d5\u0648\u06cc
-# by means of/for
-\u0644\u06d5\u0631\u06ce
-# for the sake of
-\u0644\u06d5\u0631\u06ce\u06af\u0627
-# on/on top of/according to
-\u0644\u06d5\u0633\u06d5\u0631
-# under
-\u0644\u06d5\u0698\u06ce\u0631
-# between/among
-\u0646\u0627\u0648
-# between/among
-\u0646\u06ce\u0648\u0627\u0646
-# after
-\u067e\u0627\u0634
-# before
-\u067e\u06ce\u0634
-# like
-\u0648\u06d5\u06a9

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/rss/conf/lang/stopwords_cz.txt
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/rss/conf/lang/stopwords_cz.txt b/solr/example/example-DIH/solr/rss/conf/lang/stopwords_cz.txt
deleted file mode 100644
index 53c6097..0000000
--- a/solr/example/example-DIH/solr/rss/conf/lang/stopwords_cz.txt
+++ /dev/null
@@ -1,172 +0,0 @@
-a
-s
-k
-o
-i
-u
-v
-z
-dnes
-cz
-tímto
-bude\u0161
-budem
-byli
-jse\u0161
-m\u016fj
-svým
-ta
-tomto
-tohle
-tuto
-tyto
-jej
-zda
-pro\u010d
-máte
-tato
-kam
-tohoto
-kdo
-kte\u0159í
-mi
-nám
-tom
-tomuto
-mít
-nic
-proto
-kterou
-byla
-toho
-proto\u017ee
-asi
-ho
-na\u0161i
-napi\u0161te
-re
-co\u017e
-tím
-tak\u017ee
-svých
-její
-svými
-jste
-aj
-tu
-tedy
-teto
-bylo
-kde
-ke
-prav�
-ji
-nad
-nejsou
-\u010di
-pod
-téma
-mezi
-p\u0159es
-ty
-pak
-vám
-ani
-kdy\u017e
-v\u0161ak
-neg
-jsem
-tento
-\u010dlánku
-\u010dlánky
-aby
-jsme
-p\u0159ed
-pta
-jejich
-byl
-je\u0161t\u011b
-a\u017e
-bez
-také
-pouze
-první
-va\u0161e
-kter�
-nás
-nov�
-tipy
-pokud
-m\u016f\u017ee
-strana
-jeho
-sv�
-jin�
-zprávy
-nov�
-není
-vás
-jen
-podle
-zde
-u\u017e
-být
-více
-bude
-ji\u017e
-ne\u017e
-kter�
-by
-kter�
-co
-nebo
-ten
-tak
-m�
-p\u0159i
-od
-po
-jsou
-jak
-dal\u0161í
-ale
-si
-se
-ve
-to
-jako
-za
-zp\u011bt
-ze
-do
-pro
-je
-na
-atd
-atp
-jakmile
-p\u0159i\u010dem\u017e
-j�
-on
-ona
-ono
-oni
-ony
-my
-vy
-j�
-ji
-m\u011b
-mne
-jemu
-tomu
-t\u011bm
-t\u011bmu
-n\u011bmu
-n\u011bmu\u017e
-jeho\u017e
-j�\u017e
-jeliko\u017e
-je\u017e
-jako\u017e
-na\u010de\u017e

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/rss/conf/lang/stopwords_da.txt
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/rss/conf/lang/stopwords_da.txt b/solr/example/example-DIH/solr/rss/conf/lang/stopwords_da.txt
deleted file mode 100644
index 42e6145..0000000
--- a/solr/example/example-DIH/solr/rss/conf/lang/stopwords_da.txt
+++ /dev/null
@@ -1,110 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- |  - Encoding was converted to UTF-8.
- |  - This notice was added.
- |
- | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
-
- | A Danish stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
- | This is a ranked list (commonest to rarest) of stopwords derived from
- | a large text sample.
-
-
-og           | and
-i            | in
-jeg          | I
-det          | that (dem. pronoun)/it (pers. pronoun)
-at           | that (in front of a sentence)/to (with infinitive)
-en           | a/an
-den          | it (pers. pronoun)/that (dem. pronoun)
-til          | to/at/for/until/against/by/of/into, more
-er           | present tense of "to be"
-som          | who, as
-på           | on/upon/in/on/at/to/after/of/with/for, on
-de           | they
-med          | with/by/in, along
-han          | he
-af           | of/by/from/off/for/in/with/on, off
-for          | at/for/to/from/by/of/ago, in front/before, because
-ikke         | not
-der          | who/which, there/those
-var          | past tense of "to be"
-mig          | me/myself
-sig          | oneself/himself/herself/itself/themselves
-men          | but
-et           | a/an/one, one (number), someone/somebody/one
-har          | present tense of "to have"
-om           | round/about/for/in/a, about/around/down, if
-vi           | we
-min          | my
-havde        | past tense of "to have"
-ham          | him
-hun          | she
-nu           | now
-over         | over/above/across/by/beyond/past/on/about, over/past
-da           | then, when/as/since
-fra          | from/off/since, off, since
-du           | you
-ud           | out
-sin          | his/her/its/one's
-dem          | them
-os           | us/ourselves
-op           | up
-man          | you/one
-hans         | his
-hvor         | where
-eller        | or
-hvad         | what
-skal         | must/shall etc.
-selv         | myself/yourself/herself/ourselves etc., even
-her          | here
-alle         | all/everyone/everybody etc.
-vil          | will (verb)
-blev         | past tense of "to stay/to remain/to get/to become"
-kunne        | could
-ind          | in
-når          | when
-være         | infinitive of "to be"
-dog          | however/yet/after all
-noget        | something
-ville        | would
-jo           | you know/you see (adv), yes
-deres        | their/theirs
-efter        | after/behind/according to/for/by/from, later/afterwards
-ned          | down
-skulle       | should
-denne        | this
-end          | than
-dette        | this
-mit          | my/mine
-også         | also
-under        | under/beneath/below/during, below/underneath
-have         | have
-dig          | you
-anden        | other
-hende        | her
-mine         | my
-alt          | everything
-meget        | much/very, plenty of
-sit          | his, her, its, one's
-sine         | his, her, its, one's
-vor          | our
-mod          | against
-disse        | these
-hvis         | if
-din          | your/yours
-nogle        | some
-hos          | by/at
-blive        | be/become
-mange        | many
-ad           | by/through
-bliver       | present tense of "to be/to become"
-hendes       | her/hers
-været        | be
-thi          | for (conj)
-jer          | you
-sådan        | such, like this/like that

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/rss/conf/lang/stopwords_de.txt
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/rss/conf/lang/stopwords_de.txt b/solr/example/example-DIH/solr/rss/conf/lang/stopwords_de.txt
deleted file mode 100644
index 86525e7..0000000
--- a/solr/example/example-DIH/solr/rss/conf/lang/stopwords_de.txt
+++ /dev/null
@@ -1,294 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- |  - Encoding was converted to UTF-8.
- |  - This notice was added.
- |
- | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
-
- | A German stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
- | The number of forms in this list is reduced significantly by passing it
- | through the German stemmer.
-
-
-aber           |  but
-
-alle           |  all
-allem
-allen
-aller
-alles
-
-als            |  than, as
-also           |  so
-am             |  an + dem
-an             |  at
-
-ander          |  other
-andere
-anderem
-anderen
-anderer
-anderes
-anderm
-andern
-anderr
-anders
-
-auch           |  also
-auf            |  on
-aus            |  out of
-bei            |  by
-bin            |  am
-bis            |  until
-bist           |  art
-da             |  there
-damit          |  with it
-dann           |  then
-
-der            |  the
-den
-des
-dem
-die
-das
-
-daß            |  that
-
-derselbe       |  the same
-derselben
-denselben
-desselben
-demselben
-dieselbe
-dieselben
-dasselbe
-
-dazu           |  to that
-
-dein           |  thy
-deine
-deinem
-deinen
-deiner
-deines
-
-denn           |  because
-
-derer          |  of those
-dessen         |  of him
-
-dich           |  thee
-dir            |  to thee
-du             |  thou
-
-dies           |  this
-diese
-diesem
-diesen
-dieser
-dieses
-
-
-doch           |  (several meanings)
-dort           |  (over) there
-
-
-durch          |  through
-
-ein            |  a
-eine
-einem
-einen
-einer
-eines
-
-einig          |  some
-einige
-einigem
-einigen
-einiger
-einiges
-
-einmal         |  once
-
-er             |  he
-ihn            |  him
-ihm            |  to him
-
-es             |  it
-etwas          |  something
-
-euer           |  your
-eure
-eurem
-euren
-eurer
-eures
-
-für            |  for
-gegen          |  towards
-gewesen        |  p.p. of sein
-hab            |  have
-habe           |  have
-haben          |  have
-hat            |  has
-hatte          |  had
-hatten         |  had
-hier           |  here
-hin            |  there
-hinter         |  behind
-
-ich            |  I
-mich           |  me
-mir            |  to me
-
-
-ihr            |  you, to her
-ihre
-ihrem
-ihren
-ihrer
-ihres
-euch           |  to you
-
-im             |  in + dem
-in             |  in
-indem          |  while
-ins            |  in + das
-ist            |  is
-
-jede           |  each, every
-jedem
-jeden
-jeder
-jedes
-
-jene           |  that
-jenem
-jenen
-jener
-jenes
-
-jetzt          |  now
-kann           |  can
-
-kein           |  no
-keine
-keinem
-keinen
-keiner
-keines
-
-können         |  can
-könnte         |  could
-machen         |  do
-man            |  one
-
-manche         |  some, many a
-manchem
-manchen
-mancher
-manches
-
-mein           |  my
-meine
-meinem
-meinen
-meiner
-meines
-
-mit            |  with
-muss           |  must
-musste         |  had to
-nach           |  to(wards)
-nicht          |  not
-nichts         |  nothing
-noch           |  still, yet
-nun            |  now
-nur            |  only
-ob             |  whether
-oder           |  or
-ohne           |  without
-sehr           |  very
-
-sein           |  his
-seine
-seinem
-seinen
-seiner
-seines
-
-selbst         |  self
-sich           |  herself
-
-sie            |  they, she
-ihnen          |  to them
-
-sind           |  are
-so             |  so
-
-solche         |  such
-solchem
-solchen
-solcher
-solches
-
-soll           |  shall
-sollte         |  should
-sondern        |  but
-sonst          |  else
-über           |  over
-um             |  about, around
-und            |  and
-
-uns            |  us
-unse
-unsem
-unsen
-unser
-unses
-
-unter          |  under
-viel           |  much
-vom            |  von + dem
-von            |  from
-vor            |  before
-während        |  while
-war            |  was
-waren          |  were
-warst          |  wast
-was            |  what
-weg            |  away, off
-weil           |  because
-weiter         |  further
-
-welche         |  which
-welchem
-welchen
-welcher
-welches
-
-wenn           |  when
-werde          |  will
-werden         |  will
-wie            |  how
-wieder         |  again
-will           |  want
-wir            |  we
-wird           |  will
-wirst          |  willst
-wo             |  where
-wollen         |  want
-wollte         |  wanted
-würde          |  would
-würden         |  would
-zu             |  to
-zum            |  zu + dem
-zur            |  zu + der
-zwar           |  indeed
-zwischen       |  between
-

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/rss/conf/lang/stopwords_el.txt
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/rss/conf/lang/stopwords_el.txt b/solr/example/example-DIH/solr/rss/conf/lang/stopwords_el.txt
deleted file mode 100644
index 232681f..0000000
--- a/solr/example/example-DIH/solr/rss/conf/lang/stopwords_el.txt
+++ /dev/null
@@ -1,78 +0,0 @@
-# Lucene Greek Stopwords list
-# Note: by default this file is used after GreekLowerCaseFilter,
-# so when modifying this file use '\u03c3' instead of '\u03c2' 
-\u03bf
-\u03b7
-\u03c4\u03bf
-\u03bf\u03b9
-\u03c4\u03b1
-\u03c4\u03bf\u03c5
-\u03c4\u03b7\u03c3
-\u03c4\u03c9\u03bd
-\u03c4\u03bf\u03bd
-\u03c4\u03b7\u03bd
-\u03ba\u03b1\u03b9 
-\u03ba\u03b9
-\u03ba
-\u03b5\u03b9\u03bc\u03b1\u03b9
-\u03b5\u03b9\u03c3\u03b1\u03b9
-\u03b5\u03b9\u03bd\u03b1\u03b9
-\u03b5\u03b9\u03bc\u03b1\u03c3\u03c4\u03b5
-\u03b5\u03b9\u03c3\u03c4\u03b5
-\u03c3\u03c4\u03bf
-\u03c3\u03c4\u03bf\u03bd
-\u03c3\u03c4\u03b7
-\u03c3\u03c4\u03b7\u03bd
-\u03bc\u03b1
-\u03b1\u03bb\u03bb\u03b1
-\u03b1\u03c0\u03bf
-\u03b3\u03b9\u03b1
-\u03c0\u03c1\u03bf\u03c3
-\u03bc\u03b5
-\u03c3\u03b5
-\u03c9\u03c3
-\u03c0\u03b1\u03c1\u03b1
-\u03b1\u03bd\u03c4\u03b9
-\u03ba\u03b1\u03c4\u03b1
-\u03bc\u03b5\u03c4\u03b1
-\u03b8\u03b1
-\u03bd\u03b1
-\u03b4\u03b5
-\u03b4\u03b5\u03bd
-\u03bc\u03b7
-\u03bc\u03b7\u03bd
-\u03b5\u03c0\u03b9
-\u03b5\u03bd\u03c9
-\u03b5\u03b1\u03bd
-\u03b1\u03bd
-\u03c4\u03bf\u03c4\u03b5
-\u03c0\u03bf\u03c5
-\u03c0\u03c9\u03c3
-\u03c0\u03bf\u03b9\u03bf\u03c3
-\u03c0\u03bf\u03b9\u03b1
-\u03c0\u03bf\u03b9\u03bf
-\u03c0\u03bf\u03b9\u03bf\u03b9
-\u03c0\u03bf\u03b9\u03b5\u03c3
-\u03c0\u03bf\u03b9\u03c9\u03bd
-\u03c0\u03bf\u03b9\u03bf\u03c5\u03c3
-\u03b1\u03c5\u03c4\u03bf\u03c3
-\u03b1\u03c5\u03c4\u03b7
-\u03b1\u03c5\u03c4\u03bf
-\u03b1\u03c5\u03c4\u03bf\u03b9
-\u03b1\u03c5\u03c4\u03c9\u03bd
-\u03b1\u03c5\u03c4\u03bf\u03c5\u03c3
-\u03b1\u03c5\u03c4\u03b5\u03c3
-\u03b1\u03c5\u03c4\u03b1
-\u03b5\u03ba\u03b5\u03b9\u03bd\u03bf\u03c3
-\u03b5\u03ba\u03b5\u03b9\u03bd\u03b7
-\u03b5\u03ba\u03b5\u03b9\u03bd\u03bf
-\u03b5\u03ba\u03b5\u03b9\u03bd\u03bf\u03b9
-\u03b5\u03ba\u03b5\u03b9\u03bd\u03b5\u03c3
-\u03b5\u03ba\u03b5\u03b9\u03bd\u03b1
-\u03b5\u03ba\u03b5\u03b9\u03bd\u03c9\u03bd
-\u03b5\u03ba\u03b5\u03b9\u03bd\u03bf\u03c5\u03c3
-\u03bf\u03c0\u03c9\u03c3
-\u03bf\u03bc\u03c9\u03c3
-\u03b9\u03c3\u03c9\u03c3
-\u03bf\u03c3\u03bf
-\u03bf\u03c4\u03b9

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/rss/conf/lang/stopwords_en.txt
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/rss/conf/lang/stopwords_en.txt b/solr/example/example-DIH/solr/rss/conf/lang/stopwords_en.txt
deleted file mode 100644
index 2c164c0..0000000
--- a/solr/example/example-DIH/solr/rss/conf/lang/stopwords_en.txt
+++ /dev/null
@@ -1,54 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# a couple of test stopwords to test that the words are really being
-# configured from this file:
-stopworda
-stopwordb
-
-# Standard english stop words taken from Lucene's StopAnalyzer
-a
-an
-and
-are
-as
-at
-be
-but
-by
-for
-if
-in
-into
-is
-it
-no
-not
-of
-on
-or
-such
-that
-the
-their
-then
-there
-these
-they
-this
-to
-was
-will
-with