You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by cp...@apache.org on 2017/04/04 15:32:58 UTC
[16/36] lucene-solr:jira/solr-6203: SOLR-7383: Replace DIH 'rss'
example with 'atom'. The rss example was broken for multiple reasons. The atom
example showcases the same - and more - features and uses the smallest config
file needed to make it work.
SOLR-7383: Replace DIH 'rss' example with 'atom'
rss example was broken for multiple reasons.
atom example showcases the same - and more - features
and uses the smallest config file needed to make it work.
Project: http://git-wip-us.apache.org/repos/asf/lucene-solr/repo
Commit: http://git-wip-us.apache.org/repos/asf/lucene-solr/commit/580f6e98
Tree: http://git-wip-us.apache.org/repos/asf/lucene-solr/tree/580f6e98
Diff: http://git-wip-us.apache.org/repos/asf/lucene-solr/diff/580f6e98
Branch: refs/heads/jira/solr-6203
Commit: 580f6e98fb033dbbb8e0921fc3175021714ce956
Parents: 35aac1d
Author: Alexandre Rafalovitch <ar...@apache.org>
Authored: Sat Apr 1 13:42:23 2017 -0400
Committer: Alexandre Rafalovitch <ar...@apache.org>
Committed: Sat Apr 1 13:42:23 2017 -0400
----------------------------------------------------------------------
solr/CHANGES.txt | 2 +
solr/README.txt | 2 +-
solr/example/README.txt | 2 +-
solr/example/example-DIH/README.txt | 11 +-
.../solr/atom/conf/atom-data-config.xml | 35 +
.../solr/atom/conf/lang/stopwords_en.txt | 54 +
.../example-DIH/solr/atom/conf/managed-schema | 106 +
.../example-DIH/solr/atom/conf/protwords.txt | 17 +
.../example-DIH/solr/atom/conf/solrconfig.xml | 61 +
.../example-DIH/solr/atom/conf/synonyms.txt | 29 +
.../example-DIH/solr/atom/conf/url_types.txt | 1 +
.../example-DIH/solr/atom/core.properties | 0
.../example-DIH/solr/rss/conf/admin-extra.html | 24 -
.../solr/rss/conf/admin-extra.menu-bottom.html | 25 -
.../solr/rss/conf/admin-extra.menu-top.html | 25 -
.../clustering/carrot2/kmeans-attributes.xml | 19 -
.../clustering/carrot2/lingo-attributes.xml | 24 -
.../conf/clustering/carrot2/stc-attributes.xml | 19 -
.../example-DIH/solr/rss/conf/currency.xml | 67 -
.../example-DIH/solr/rss/conf/elevate.xml | 42 -
.../solr/rss/conf/lang/contractions_ca.txt | 8 -
.../solr/rss/conf/lang/contractions_fr.txt | 15 -
.../solr/rss/conf/lang/contractions_ga.txt | 5 -
.../solr/rss/conf/lang/contractions_it.txt | 23 -
.../solr/rss/conf/lang/hyphenations_ga.txt | 5 -
.../solr/rss/conf/lang/stemdict_nl.txt | 6 -
.../solr/rss/conf/lang/stoptags_ja.txt | 420 --
.../solr/rss/conf/lang/stopwords_ar.txt | 125 -
.../solr/rss/conf/lang/stopwords_bg.txt | 193 -
.../solr/rss/conf/lang/stopwords_ca.txt | 220 -
.../solr/rss/conf/lang/stopwords_ckb.txt | 136 -
.../solr/rss/conf/lang/stopwords_cz.txt | 172 -
.../solr/rss/conf/lang/stopwords_da.txt | 110 -
.../solr/rss/conf/lang/stopwords_de.txt | 294 --
.../solr/rss/conf/lang/stopwords_el.txt | 78 -
.../solr/rss/conf/lang/stopwords_en.txt | 54 -
.../solr/rss/conf/lang/stopwords_es.txt | 356 --
.../solr/rss/conf/lang/stopwords_eu.txt | 99 -
.../solr/rss/conf/lang/stopwords_fa.txt | 313 --
.../solr/rss/conf/lang/stopwords_fi.txt | 97 -
.../solr/rss/conf/lang/stopwords_fr.txt | 186 -
.../solr/rss/conf/lang/stopwords_ga.txt | 110 -
.../solr/rss/conf/lang/stopwords_gl.txt | 161 -
.../solr/rss/conf/lang/stopwords_hi.txt | 235 --
.../solr/rss/conf/lang/stopwords_hu.txt | 211 -
.../solr/rss/conf/lang/stopwords_hy.txt | 46 -
.../solr/rss/conf/lang/stopwords_id.txt | 359 --
.../solr/rss/conf/lang/stopwords_it.txt | 303 --
.../solr/rss/conf/lang/stopwords_ja.txt | 127 -
.../solr/rss/conf/lang/stopwords_lv.txt | 172 -
.../solr/rss/conf/lang/stopwords_nl.txt | 119 -
.../solr/rss/conf/lang/stopwords_no.txt | 194 -
.../solr/rss/conf/lang/stopwords_pt.txt | 253 --
.../solr/rss/conf/lang/stopwords_ro.txt | 233 --
.../solr/rss/conf/lang/stopwords_ru.txt | 243 --
.../solr/rss/conf/lang/stopwords_sv.txt | 133 -
.../solr/rss/conf/lang/stopwords_th.txt | 119 -
.../solr/rss/conf/lang/stopwords_tr.txt | 212 -
.../solr/rss/conf/lang/userdict_ja.txt | 29 -
.../example-DIH/solr/rss/conf/managed-schema | 1096 -----
.../solr/rss/conf/mapping-FoldToASCII.txt | 3813 ------------------
.../solr/rss/conf/mapping-ISOLatin1Accent.txt | 246 --
.../example-DIH/solr/rss/conf/protwords.txt | 21 -
.../solr/rss/conf/rss-data-config.xml | 26 -
.../example-DIH/solr/rss/conf/solrconfig.xml | 1396 -------
.../example-DIH/solr/rss/conf/spellings.txt | 2 -
.../example-DIH/solr/rss/conf/stopwords.txt | 14 -
.../example-DIH/solr/rss/conf/synonyms.txt | 29 -
.../example-DIH/solr/rss/conf/update-script.js | 53 -
.../example-DIH/solr/rss/conf/xslt/example.xsl | 132 -
.../solr/rss/conf/xslt/example_atom.xsl | 67 -
.../solr/rss/conf/xslt/example_rss.xsl | 66 -
.../example-DIH/solr/rss/conf/xslt/luke.xsl | 337 --
.../solr/rss/conf/xslt/updateXml.xsl | 70 -
.../example-DIH/solr/rss/core.properties | 0
75 files changed, 312 insertions(+), 13795 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/CHANGES.txt
----------------------------------------------------------------------
diff --git a/solr/CHANGES.txt b/solr/CHANGES.txt
index 1efefd0..3187dc3 100644
--- a/solr/CHANGES.txt
+++ b/solr/CHANGES.txt
@@ -180,6 +180,8 @@ Other Changes
* SOLR-10399: Generalize some internal facet logic to simplify points/non-points field handling (Adrien Grand, hossman)
+* SOLR-7383: New DataImportHandler 'atom' example, replacing broken 'rss' example (Alexandre Rafalovitch)
+
================== 6.5.0 ==================
Consult the LUCENE_CHANGES.txt file for additional, low level, changes in this release.
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/README.txt
----------------------------------------------------------------------
diff --git a/solr/README.txt b/solr/README.txt
index 4ef5eac..6af0cc6 100644
--- a/solr/README.txt
+++ b/solr/README.txt
@@ -67,7 +67,7 @@ Solr includes a few examples to help you get started. To run a specific example,
bin/solr -e <EXAMPLE> where <EXAMPLE> is one of:
cloud : SolrCloud example
- dih : Data Import Handler (rdbms, mail, rss, tika)
+ dih : Data Import Handler (rdbms, mail, atom, tika)
schemaless : Schema-less example (schema is inferred from data during indexing)
techproducts : Kitchen sink example providing comprehensive examples of Solr features
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/README.txt
----------------------------------------------------------------------
diff --git a/solr/example/README.txt b/solr/example/README.txt
index d8402eb..4c8cca1 100644
--- a/solr/example/README.txt
+++ b/solr/example/README.txt
@@ -22,7 +22,7 @@ separate directory. To run a specific example, do:
bin/solr -e <EXAMPLE> where <EXAMPLE> is one of:
cloud : SolrCloud example
- dih : Data Import Handler (rdbms, mail, rss, tika)
+ dih : Data Import Handler (rdbms, mail, atom, tika)
schemaless : Schema-less example (schema is inferred from data during indexing)
techproducts : Kitchen sink example providing comprehensive examples of Solr features
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/README.txt
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/README.txt b/solr/example/example-DIH/README.txt
index 0926bb6..ea8d28f 100644
--- a/solr/example/example-DIH/README.txt
+++ b/solr/example/example-DIH/README.txt
@@ -16,7 +16,7 @@
Solr DataImportHandler example configuration
--------------------------------------------
-To run this example, use the "-e" option of the bin/solr script:
+To run this multi-core example, use the "-e" option of the bin/solr script:
> bin/solr -e dih
@@ -28,9 +28,9 @@ When Solr is started connect to:
http://localhost:8983/solr/db/dataimport?command=full-import
-* To import data from an RSS feed, connect to:
+* To import data from an ATOM feed, connect to:
- http://localhost:8983/solr/rss/dataimport?command=full-import
+ http://localhost:8983/solr/atom/dataimport?command=full-import
* To import data from your IMAP server:
@@ -45,6 +45,5 @@ When Solr is started connect to:
http://localhost:8983/solr/tika/dataimport?command=full-import
-See also README.txt in the solr subdirectory, and check
-http://wiki.apache.org/solr/DataImportHandler for detailed
-usage guide and tutorial.
+See also the Solr Reference Guide for a detailed usage guide:
+https://cwiki.apache.org/confluence/display/solr/Uploading+Structured+Data+Store+Data+with+the+Data+Import+Handler
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/atom/conf/atom-data-config.xml
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/atom/conf/atom-data-config.xml b/solr/example/example-DIH/solr/atom/conf/atom-data-config.xml
new file mode 100644
index 0000000..53b5060
--- /dev/null
+++ b/solr/example/example-DIH/solr/atom/conf/atom-data-config.xml
@@ -0,0 +1,35 @@
+<dataConfig>
+ <dataSource type="URLDataSource"/>
+ <document>
+
+ <entity name="stackoverflow"
+ url="http://stackoverflow.com/feeds/tag/solr"
+ processor="XPathEntityProcessor"
+ forEach="/feed|/feed/entry"
+ transformer="HTMLStripTransformer,RegexTransformer">
+
+ <!-- Pick this value up from the feed level and apply to all documents -->
+ <field column="lastchecked_dt" xpath="/feed/updated" commonField="true"/>
+
+ <!-- Keep only the final numeric part of the URL -->
+ <field column="id" xpath="/feed/entry/id" regex=".*/" replaceWith=""/>
+
+ <field column="title" xpath="/feed/entry/title"/>
+ <field column="author" xpath="/feed/entry/author/name"/>
+ <field column="category" xpath="/feed/entry/category/@term"/>
+ <field column="link" xpath="/feed/entry/link[@rel='alternate']/@href"/>
+
+ <!-- Use transformers to convert HTML into plain text.
+ There is also an UpdateRequestProcessor to trim remaining spaces.
+ -->
+ <field column="summary" xpath="/feed/entry/summary" stripHTML="true" regex="( |\n)+" replaceWith=" "/>
+
+ <!-- Ignore namespaces when matching XPath -->
+ <field column="rank" xpath="/feed/entry/rank"/>
+
+ <field column="published_dt" xpath="/feed/entry/published"/>
+ <field column="updated_dt" xpath="/feed/entry/updated"/>
+ </entity>
+
+ </document>
+</dataConfig>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/atom/conf/lang/stopwords_en.txt
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/atom/conf/lang/stopwords_en.txt b/solr/example/example-DIH/solr/atom/conf/lang/stopwords_en.txt
new file mode 100644
index 0000000..2c164c0
--- /dev/null
+++ b/solr/example/example-DIH/solr/atom/conf/lang/stopwords_en.txt
@@ -0,0 +1,54 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# a couple of test stopwords to test that the words are really being
+# configured from this file:
+stopworda
+stopwordb
+
+# Standard english stop words taken from Lucene's StopAnalyzer
+a
+an
+and
+are
+as
+at
+be
+but
+by
+for
+if
+in
+into
+is
+it
+no
+not
+of
+on
+or
+such
+that
+the
+their
+then
+there
+these
+they
+this
+to
+was
+will
+with
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/atom/conf/managed-schema
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/atom/conf/managed-schema b/solr/example/example-DIH/solr/atom/conf/managed-schema
new file mode 100644
index 0000000..5875152
--- /dev/null
+++ b/solr/example/example-DIH/solr/atom/conf/managed-schema
@@ -0,0 +1,106 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<schema name="example-DIH-atom" version="1.6">
+ <uniqueKey>id</uniqueKey>
+
+ <field name="id" type="string" indexed="true" stored="true" required="true"/>
+ <field name="title" type="text_en_splitting" indexed="true" stored="true"/>
+ <field name="author" type="string" indexed="true" stored="true"/>
+ <field name="category" type="string" indexed="true" stored="true" multiValued="true"/>
+ <field name="link" type="string" indexed="true" stored="true"/>
+ <field name="summary" type="text_en_splitting" indexed="true" stored="true"/>
+ <field name="rank" type="pint" indexed="true" stored="true"/>
+
+ <dynamicField name="*_dt" type="pdate" indexed="true" stored="true"/>
+
+ <!-- Catch-all field, aggregating all "useful to search as text" fields via the copyField instructions -->
+ <field name="text" type="text_en_splitting" indexed="true" stored="false" multiValued="true"/>
+
+ <field name="urls" type="url_only" indexed="true" stored="false"/>
+
+
+ <copyField source="id" dest="text"/>
+ <copyField source="title" dest="text"/>
+ <copyField source="author" dest="text"/>
+ <copyField source="category" dest="text"/>
+ <copyField source="summary" dest="text"/>
+
+ <!-- extract URLs from summary for faceting -->
+ <copyField source="summary" dest="urls"/>
+
+ <fieldType name="string" class="solr.StrField" sortMissingLast="true" docValues="true"/>
+ <fieldType name="pint" class="solr.IntPointField" docValues="true"/>
+ <fieldType name="pdate" class="solr.DatePointField" docValues="true"/>
+
+
+ <!-- A text field with defaults appropriate for English, plus
+ aggressive word-splitting and autophrase features enabled.
+ This field is just like text_en, except it adds
+ WordDelimiterFilter to enable splitting and matching of
+ words on case-change, alpha numeric boundaries, and
+ non-alphanumeric chars. This means certain compound word
+ cases will work, for example query "wi fi" will match
+ document "WiFi" or "wi-fi".
+ -->
+ <fieldType name="text_en_splitting" class="solr.TextField"
+ positionIncrementGap="100" autoGeneratePhraseQueries="true">
+ <analyzer type="index">
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <!-- in this example, we will only use synonyms at query time
+ <filter class="solr.SynonymGraphFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
+ -->
+ <!-- Case insensitive stop word removal. -->
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
+ <filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1"
+ catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+ <filter class="solr.PorterStemFilterFactory"/>
+ <filter class="solr.FlattenGraphFilterFactory"/>
+ </analyzer>
+ <analyzer type="query">
+ <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+ <filter class="solr.SynonymGraphFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
+ <filter class="solr.StopFilterFactory"
+ ignoreCase="true"
+ words="lang/stopwords_en.txt"
+ />
+ <filter class="solr.WordDelimiterGraphFilterFactory" generateWordParts="1" generateNumberParts="1"
+ catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+ <filter class="solr.PorterStemFilterFactory"/>
+ </analyzer>
+ </fieldType>
+
+ <!-- Field type that extracts URLs from the text.
+ As the stored representation is not changed, it is only useful for faceting.
+ It is not terribly useful for searching URLs either, as there are too many special symbols.
+ -->
+ <fieldType name="url_only" class="solr.TextField" positionIncrementGap="100">
+ <analyzer type="index">
+ <tokenizer class="solr.UAX29URLEmailTokenizerFactory" maxTokenLength="255"/>
+ <filter class="solr.TypeTokenFilterFactory" types="url_types.txt" useWhitelist="true"/>
+ </analyzer>
+ <analyzer type="query">
+ <tokenizer class="solr.KeywordTokenizerFactory"/>
+ </analyzer>
+ </fieldType>
+
+</schema>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/atom/conf/protwords.txt
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/atom/conf/protwords.txt b/solr/example/example-DIH/solr/atom/conf/protwords.txt
new file mode 100644
index 0000000..1303e42
--- /dev/null
+++ b/solr/example/example-DIH/solr/atom/conf/protwords.txt
@@ -0,0 +1,17 @@
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#-----------------------------------------------------------------------
+# Use a protected word file to protect against the stemmer reducing two
+# unrelated words to the same base word.
+
+lucene
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/atom/conf/solrconfig.xml
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/atom/conf/solrconfig.xml b/solr/example/example-DIH/solr/atom/conf/solrconfig.xml
new file mode 100644
index 0000000..22005dd
--- /dev/null
+++ b/solr/example/example-DIH/solr/atom/conf/solrconfig.xml
@@ -0,0 +1,61 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!--
+ This is a DEMO configuration, highlighting elements
+ specifically needed to get this example running
+ such as libraries and request handler specifics.
+
+ It uses defaults for, or does not define, most production-level settings
+ such as various caches or auto-commit policies.
+
+ See Solr Reference Guide and other examples for
+ more details on a well configured solrconfig.xml
+ https://cwiki.apache.org/confluence/display/solr/The+Well-Configured+Solr+Instance
+-->
+<config>
+
+ <!-- Controls what version of Lucene various components of Solr
+ adhere to. Generally, you want to use the latest version to
+ get all bug fixes and improvements. It is highly recommended
+ that you fully re-index after changing this setting as it can
+ affect both how text is indexed and queried.
+ -->
+ <luceneMatchVersion>7.0.0</luceneMatchVersion>
+
+ <lib dir="${solr.install.dir:../../../..}/dist/" regex="solr-dataimporthandler-.*\.jar"/>
+
+ <requestHandler name="/select" class="solr.SearchHandler">
+ <lst name="defaults">
+ <str name="echoParams">explicit</str>
+ <str name="df">text</str>
+ </lst>
+ </requestHandler>
+
+ <requestHandler name="/dataimport" class="solr.DataImportHandler">
+ <lst name="defaults">
+ <str name="config">atom-data-config.xml</str>
+ <str name="processor">trim_text</str>
+ </lst>
+ </requestHandler>
+
+ <updateProcessor class="solr.processor.TrimFieldUpdateProcessorFactory" name="trim_text">
+ <str name="typeName">text_en_splitting</str>
+ </updateProcessor>
+
+</config>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/atom/conf/synonyms.txt
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/atom/conf/synonyms.txt b/solr/example/example-DIH/solr/atom/conf/synonyms.txt
new file mode 100644
index 0000000..eab4ee8
--- /dev/null
+++ b/solr/example/example-DIH/solr/atom/conf/synonyms.txt
@@ -0,0 +1,29 @@
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#-----------------------------------------------------------------------
+#some test synonym mappings unlikely to appear in real input text
+aaafoo => aaabar
+bbbfoo => bbbfoo bbbbar
+cccfoo => cccbar cccbaz
+fooaaa,baraaa,bazaaa
+
+# Some synonym groups specific to this example
+GB,gib,gigabyte,gigabytes
+MB,mib,megabyte,megabytes
+Television, Televisions, TV, TVs
+#notice we use "gib" instead of "GiB" so any WordDelimiterGraphFilter coming
+#after us won't split it into two words.
+
+# Synonym mappings can be used for spelling correction too
+pixima => pixma
+
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/atom/conf/url_types.txt
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/atom/conf/url_types.txt b/solr/example/example-DIH/solr/atom/conf/url_types.txt
new file mode 100644
index 0000000..808f313
--- /dev/null
+++ b/solr/example/example-DIH/solr/atom/conf/url_types.txt
@@ -0,0 +1 @@
+<URL>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/atom/core.properties
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/atom/core.properties b/solr/example/example-DIH/solr/atom/core.properties
new file mode 100644
index 0000000..e69de29
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/rss/conf/admin-extra.html
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/rss/conf/admin-extra.html b/solr/example/example-DIH/solr/rss/conf/admin-extra.html
deleted file mode 100644
index fecab20..0000000
--- a/solr/example/example-DIH/solr/rss/conf/admin-extra.html
+++ /dev/null
@@ -1,24 +0,0 @@
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<!-- The content of this page will be statically included into the top-
-right box of the cores overview page. Uncomment this as an example to
-see there the content will show up.
-
-<img src="img/ico/construction.png"> This line will appear at the top-
-right box on collection1's Overview
--->
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/rss/conf/admin-extra.menu-bottom.html
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/rss/conf/admin-extra.menu-bottom.html b/solr/example/example-DIH/solr/rss/conf/admin-extra.menu-bottom.html
deleted file mode 100644
index 3359a46..0000000
--- a/solr/example/example-DIH/solr/rss/conf/admin-extra.menu-bottom.html
+++ /dev/null
@@ -1,25 +0,0 @@
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<!-- admin-extra.menu-bottom.html -->
-<!--
-<li>
- <a href="#" style="background-image: url(img/ico/construction.png);">
- LAST ITEM
- </a>
-</li>
--->
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/rss/conf/admin-extra.menu-top.html
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/rss/conf/admin-extra.menu-top.html b/solr/example/example-DIH/solr/rss/conf/admin-extra.menu-top.html
deleted file mode 100644
index 0886cee..0000000
--- a/solr/example/example-DIH/solr/rss/conf/admin-extra.menu-top.html
+++ /dev/null
@@ -1,25 +0,0 @@
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<!-- admin-extra.menu-top.html -->
-<!--
-<li>
- <a href="#" style="background-image: url(img/ico/construction.png);">
- FIRST ITEM
- </a>
-</li>
--->
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/rss/conf/clustering/carrot2/kmeans-attributes.xml
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/rss/conf/clustering/carrot2/kmeans-attributes.xml b/solr/example/example-DIH/solr/rss/conf/clustering/carrot2/kmeans-attributes.xml
deleted file mode 100644
index d802465..0000000
--- a/solr/example/example-DIH/solr/rss/conf/clustering/carrot2/kmeans-attributes.xml
+++ /dev/null
@@ -1,19 +0,0 @@
-<!--
- Default configuration for the bisecting k-means clustering algorithm.
-
- This file can be loaded (and saved) by Carrot2 Workbench.
- http://project.carrot2.org/download.html
--->
-<attribute-sets default="attributes">
- <attribute-set id="attributes">
- <value-set>
- <label>attributes</label>
- <attribute key="MultilingualClustering.defaultLanguage">
- <value type="org.carrot2.core.LanguageCode" value="ENGLISH"/>
- </attribute>
- <attribute key="MultilingualClustering.languageAggregationStrategy">
- <value type="org.carrot2.text.clustering.MultilingualClustering$LanguageAggregationStrategy" value="FLATTEN_MAJOR_LANGUAGE"/>
- </attribute>
- </value-set>
- </attribute-set>
-</attribute-sets>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/rss/conf/clustering/carrot2/lingo-attributes.xml
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/rss/conf/clustering/carrot2/lingo-attributes.xml b/solr/example/example-DIH/solr/rss/conf/clustering/carrot2/lingo-attributes.xml
deleted file mode 100644
index 4bf1360..0000000
--- a/solr/example/example-DIH/solr/rss/conf/clustering/carrot2/lingo-attributes.xml
+++ /dev/null
@@ -1,24 +0,0 @@
-<!--
- Default configuration for the Lingo clustering algorithm.
-
- This file can be loaded (and saved) by Carrot2 Workbench.
- http://project.carrot2.org/download.html
--->
-<attribute-sets default="attributes">
- <attribute-set id="attributes">
- <value-set>
- <label>attributes</label>
- <!--
- The language to assume for clustered documents.
- For a list of allowed values, see:
- http://download.carrot2.org/stable/manual/#section.attribute.lingo.MultilingualClustering.defaultLanguage
- -->
- <attribute key="MultilingualClustering.defaultLanguage">
- <value type="org.carrot2.core.LanguageCode" value="ENGLISH"/>
- </attribute>
- <attribute key="LingoClusteringAlgorithm.desiredClusterCountBase">
- <value type="java.lang.Integer" value="20"/>
- </attribute>
- </value-set>
- </attribute-set>
-</attribute-sets>
\ No newline at end of file
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/rss/conf/clustering/carrot2/stc-attributes.xml
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/rss/conf/clustering/carrot2/stc-attributes.xml b/solr/example/example-DIH/solr/rss/conf/clustering/carrot2/stc-attributes.xml
deleted file mode 100644
index c1bf110..0000000
--- a/solr/example/example-DIH/solr/rss/conf/clustering/carrot2/stc-attributes.xml
+++ /dev/null
@@ -1,19 +0,0 @@
-<!--
- Default configuration for the STC clustering algorithm.
-
- This file can be loaded (and saved) by Carrot2 Workbench.
- http://project.carrot2.org/download.html
--->
-<attribute-sets default="attributes">
- <attribute-set id="attributes">
- <value-set>
- <label>attributes</label>
- <attribute key="MultilingualClustering.defaultLanguage">
- <value type="org.carrot2.core.LanguageCode" value="ENGLISH"/>
- </attribute>
- <attribute key="MultilingualClustering.languageAggregationStrategy">
- <value type="org.carrot2.text.clustering.MultilingualClustering$LanguageAggregationStrategy" value="FLATTEN_MAJOR_LANGUAGE"/>
- </attribute>
- </value-set>
- </attribute-set>
-</attribute-sets>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/rss/conf/currency.xml
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/rss/conf/currency.xml b/solr/example/example-DIH/solr/rss/conf/currency.xml
deleted file mode 100644
index 3a9c58a..0000000
--- a/solr/example/example-DIH/solr/rss/conf/currency.xml
+++ /dev/null
@@ -1,67 +0,0 @@
-<?xml version="1.0" ?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<!-- Example exchange rates file for CurrencyField type named "currency" in example schema -->
-
-<currencyConfig version="1.0">
- <rates>
- <!-- Updated from http://www.exchangerate.com/ at 2011-09-27 -->
- <rate from="USD" to="ARS" rate="4.333871" comment="ARGENTINA Peso" />
- <rate from="USD" to="AUD" rate="1.025768" comment="AUSTRALIA Dollar" />
- <rate from="USD" to="EUR" rate="0.743676" comment="European Euro" />
- <rate from="USD" to="BRL" rate="1.881093" comment="BRAZIL Real" />
- <rate from="USD" to="CAD" rate="1.030815" comment="CANADA Dollar" />
- <rate from="USD" to="CLP" rate="519.0996" comment="CHILE Peso" />
- <rate from="USD" to="CNY" rate="6.387310" comment="CHINA Yuan" />
- <rate from="USD" to="CZK" rate="18.47134" comment="CZECH REP. Koruna" />
- <rate from="USD" to="DKK" rate="5.515436" comment="DENMARK Krone" />
- <rate from="USD" to="HKD" rate="7.801922" comment="HONG KONG Dollar" />
- <rate from="USD" to="HUF" rate="215.6169" comment="HUNGARY Forint" />
- <rate from="USD" to="ISK" rate="118.1280" comment="ICELAND Krona" />
- <rate from="USD" to="INR" rate="49.49088" comment="INDIA Rupee" />
- <rate from="USD" to="XDR" rate="0.641358" comment="INTNL MON. FUND SDR" />
- <rate from="USD" to="ILS" rate="3.709739" comment="ISRAEL Sheqel" />
- <rate from="USD" to="JPY" rate="76.32419" comment="JAPAN Yen" />
- <rate from="USD" to="KRW" rate="1169.173" comment="KOREA (SOUTH) Won" />
- <rate from="USD" to="KWD" rate="0.275142" comment="KUWAIT Dinar" />
- <rate from="USD" to="MXN" rate="13.85895" comment="MEXICO Peso" />
- <rate from="USD" to="NZD" rate="1.285159" comment="NEW ZEALAND Dollar" />
- <rate from="USD" to="NOK" rate="5.859035" comment="NORWAY Krone" />
- <rate from="USD" to="PKR" rate="87.57007" comment="PAKISTAN Rupee" />
- <rate from="USD" to="PEN" rate="2.730683" comment="PERU Sol" />
- <rate from="USD" to="PHP" rate="43.62039" comment="PHILIPPINES Peso" />
- <rate from="USD" to="PLN" rate="3.310139" comment="POLAND Zloty" />
- <rate from="USD" to="RON" rate="3.100932" comment="ROMANIA Leu" />
- <rate from="USD" to="RUB" rate="32.14663" comment="RUSSIA Ruble" />
- <rate from="USD" to="SAR" rate="3.750465" comment="SAUDI ARABIA Riyal" />
- <rate from="USD" to="SGD" rate="1.299352" comment="SINGAPORE Dollar" />
- <rate from="USD" to="ZAR" rate="8.329761" comment="SOUTH AFRICA Rand" />
- <rate from="USD" to="SEK" rate="6.883442" comment="SWEDEN Krona" />
- <rate from="USD" to="CHF" rate="0.906035" comment="SWITZERLAND Franc" />
- <rate from="USD" to="TWD" rate="30.40283" comment="TAIWAN Dollar" />
- <rate from="USD" to="THB" rate="30.89487" comment="THAILAND Baht" />
- <rate from="USD" to="AED" rate="3.672955" comment="U.A.E. Dirham" />
- <rate from="USD" to="UAH" rate="7.988582" comment="UKRAINE Hryvnia" />
- <rate from="USD" to="GBP" rate="0.647910" comment="UNITED KINGDOM Pound" />
-
- <!-- Cross-rates for some common currencies -->
- <rate from="EUR" to="GBP" rate="0.869914" />
- <rate from="EUR" to="NOK" rate="7.800095" />
- <rate from="GBP" to="NOK" rate="8.966508" />
- </rates>
-</currencyConfig>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/rss/conf/elevate.xml
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/rss/conf/elevate.xml b/solr/example/example-DIH/solr/rss/conf/elevate.xml
deleted file mode 100644
index 2c09ebe..0000000
--- a/solr/example/example-DIH/solr/rss/conf/elevate.xml
+++ /dev/null
@@ -1,42 +0,0 @@
-<?xml version="1.0" encoding="UTF-8" ?>
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- (the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
--->
-
-<!-- If this file is found in the config directory, it will only be
- loaded once at startup. If it is found in Solr's data
- directory, it will be re-loaded every commit.
-
- See http://wiki.apache.org/solr/QueryElevationComponent for more info
-
--->
-<elevate>
- <!-- Query elevation examples
- <query text="foo bar">
- <doc id="1" />
- <doc id="2" />
- <doc id="3" />
- </query>
-
-for use with techproducts example
-
- <query text="ipod">
- <doc id="MA147LL/A" /> put the actual ipod at the top
- <doc id="IW-02" exclude="true" /> exclude this cable
- </query>
--->
-
-</elevate>
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/rss/conf/lang/contractions_ca.txt
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/rss/conf/lang/contractions_ca.txt b/solr/example/example-DIH/solr/rss/conf/lang/contractions_ca.txt
deleted file mode 100644
index 307a85f..0000000
--- a/solr/example/example-DIH/solr/rss/conf/lang/contractions_ca.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-# Set of Catalan contractions for ElisionFilter
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-d
-l
-m
-n
-s
-t
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/rss/conf/lang/contractions_fr.txt
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/rss/conf/lang/contractions_fr.txt b/solr/example/example-DIH/solr/rss/conf/lang/contractions_fr.txt
deleted file mode 100644
index f1bba51..0000000
--- a/solr/example/example-DIH/solr/rss/conf/lang/contractions_fr.txt
+++ /dev/null
@@ -1,15 +0,0 @@
-# Set of French contractions for ElisionFilter
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-l
-m
-t
-qu
-n
-s
-j
-d
-c
-jusqu
-quoiqu
-lorsqu
-puisqu
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/rss/conf/lang/contractions_ga.txt
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/rss/conf/lang/contractions_ga.txt b/solr/example/example-DIH/solr/rss/conf/lang/contractions_ga.txt
deleted file mode 100644
index 9ebe7fa..0000000
--- a/solr/example/example-DIH/solr/rss/conf/lang/contractions_ga.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-# Set of Irish contractions for ElisionFilter
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-d
-m
-b
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/rss/conf/lang/contractions_it.txt
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/rss/conf/lang/contractions_it.txt b/solr/example/example-DIH/solr/rss/conf/lang/contractions_it.txt
deleted file mode 100644
index cac0409..0000000
--- a/solr/example/example-DIH/solr/rss/conf/lang/contractions_it.txt
+++ /dev/null
@@ -1,23 +0,0 @@
-# Set of Italian contractions for ElisionFilter
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-c
-l
-all
-dall
-dell
-nell
-sull
-coll
-pell
-gl
-agl
-dagl
-degl
-negl
-sugl
-un
-m
-t
-s
-v
-d
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/rss/conf/lang/hyphenations_ga.txt
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/rss/conf/lang/hyphenations_ga.txt b/solr/example/example-DIH/solr/rss/conf/lang/hyphenations_ga.txt
deleted file mode 100644
index 4d2642c..0000000
--- a/solr/example/example-DIH/solr/rss/conf/lang/hyphenations_ga.txt
+++ /dev/null
@@ -1,5 +0,0 @@
-# Set of Irish hyphenations for StopFilter
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-h
-n
-t
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/rss/conf/lang/stemdict_nl.txt
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/rss/conf/lang/stemdict_nl.txt b/solr/example/example-DIH/solr/rss/conf/lang/stemdict_nl.txt
deleted file mode 100644
index 4410729..0000000
--- a/solr/example/example-DIH/solr/rss/conf/lang/stemdict_nl.txt
+++ /dev/null
@@ -1,6 +0,0 @@
-# Set of overrides for the dutch stemmer
-# TODO: load this as a resource from the analyzer and sync it in build.xml
-fiets fiets
-bromfiets bromfiets
-ei eier
-kind kinder
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/rss/conf/lang/stoptags_ja.txt
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/rss/conf/lang/stoptags_ja.txt b/solr/example/example-DIH/solr/rss/conf/lang/stoptags_ja.txt
deleted file mode 100644
index 71b7508..0000000
--- a/solr/example/example-DIH/solr/rss/conf/lang/stoptags_ja.txt
+++ /dev/null
@@ -1,420 +0,0 @@
-#
-# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter.
-#
-# Any token whose part-of-speech tag exactly matches one defined in this
-# file is removed from the token stream.
-#
-# Set your own stoptags by uncommenting the lines below. Note that comments are
-# not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists,
-# etc. that can be useful for building your own stoptag set.
-#
-# The entire possible tagset is provided below for convenience.
-#
-#####
-# noun: unclassified nouns
-#\u540d\u8a5e
-#
-# noun-common: Common nouns or nouns where the sub-classification is undefined
-#\u540d\u8a5e-\u4e00\u822c
-#
-# noun-proper: Proper nouns where the sub-classification is undefined
-#\u540d\u8a5e-\u56fa\u6709\u540d\u8a5e
-#
-# noun-proper-misc: miscellaneous proper nouns
-#\u540d\u8a5e-\u56fa\u6709\u540d\u8a5e-\u4e00\u822c
-#
-# noun-proper-person: Personal names where the sub-classification is undefined
-#\u540d\u8a5e-\u56fa\u6709\u540d\u8a5e-\u4eba\u540d
-#
-# noun-proper-person-misc: names that cannot be divided into surname and
-# given name; foreign names; names where the surname or given name is unknown.
-# e.g. \u304a\u5e02\u306e\u65b9
-#\u540d\u8a5e-\u56fa\u6709\u540d\u8a5e-\u4eba\u540d-\u4e00\u822c
-#
-# noun-proper-person-surname: Mainly Japanese surnames.
-# e.g. \u5c71\u7530
-#\u540d\u8a5e-\u56fa\u6709\u540d\u8a5e-\u4eba\u540d-\u59d3
-#
-# noun-proper-person-given_name: Mainly Japanese given names.
-# e.g. \u592a\u90ce
-#\u540d\u8a5e-\u56fa\u6709\u540d\u8a5e-\u4eba\u540d-\u540d
-#
-# noun-proper-organization: Names representing organizations.
-# e.g. \u901a\u7523\u7701, NHK
-#\u540d\u8a5e-\u56fa\u6709\u540d\u8a5e-\u7d44\u7e54
-#
-# noun-proper-place: Place names where the sub-classification is undefined
-#\u540d\u8a5e-\u56fa\u6709\u540d\u8a5e-\u5730\u57df
-#
-# noun-proper-place-misc: Place names excluding countries.
-# e.g. \u30a2\u30b8\u30a2, \u30d0\u30eb\u30bb\u30ed\u30ca, \u4eac\u90fd
-#\u540d\u8a5e-\u56fa\u6709\u540d\u8a5e-\u5730\u57df-\u4e00\u822c
-#
-# noun-proper-place-country: Country names.
-# e.g. \u65e5\u672c, \u30aa\u30fc\u30b9\u30c8\u30e9\u30ea\u30a2
-#\u540d\u8a5e-\u56fa\u6709\u540d\u8a5e-\u5730\u57df-\u56fd
-#
-# noun-pronoun: Pronouns where the sub-classification is undefined
-#\u540d\u8a5e-\u4ee3\u540d\u8a5e
-#
-# noun-pronoun-misc: miscellaneous pronouns:
-# e.g. \u305d\u308c, \u3053\u3053, \u3042\u3044\u3064, \u3042\u306a\u305f, \u3042\u3061\u3053\u3061, \u3044\u304f\u3064, \u3069\u3053\u304b, \u306a\u306b, \u307f\u306a\u3055\u3093, \u307f\u3093\u306a, \u308f\u305f\u304f\u3057, \u308f\u308c\u308f\u308c
-#\u540d\u8a5e-\u4ee3\u540d\u8a5e-\u4e00\u822c
-#
-# noun-pronoun-contraction: Spoken language contraction made by combining a
-# pronoun and the particle 'wa'.
-# e.g. \u3042\u308a\u3083, \u3053\u308a\u3083, \u3053\u308a\u3083\u3042, \u305d\u308a\u3083, \u305d\u308a\u3083\u3042
-#\u540d\u8a5e-\u4ee3\u540d\u8a5e-\u7e2e\u7d04
-#
-# noun-adverbial: Temporal nouns such as names of days or months that behave
-# like adverbs. Nouns that represent amount or ratios and can be used adverbially,
-# e.g. \u91d1\u66dc, \u4e00\u6708, \u5348\u5f8c, \u5c11\u91cf
-#\u540d\u8a5e-\u526f\u8a5e\u53ef\u80fd
-#
-# noun-verbal: Nouns that take arguments with case and can appear followed by
-# 'suru' and related verbs (\u3059\u308b, \u3067\u304d\u308b, \u306a\u3055\u308b, \u304f\u3060\u3055\u308b)
-# e.g. \u30a4\u30f3\u30d7\u30c3\u30c8, \u611b\u7740, \u60aa\u5316, \u60aa\u6226\u82e6\u95d8, \u4e00\u5b89\u5fc3, \u4e0b\u53d6\u308a
-#\u540d\u8a5e-\u30b5\u5909\u63a5\u7d9a
-#
-# noun-adjective-base: The base form of adjectives, words that appear before \u306a ("na")
-# e.g. \u5065\u5eb7, \u5b89\u6613, \u99c4\u76ee, \u3060\u3081
-#\u540d\u8a5e-\u5f62\u5bb9\u52d5\u8a5e\u8a9e\u5e79
-#
-# noun-numeric: Arabic numbers, Chinese numerals, and counters like \u4f55 (\u56de), \u6570.
-# e.g. 0, 1, 2, \u4f55, \u6570, \u5e7e
-#\u540d\u8a5e-\u6570
-#
-# noun-affix: noun affixes where the sub-classification is undefined
-#\u540d\u8a5e-\u975e\u81ea\u7acb
-#
-# noun-affix-misc: Of adnominalizers, the case-marker \u306e ("no"), and words that
-# attach to the base form of inflectional words, words that cannot be classified
-# into any of the other categories below. This category includes indefinite nouns.
-# e.g. \u3042\u304b\u3064\u304d, \u6681, \u304b\u3044, \u7532\u6590, \u6c17, \u304d\u3089\u3044, \u5acc\u3044, \u304f\u305b, \u7656, \u3053\u3068, \u4e8b, \u3054\u3068, \u6bce, \u3057\u3060\u3044, \u6b21\u7b2c,
-# \u9806, \u305b\u3044, \u6240\u70ba, \u3064\u3044\u3067, \u5e8f\u3067, \u3064\u3082\u308a, \u7a4d\u3082\u308a, \u70b9, \u3069\u3053\u308d, \u306e, \u306f\u305a, \u7b48, \u306f\u305a\u307f, \u5f3e\u307f,
-# \u62cd\u5b50, \u3075\u3046, \u3075\u308a, \u632f\u308a, \u307b\u3046, \u65b9, \u65e8, \u3082\u306e, \u7269, \u8005, \u3086\u3048, \u6545, \u3086\u3048\u3093, \u6240\u4ee5, \u308f\u3051, \u8a33,
-# \u308f\u308a, \u5272\u308a, \u5272, \u3093-\u53e3\u8a9e/, \u3082\u3093-\u53e3\u8a9e/
-#\u540d\u8a5e-\u975e\u81ea\u7acb-\u4e00\u822c
-#
-# noun-affix-adverbial: noun affixes that can behave as adverbs.
-# e.g. \u3042\u3044\u3060, \u9593, \u3042\u3052\u304f, \u6319\u3052\u53e5, \u3042\u3068, \u5f8c, \u4f59\u308a, \u4ee5\u5916, \u4ee5\u964d, \u4ee5\u5f8c, \u4ee5\u4e0a, \u4ee5\u524d, \u4e00\u65b9, \u3046\u3048,
-# \u4e0a, \u3046\u3061, \u5185, \u304a\u308a, \u6298\u308a, \u304b\u304e\u308a, \u9650\u308a, \u304d\u308a, \u3063\u304d\u308a, \u7d50\u679c, \u3053\u308d, \u9803, \u3055\u3044, \u969b, \u6700\u4e2d, \u3055\u306a\u304b,
-# \u6700\u4e2d, \u3058\u305f\u3044, \u81ea\u4f53, \u305f\u3073, \u5ea6, \u305f\u3081, \u70ba, \u3064\u3069, \u90fd\u5ea6, \u3068\u304a\u308a, \u901a\u308a, \u3068\u304d, \u6642, \u3068\u3053\u308d, \u6240,
-# \u3068\u305f\u3093, \u9014\u7aef, \u306a\u304b, \u4e2d, \u306e\u3061, \u5f8c, \u3070\u3042\u3044, \u5834\u5408, \u65e5, \u3076\u3093, \u5206, \u307b\u304b, \u4ed6, \u307e\u3048, \u524d, \u307e\u307e,
-# \u5118, \u4fad, \u307f\u304e\u308a, \u77e2\u5148
-#\u540d\u8a5e-\u975e\u81ea\u7acb-\u526f\u8a5e\u53ef\u80fd
-#
-# noun-affix-aux: noun affixes treated as \u52a9\u52d5\u8a5e ("auxiliary verb") in school grammars
-# with the stem \u3088\u3046(\u3060) ("you(da)").
-# e.g. \u3088\u3046, \u3084\u3046, \u69d8 (\u3088\u3046)
-#\u540d\u8a5e-\u975e\u81ea\u7acb-\u52a9\u52d5\u8a5e\u8a9e\u5e79
-#
-# noun-affix-adjective-base: noun affixes that can connect to the indeclinable
-# connection form \u306a (aux "da").
-# e.g. \u307f\u305f\u3044, \u3075\u3046
-#\u540d\u8a5e-\u975e\u81ea\u7acb-\u5f62\u5bb9\u52d5\u8a5e\u8a9e\u5e79
-#
-# noun-special: special nouns where the sub-classification is undefined.
-#\u540d\u8a5e-\u7279\u6b8a
-#
-# noun-special-aux: The \u305d\u3046\u3060 ("souda") stem form that is used for reporting news, is
-# treated as \u52a9\u52d5\u8a5e ("auxiliary verb") in school grammars, and attaches to the base
-# form of inflectional words.
-# e.g. \u305d\u3046
-#\u540d\u8a5e-\u7279\u6b8a-\u52a9\u52d5\u8a5e\u8a9e\u5e79
-#
-# noun-suffix: noun suffixes where the sub-classification is undefined.
-#\u540d\u8a5e-\u63a5\u5c3e
-#
-# noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect
-# to \u30ac\u30eb or \u30bf\u30a4 and can combine into compound nouns, words that cannot be classified into
-# any of the other categories below. In general, this category is more inclusive than
-# \u63a5\u5c3e\u8a9e ("suffix") and is usually the last element in a compound noun.
-# e.g. \u304a\u304d, \u304b\u305f, \u65b9, \u7532\u6590 (\u304c\u3044), \u304c\u304b\u308a, \u304e\u307f, \u6c17\u5473, \u3050\u308b\u307f, (\uff5e\u3057\u305f) \u3055, \u6b21\u7b2c, \u6e08 (\u305a) \u307f,
-# \u3088\u3046, (\u3067\u304d)\u3063\u3053, \u611f, \u89b3, \u6027, \u5b66, \u985e, \u9762, \u7528
-#\u540d\u8a5e-\u63a5\u5c3e-\u4e00\u822c
-#
-# noun-suffix-person: Suffixes that form nouns and attach to person names more often
-# than other nouns.
-# e.g. \u541b, \u69d8, \u8457
-#\u540d\u8a5e-\u63a5\u5c3e-\u4eba\u540d
-#
-# noun-suffix-place: Suffixes that form nouns and attach to place names more often
-# than other nouns.
-# e.g. \u753a, \u5e02, \u770c
-#\u540d\u8a5e-\u63a5\u5c3e-\u5730\u57df
-#
-# noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that
-# can appear before \u30b9\u30eb ("suru").
-# e.g. \u5316, \u8996, \u5206\u3051, \u5165\u308a, \u843d\u3061, \u8cb7\u3044
-#\u540d\u8a5e-\u63a5\u5c3e-\u30b5\u5909\u63a5\u7d9a
-#
-# noun-suffix-aux: The stem form of \u305d\u3046\u3060 (\u69d8\u614b) that is used to indicate conditions,
-# is treated as \u52a9\u52d5\u8a5e ("auxiliary verb") in school grammars, and attaches to the
-# conjunctive form of inflectional words.
-# e.g. \u305d\u3046
-#\u540d\u8a5e-\u63a5\u5c3e-\u52a9\u52d5\u8a5e\u8a9e\u5e79
-#
-# noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive
-# form of inflectional words and appear before the copula \u3060 ("da").
-# e.g. \u7684, \u3052, \u304c\u3061
-#\u540d\u8a5e-\u63a5\u5c3e-\u5f62\u5bb9\u52d5\u8a5e\u8a9e\u5e79
-#
-# noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs.
-# e.g. \u5f8c (\u3054), \u4ee5\u5f8c, \u4ee5\u964d, \u4ee5\u524d, \u524d\u5f8c, \u4e2d, \u672b, \u4e0a, \u6642 (\u3058)
-#\u540d\u8a5e-\u63a5\u5c3e-\u526f\u8a5e\u53ef\u80fd
-#
-# noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category
-# is more inclusive than \u52a9\u6570\u8a5e ("classifier") and includes common nouns that attach
-# to numbers.
-# e.g. \u500b, \u3064, \u672c, \u518a, \u30d1\u30fc\u30bb\u30f3\u30c8, cm, kg, \u30ab\u6708, \u304b\u56fd, \u533a\u753b, \u6642\u9593, \u6642\u534a
-#\u540d\u8a5e-\u63a5\u5c3e-\u52a9\u6570\u8a5e
-#
-# noun-suffix-special: Special suffixes that mainly attach to inflecting words.
-# e.g. (\u697d\u3057) \u3055, (\u8003\u3048) \u65b9
-#\u540d\u8a5e-\u63a5\u5c3e-\u7279\u6b8a
-#
-# noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words
-# together.
-# e.g. (\u65e5\u672c) \u5bfe (\u30a2\u30e1\u30ea\u30ab), \u5bfe (\u30a2\u30e1\u30ea\u30ab), (3) \u5bfe (5), (\u5973\u512a) \u517c (\u4e3b\u5a66)
-#\u540d\u8a5e-\u63a5\u7d9a\u8a5e\u7684
-#
-# noun-verbal_aux: Nouns that attach to the conjunctive particle \u3066 ("te") and are
-# semantically verb-like.
-# e.g. \u3054\u3089\u3093, \u3054\u89a7, \u5fa1\u89a7, \u9802\u6234
-#\u540d\u8a5e-\u52d5\u8a5e\u975e\u81ea\u7acb\u7684
-#
-# noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry,
-# dialects, English, etc. Currently, the only entry for \u540d\u8a5e \u5f15\u7528\u6587\u5b57\u5217 ("noun quotation")
-# is \u3044\u308f\u304f ("iwaku").
-#\u540d\u8a5e-\u5f15\u7528\u6587\u5b57\u5217
-#
-# noun-nai_adjective: Words that appear before the auxiliary verb \u306a\u3044 ("nai") and
-# behave like an adjective.
-# e.g. \u7533\u3057\u8a33, \u4ed5\u65b9, \u3068\u3093\u3067\u3082, \u9055\u3044
-#\u540d\u8a5e-\u30ca\u30a4\u5f62\u5bb9\u8a5e\u8a9e\u5e79
-#
-#####
-# prefix: unclassified prefixes
-#\u63a5\u982d\u8a5e
-#
-# prefix-nominal: Prefixes that attach to nouns (including adjective stem forms)
-# excluding numerical expressions.
-# e.g. \u304a (\u6c34), \u67d0 (\u6c0f), \u540c (\u793e), \u6545 (\uff5e\u6c0f), \u9ad8 (\u54c1\u8cea), \u304a (\u898b\u4e8b), \u3054 (\u7acb\u6d3e)
-#\u63a5\u982d\u8a5e-\u540d\u8a5e\u63a5\u7d9a
-#
-# prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb
-# in conjunctive form followed by \u306a\u308b/\u306a\u3055\u308b/\u304f\u3060\u3055\u308b.
-# e.g. \u304a (\u8aad\u307f\u306a\u3055\u3044), \u304a (\u5ea7\u308a)
-#\u63a5\u982d\u8a5e-\u52d5\u8a5e\u63a5\u7d9a
-#
-# prefix-adjectival: Prefixes that attach to adjectives.
-# e.g. \u304a (\u5bd2\u3044\u3067\u3059\u306d\u3048), \u30d0\u30ab (\u3067\u304b\u3044)
-#\u63a5\u982d\u8a5e-\u5f62\u5bb9\u8a5e\u63a5\u7d9a
-#
-# prefix-numerical: Prefixes that attach to numerical expressions.
-# e.g. \u7d04, \u304a\u3088\u305d, \u6bce\u6642
-#\u63a5\u982d\u8a5e-\u6570\u63a5\u7d9a
-#
-#####
-# verb: unclassified verbs
-#\u52d5\u8a5e
-#
-# verb-main:
-#\u52d5\u8a5e-\u81ea\u7acb
-#
-# verb-auxiliary:
-#\u52d5\u8a5e-\u975e\u81ea\u7acb
-#
-# verb-suffix:
-#\u52d5\u8a5e-\u63a5\u5c3e
-#
-#####
-# adjective: unclassified adjectives
-#\u5f62\u5bb9\u8a5e
-#
-# adjective-main:
-#\u5f62\u5bb9\u8a5e-\u81ea\u7acb
-#
-# adjective-auxiliary:
-#\u5f62\u5bb9\u8a5e-\u975e\u81ea\u7acb
-#
-# adjective-suffix:
-#\u5f62\u5bb9\u8a5e-\u63a5\u5c3e
-#
-#####
-# adverb: unclassified adverbs
-#\u526f\u8a5e
-#
-# adverb-misc: Words that can be segmented into one unit and where adnominal
-# modification is not possible.
-# e.g. \u3042\u3044\u304b\u308f\u3089\u305a, \u591a\u5206
-#\u526f\u8a5e-\u4e00\u822c
-#
-# adverb-particle_conjunction: Adverbs that can be followed by \u306e, \u306f, \u306b,
-# \u306a, \u3059\u308b, \u3060, etc.
-# e.g. \u3053\u3093\u306a\u306b, \u305d\u3093\u306a\u306b, \u3042\u3093\u306a\u306b, \u306a\u306b\u304b, \u306a\u3093\u3067\u3082
-#\u526f\u8a5e-\u52a9\u8a5e\u985e\u63a5\u7d9a
-#
-#####
-# adnominal: Words that only have noun-modifying forms.
-# e.g. \u3053\u306e, \u305d\u306e, \u3042\u306e, \u3069\u306e, \u3044\u308f\u3086\u308b, \u306a\u3093\u3089\u304b\u306e, \u4f55\u3089\u304b\u306e, \u3044\u308d\u3093\u306a, \u3053\u3046\u3044\u3046, \u305d\u3046\u3044\u3046, \u3042\u3042\u3044\u3046,
-# \u3069\u3046\u3044\u3046, \u3053\u3093\u306a, \u305d\u3093\u306a, \u3042\u3093\u306a, \u3069\u3093\u306a, \u5927\u304d\u306a, \u5c0f\u3055\u306a, \u304a\u304b\u3057\u306a, \u307b\u3093\u306e, \u305f\u3044\u3057\u305f,
-# \u300c(, \u3082) \u3055\u308b (\u3053\u3068\u306a\u304c\u3089)\u300d, \u5fae\u3005\u305f\u308b, \u5802\u3005\u305f\u308b, \u5358\u306a\u308b, \u3044\u304b\u306a\u308b, \u6211\u304c\u300d\u300c\u540c\u3058, \u4ea1\u304d
-#\u9023\u4f53\u8a5e
-#
-#####
-# conjunction: Conjunctions that can occur independently.
-# e.g. \u304c, \u3051\u308c\u3069\u3082, \u305d\u3057\u3066, \u3058\u3083\u3042, \u305d\u308c\u3069\u3053\u308d\u304b
-\u63a5\u7d9a\u8a5e
-#
-#####
-# particle: unclassified particles.
-\u52a9\u8a5e
-#
-# particle-case: case particles where the subclassification is undefined.
-\u52a9\u8a5e-\u683c\u52a9\u8a5e
-#
-# particle-case-misc: Case particles.
-# e.g. \u304b\u3089, \u304c, \u3067, \u3068, \u306b, \u3078, \u3088\u308a, \u3092, \u306e, \u306b\u3066
-\u52a9\u8a5e-\u683c\u52a9\u8a5e-\u4e00\u822c
-#
-# particle-case-quote: the "to" that appears after nouns, a person\u2019s speech,
-# quotation marks, expressions of decisions from a meeting, reasons, judgements,
-# conjectures, etc.
-# e.g. ( \u3060) \u3068 (\u8ff0\u3079\u305f.), ( \u3067\u3042\u308b) \u3068 (\u3057\u3066\u57f7\u884c\u7336\u4e88...)
-\u52a9\u8a5e-\u683c\u52a9\u8a5e-\u5f15\u7528
-#
-# particle-case-compound: Compounds of particles and verbs that mainly behave
-# like case particles.
-# e.g. \u3068\u3044\u3046, \u3068\u3044\u3063\u305f, \u3068\u304b\u3044\u3046, \u3068\u3057\u3066, \u3068\u3068\u3082\u306b, \u3068\u5171\u306b, \u3067\u3082\u3063\u3066, \u306b\u3042\u305f\u3063\u3066, \u306b\u5f53\u305f\u3063\u3066, \u306b\u5f53\u3063\u3066,
-# \u306b\u3042\u305f\u308a, \u306b\u5f53\u305f\u308a, \u306b\u5f53\u308a, \u306b\u5f53\u305f\u308b, \u306b\u3042\u305f\u308b, \u306b\u304a\u3044\u3066, \u306b\u65bc\u3044\u3066,\u306b\u65bc\u3066, \u306b\u304a\u3051\u308b, \u306b\u65bc\u3051\u308b,
-# \u306b\u304b\u3051, \u306b\u304b\u3051\u3066, \u306b\u304b\u3093\u3057, \u306b\u95a2\u3057, \u306b\u304b\u3093\u3057\u3066, \u306b\u95a2\u3057\u3066, \u306b\u304b\u3093\u3059\u308b, \u306b\u95a2\u3059\u308b, \u306b\u969b\u3057,
-# \u306b\u969b\u3057\u3066, \u306b\u3057\u305f\u304c\u3044, \u306b\u5f93\u3044, \u306b\u5f93\u3046, \u306b\u3057\u305f\u304c\u3063\u3066, \u306b\u5f93\u3063\u3066, \u306b\u305f\u3044\u3057, \u306b\u5bfe\u3057, \u306b\u305f\u3044\u3057\u3066,
-# \u306b\u5bfe\u3057\u3066, \u306b\u305f\u3044\u3059\u308b, \u306b\u5bfe\u3059\u308b, \u306b\u3064\u3044\u3066, \u306b\u3064\u304d, \u306b\u3064\u3051, \u306b\u3064\u3051\u3066, \u306b\u3064\u308c, \u306b\u3064\u308c\u3066, \u306b\u3068\u3063\u3066,
-# \u306b\u3068\u308a, \u306b\u307e\u3064\u308f\u308b, \u306b\u3088\u3063\u3066, \u306b\u4f9d\u3063\u3066, \u306b\u56e0\u3063\u3066, \u306b\u3088\u308a, \u306b\u4f9d\u308a, \u306b\u56e0\u308a, \u306b\u3088\u308b, \u306b\u4f9d\u308b, \u306b\u56e0\u308b,
-# \u306b\u308f\u305f\u3063\u3066, \u306b\u308f\u305f\u308b, \u3092\u3082\u3063\u3066, \u3092\u4ee5\u3063\u3066, \u3092\u901a\u3058, \u3092\u901a\u3058\u3066, \u3092\u901a\u3057\u3066, \u3092\u3081\u3050\u3063\u3066, \u3092\u3081\u3050\u308a, \u3092\u3081\u3050\u308b,
-# \u3063\u3066-\u53e3\u8a9e/, \u3061\u3085\u3046-\u95a2\u897f\u5f01\u300c\u3068\u3044\u3046\u300d/, (\u4f55) \u3066\u3044\u3046 (\u4eba)-\u53e3\u8a9e/, \u3063\u3066\u3044\u3046-\u53e3\u8a9e/, \u3068\u3044\u3075, \u3068\u304b\u3044\u3075
-\u52a9\u8a5e-\u683c\u52a9\u8a5e-\u9023\u8a9e
-#
-# particle-conjunctive:
-# e.g. \u304b\u3089, \u304b\u3089\u306b\u306f, \u304c, \u3051\u308c\u3069, \u3051\u308c\u3069\u3082, \u3051\u3069, \u3057, \u3064\u3064, \u3066, \u3067, \u3068, \u3068\u3053\u308d\u304c, \u3069\u3053\u308d\u304b, \u3068\u3082, \u3069\u3082,
-# \u306a\u304c\u3089, \u306a\u308a, \u306e\u3067, \u306e\u306b, \u3070, \u3082\u306e\u306e, \u3084 ( \u3057\u305f), \u3084\u3044\u306a\u3084, (\u3053\u308d\u3093) \u3058\u3083(\u3044\u3051\u306a\u3044)-\u53e3\u8a9e/,
-# (\u884c\u3063) \u3061\u3083(\u3044\u3051\u306a\u3044)-\u53e3\u8a9e/, (\u8a00\u3063) \u305f\u3063\u3066 (\u3057\u304b\u305f\u304c\u306a\u3044)-\u53e3\u8a9e/, (\u305d\u308c\u304c\u306a\u304f)\u3063\u305f\u3063\u3066 (\u5e73\u6c17)-\u53e3\u8a9e/
-\u52a9\u8a5e-\u63a5\u7d9a\u52a9\u8a5e
-#
-# particle-dependency:
-# e.g. \u3053\u305d, \u3055\u3048, \u3057\u304b, \u3059\u3089, \u306f, \u3082, \u305e
-\u52a9\u8a5e-\u4fc2\u52a9\u8a5e
-#
-# particle-adverbial:
-# e.g. \u304c\u3066\u3089, \u304b\u3082, \u304f\u3089\u3044, \u4f4d, \u3050\u3089\u3044, \u3057\u3082, (\u5b66\u6821) \u3058\u3083(\u3053\u308c\u304c\u6d41\u884c\u3063\u3066\u3044\u308b)-\u53e3\u8a9e/,
-# (\u305d\u308c)\u3058\u3083\u3042 (\u3088\u304f\u306a\u3044)-\u53e3\u8a9e/, \u305a\u3064, (\u79c1) \u306a\u305e, \u306a\u3069, (\u79c1) \u306a\u308a (\u306b), (\u5148\u751f) \u306a\u3093\u304b (\u5927\u5acc\u3044)-\u53e3\u8a9e/,
-# (\u79c1) \u306a\u3093\u305e, (\u5148\u751f) \u306a\u3093\u3066 (\u5927\u5acc\u3044)-\u53e3\u8a9e/, \u306e\u307f, \u3060\u3051, (\u79c1) \u3060\u3063\u3066-\u53e3\u8a9e/, \u3060\u306b,
-# (\u5f7c)\u3063\u305f\u3089-\u53e3\u8a9e/, (\u304a\u8336) \u3067\u3082 (\u3044\u304b\u304c), \u7b49 (\u3068\u3046), (\u4eca\u5f8c) \u3068\u3082, \u3070\u304b\u308a, \u3070\u3063\u304b-\u53e3\u8a9e/, \u3070\u3063\u304b\u308a-\u53e3\u8a9e/,
-# \u307b\u3069, \u7a0b, \u307e\u3067, \u8fc4, (\u8ab0) \u3082 (\u304c)([\u52a9\u8a5e-\u683c\u52a9\u8a5e] \u304a\u3088\u3073 [\u52a9\u8a5e-\u4fc2\u52a9\u8a5e] \u306e\u524d\u306b\u4f4d\u7f6e\u3059\u308b\u300c\u3082\u300d)
-\u52a9\u8a5e-\u526f\u52a9\u8a5e
-#
-# particle-interjective: particles with interjective grammatical roles.
-# e.g. (\u677e\u5cf6) \u3084
-\u52a9\u8a5e-\u9593\u6295\u52a9\u8a5e
-#
-# particle-coordinate:
-# e.g. \u3068, \u305f\u308a, \u3060\u306e, \u3060\u308a, \u3068\u304b, \u306a\u308a, \u3084, \u3084\u3089
-\u52a9\u8a5e-\u4e26\u7acb\u52a9\u8a5e
-#
-# particle-final:
-# e.g. \u304b\u3044, \u304b\u3057\u3089, \u3055, \u305c, (\u3060)\u3063\u3051-\u53e3\u8a9e/, (\u3068\u307e\u3063\u3066\u308b) \u3067-\u65b9\u8a00/, \u306a, \u30ca, \u306a\u3042-\u53e3\u8a9e/, \u305e, \u306d, \u30cd,
-# \u306d\u3047-\u53e3\u8a9e/, \u306d\u3048-\u53e3\u8a9e/, \u306d\u3093-\u65b9\u8a00/, \u306e, \u306e\u3046-\u53e3\u8a9e/, \u3084, \u3088, \u30e8, \u3088\u3049-\u53e3\u8a9e/, \u308f, \u308f\u3044-\u53e3\u8a9e/
-\u52a9\u8a5e-\u7d42\u52a9\u8a5e
-#
-# particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is
-# adverbial, conjunctive, or sentence final. For example:
-# (a) \u300cA \u304b B \u304b\u300d. Ex:\u300c(\u56fd\u5185\u3067\u904b\u7528\u3059\u308b) \u304b,(\u6d77\u5916\u3067\u904b\u7528\u3059\u308b) \u304b (.)\u300d
-# (b) Inside an adverb phrase. Ex:\u300c(\u5e78\u3044\u3068\u3044\u3046) \u304b (, \u6b7b\u8005\u306f\u3044\u306a\u304b\u3063\u305f.)\u300d
-# \u300c(\u7948\u308a\u304c\u5c4a\u3044\u305f\u305b\u3044) \u304b (, \u8a66\u9a13\u306b\u5408\u683c\u3057\u305f.)\u300d
-# (c) \u300c\u304b\u306e\u3088\u3046\u306b\u300d. Ex:\u300c(\u4f55\u3082\u306a\u304b\u3063\u305f) \u304b (\u306e\u3088\u3046\u306b\u632f\u308b\u821e\u3063\u305f.)\u300d
-# e.g. \u304b
-\u52a9\u8a5e-\u526f\u52a9\u8a5e\uff0f\u4e26\u7acb\u52a9\u8a5e\uff0f\u7d42\u52a9\u8a5e
-#
-# particle-adnominalizer: The "no" that attaches to nouns and modifies
-# non-inflectional words.
-\u52a9\u8a5e-\u9023\u4f53\u5316
-#
-# particle-adverbializer: The "ni" and "to" that appear following nouns and adverbs
-# that are giongo, giseigo, or gitaigo.
-# e.g. \u306b, \u3068
-\u52a9\u8a5e-\u526f\u8a5e\u5316
-#
-# particle-special: A particle that does not fit into one of the above classifications.
-# This includes particles that are used in Tanka, Haiku, and other poetry.
-# e.g. \u304b\u306a, \u3051\u3080, ( \u3057\u305f\u3060\u308d\u3046) \u306b, (\u3042\u3093\u305f) \u306b\u3083(\u308f\u304b\u3089\u3093), (\u4ffa) \u3093 (\u5bb6)
-\u52a9\u8a5e-\u7279\u6b8a
-#
-#####
-# auxiliary-verb:
-\u52a9\u52d5\u8a5e
-#
-#####
-# interjection: Greetings and other exclamations.
-# e.g. \u304a\u306f\u3088\u3046, \u304a\u306f\u3088\u3046\u3054\u3056\u3044\u307e\u3059, \u3053\u3093\u306b\u3061\u306f, \u3053\u3093\u3070\u3093\u306f, \u3042\u308a\u304c\u3068\u3046, \u3069\u3046\u3082\u3042\u308a\u304c\u3068\u3046, \u3042\u308a\u304c\u3068\u3046\u3054\u3056\u3044\u307e\u3059,
-# \u3044\u305f\u3060\u304d\u307e\u3059, \u3054\u3061\u305d\u3046\u3055\u307e, \u3055\u3088\u306a\u3089, \u3055\u3088\u3046\u306a\u3089, \u306f\u3044, \u3044\u3044\u3048, \u3054\u3081\u3093, \u3054\u3081\u3093\u306a\u3055\u3044
-#\u611f\u52d5\u8a5e
-#
-#####
-# symbol: unclassified Symbols.
-\u8a18\u53f7
-#
-# symbol-misc: A general symbol not in one of the categories below.
-# e.g. [\u25cb\u25ce@$\u3012\u2192+]
-\u8a18\u53f7-\u4e00\u822c
-#
-# symbol-comma: Commas
-# e.g. [,\u3001]
-\u8a18\u53f7-\u8aad\u70b9
-#
-# symbol-period: Periods and full stops.
-# e.g. [.\uff0e\u3002]
-\u8a18\u53f7-\u53e5\u70b9
-#
-# symbol-space: Full-width whitespace.
-\u8a18\u53f7-\u7a7a\u767d
-#
-# symbol-open_bracket:
-# e.g. [({\u2018\u201c\u300e\u3010]
-\u8a18\u53f7-\u62ec\u5f27\u958b
-#
-# symbol-close_bracket:
-# e.g. [)}\u2019\u201d\u300f\u300d\u3011]
-\u8a18\u53f7-\u62ec\u5f27\u9589
-#
-# symbol-alphabetic:
-#\u8a18\u53f7-\u30a2\u30eb\u30d5\u30a1\u30d9\u30c3\u30c8
-#
-#####
-# other: unclassified other
-#\u305d\u306e\u4ed6
-#
-# other-interjection: Words that are hard to classify as noun-suffixes or
-# sentence-final particles.
-# e.g. (\u3060)\u30a1
-\u305d\u306e\u4ed6-\u9593\u6295
-#
-#####
-# filler: Aizuchi that occurs during a conversation or sounds inserted as filler.
-# e.g. \u3042\u306e, \u3046\u3093\u3068, \u3048\u3068
-\u30d5\u30a3\u30e9\u30fc
-#
-#####
-# non-verbal: non-verbal sound.
-\u975e\u8a00\u8a9e\u97f3
-#
-#####
-# fragment:
-#\u8a9e\u65ad\u7247
-#
-#####
-# unknown: unknown part of speech.
-#\u672a\u77e5\u8a9e
-#
-##### End of file
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/rss/conf/lang/stopwords_ar.txt
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/rss/conf/lang/stopwords_ar.txt b/solr/example/example-DIH/solr/rss/conf/lang/stopwords_ar.txt
deleted file mode 100644
index 046829d..0000000
--- a/solr/example/example-DIH/solr/rss/conf/lang/stopwords_ar.txt
+++ /dev/null
@@ -1,125 +0,0 @@
-# This file was created by Jacques Savoy and is distributed under the BSD license.
-# See http://members.unine.ch/jacques.savoy/clef/index.html.
-# Also see http://www.opensource.org/licenses/bsd-license.html
-# Cleaned on October 11, 2009 (not normalized, so use before normalization)
-# This means that when modifying this list, you might need to add some
-# redundant entries, for example containing forms with both \u0623 and \u0627
-\u0645\u0646
-\u0648\u0645\u0646
-\u0645\u0646\u0647\u0627
-\u0645\u0646\u0647
-\u0641\u064a
-\u0648\u0641\u064a
-\u0641\u064a\u0647\u0627
-\u0641\u064a\u0647
-\u0648
-\u0641
-\u062b\u0645
-\u0627\u0648
-\u0623\u0648
-\u0628
-\u0628\u0647\u0627
-\u0628\u0647
-\u0627
-\u0623
-\u0627\u0649
-\u0627\u064a
-\u0623\u064a
-\u0623\u0649
-\u0644\u0627
-\u0648\u0644\u0627
-\u0627\u0644\u0627
-\u0623\u0644\u0627
-\u0625\u0644\u0627
-\u0644\u0643\u0646
-\u0645\u0627
-\u0648\u0645\u0627
-\u0643\u0645\u0627
-\u0641\u0645\u0627
-\u0639\u0646
-\u0645\u0639
-\u0627\u0630\u0627
-\u0625\u0630\u0627
-\u0627\u0646
-\u0623\u0646
-\u0625\u0646
-\u0627\u0646\u0647\u0627
-\u0623\u0646\u0647\u0627
-\u0625\u0646\u0647\u0627
-\u0627\u0646\u0647
-\u0623\u0646\u0647
-\u0625\u0646\u0647
-\u0628\u0627\u0646
-\u0628\u0623\u0646
-\u0641\u0627\u0646
-\u0641\u0623\u0646
-\u0648\u0627\u0646
-\u0648\u0623\u0646
-\u0648\u0625\u0646
-\u0627\u0644\u062a\u0649
-\u0627\u0644\u062a\u064a
-\u0627\u0644\u0630\u0649
-\u0627\u0644\u0630\u064a
-\u0627\u0644\u0630\u064a\u0646
-\u0627\u0644\u0649
-\u0627\u0644\u064a
-\u0625\u0644\u0649
-\u0625\u0644\u064a
-\u0639\u0644\u0649
-\u0639\u0644\u064a\u0647\u0627
-\u0639\u0644\u064a\u0647
-\u0627\u0645\u0627
-\u0623\u0645\u0627
-\u0625\u0645\u0627
-\u0627\u064a\u0636\u0627
-\u0623\u064a\u0636\u0627
-\u0643\u0644
-\u0648\u0643\u0644
-\u0644\u0645
-\u0648\u0644\u0645
-\u0644\u0646
-\u0648\u0644\u0646
-\u0647\u0649
-\u0647\u064a
-\u0647\u0648
-\u0648\u0647\u0649
-\u0648\u0647\u064a
-\u0648\u0647\u0648
-\u0641\u0647\u0649
-\u0641\u0647\u064a
-\u0641\u0647\u0648
-\u0627\u0646\u062a
-\u0623\u0646\u062a
-\u0644\u0643
-\u0644\u0647\u0627
-\u0644\u0647
-\u0647\u0630\u0647
-\u0647\u0630\u0627
-\u062a\u0644\u0643
-\u0630\u0644\u0643
-\u0647\u0646\u0627\u0643
-\u0643\u0627\u0646\u062a
-\u0643\u0627\u0646
-\u064a\u0643\u0648\u0646
-\u062a\u0643\u0648\u0646
-\u0648\u0643\u0627\u0646\u062a
-\u0648\u0643\u0627\u0646
-\u063a\u064a\u0631
-\u0628\u0639\u0636
-\u0642\u062f
-\u0646\u062d\u0648
-\u0628\u064a\u0646
-\u0628\u064a\u0646\u0645\u0627
-\u0645\u0646\u0630
-\u0636\u0645\u0646
-\u062d\u064a\u062b
-\u0627\u0644\u0627\u0646
-\u0627\u0644\u0622\u0646
-\u062e\u0644\u0627\u0644
-\u0628\u0639\u062f
-\u0642\u0628\u0644
-\u062d\u062a\u0649
-\u0639\u0646\u062f
-\u0639\u0646\u062f\u0645\u0627
-\u0644\u062f\u0649
-\u062c\u0645\u064a\u0639
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/rss/conf/lang/stopwords_bg.txt
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/rss/conf/lang/stopwords_bg.txt b/solr/example/example-DIH/solr/rss/conf/lang/stopwords_bg.txt
deleted file mode 100644
index 1ae4ba2..0000000
--- a/solr/example/example-DIH/solr/rss/conf/lang/stopwords_bg.txt
+++ /dev/null
@@ -1,193 +0,0 @@
-# This file was created by Jacques Savoy and is distributed under the BSD license.
-# See http://members.unine.ch/jacques.savoy/clef/index.html.
-# Also see http://www.opensource.org/licenses/bsd-license.html
-\u0430
-\u0430\u0437
-\u0430\u043a\u043e
-\u0430\u043b\u0430
-\u0431\u0435
-\u0431\u0435\u0437
-\u0431\u0435\u0448\u0435
-\u0431\u0438
-\u0431\u0438\u043b
-\u0431\u0438\u043b\u0430
-\u0431\u0438\u043b\u0438
-\u0431\u0438\u043b\u043e
-\u0431\u043b\u0438\u0437\u043e
-\u0431\u044a\u0434\u0430\u0442
-\u0431\u044a\u0434\u0435
-\u0431\u044f\u0445\u0430
-\u0432
-\u0432\u0430\u0441
-\u0432\u0430\u0448
-\u0432\u0430\u0448\u0430
-\u0432\u0435\u0440\u043e\u044f\u0442\u043d\u043e
-\u0432\u0435\u0447\u0435
-\u0432\u0437\u0435\u043c\u0430
-\u0432\u0438
-\u0432\u0438\u0435
-\u0432\u0438\u043d\u0430\u0433\u0438
-\u0432\u0441\u0435
-\u0432\u0441\u0435\u043a\u0438
-\u0432\u0441\u0438\u0447\u043a\u0438
-\u0432\u0441\u0438\u0447\u043a\u043e
-\u0432\u0441\u044f\u043a\u0430
-\u0432\u044a\u0432
-\u0432\u044a\u043f\u0440\u0435\u043a\u0438
-\u0432\u044a\u0440\u0445\u0443
-\u0433
-\u0433\u0438
-\u0433\u043b\u0430\u0432\u043d\u043e
-\u0433\u043e
-\u0434
-\u0434\u0430
-\u0434\u0430\u043b\u0438
-\u0434\u043e
-\u0434\u043e\u043a\u0430\u0442\u043e
-\u0434\u043e\u043a\u043e\u0433\u0430
-\u0434\u043e\u0440\u0438
-\u0434\u043e\u0441\u0435\u0433\u0430
-\u0434\u043e\u0441\u0442\u0430
-\u0435
-\u0435\u0434\u0432\u0430
-\u0435\u0434\u0438\u043d
-\u0435\u0442\u043e
-\u0437\u0430
-\u0437\u0430\u0434
-\u0437\u0430\u0435\u0434\u043d\u043e
-\u0437\u0430\u0440\u0430\u0434\u0438
-\u0437\u0430\u0441\u0435\u0433\u0430
-\u0437\u0430\u0442\u043e\u0432\u0430
-\u0437\u0430\u0449\u043e
-\u0437\u0430\u0449\u043e\u0442\u043e
-\u0438
-\u0438\u0437
-\u0438\u043b\u0438
-\u0438\u043c
-\u0438\u043c\u0430
-\u0438\u043c\u0430\u0442
-\u0438\u0441\u043a\u0430
-\u0439
-\u043a\u0430\u0437\u0430
-\u043a\u0430\u043a
-\u043a\u0430\u043a\u0432\u0430
-\u043a\u0430\u043a\u0432\u043e
-\u043a\u0430\u043a\u0442\u043e
-\u043a\u0430\u043a\u044a\u0432
-\u043a\u0430\u0442\u043e
-\u043a\u043e\u0433\u0430
-\u043a\u043e\u0433\u0430\u0442\u043e
-\u043a\u043e\u0435\u0442\u043e
-\u043a\u043e\u0438\u0442\u043e
-\u043a\u043e\u0439
-\u043a\u043e\u0439\u0442\u043e
-\u043a\u043e\u043b\u043a\u043e
-\u043a\u043e\u044f\u0442\u043e
-\u043a\u044a\u0434\u0435
-\u043a\u044a\u0434\u0435\u0442\u043e
-\u043a\u044a\u043c
-\u043b\u0438
-\u043c
-\u043c\u0435
-\u043c\u0435\u0436\u0434\u0443
-\u043c\u0435\u043d
-\u043c\u0438
-\u043c\u043d\u043e\u0437\u0438\u043d\u0430
-\u043c\u043e\u0433\u0430
-\u043c\u043e\u0433\u0430\u0442
-\u043c\u043e\u0436\u0435
-\u043c\u043e\u043b\u044f
-\u043c\u043e\u043c\u0435\u043d\u0442\u0430
-\u043c\u0443
-\u043d
-\u043d\u0430
-\u043d\u0430\u0434
-\u043d\u0430\u0437\u0430\u0434
-\u043d\u0430\u0439
-\u043d\u0430\u043f\u0440\u0430\u0432\u0438
-\u043d\u0430\u043f\u0440\u0435\u0434
-\u043d\u0430\u043f\u0440\u0438\u043c\u0435\u0440
-\u043d\u0430\u0441
-\u043d\u0435
-\u043d\u0435\u0433\u043e
-\u043d\u0435\u044f
-\u043d\u0438
-\u043d\u0438\u0435
-\u043d\u0438\u043a\u043e\u0439
-\u043d\u0438\u0442\u043e
-\u043d\u043e
-\u043d\u044f\u043a\u043e\u0438
-\u043d\u044f\u043a\u043e\u0439
-\u043d\u044f\u043c\u0430
-\u043e\u0431\u0430\u0447\u0435
-\u043e\u043a\u043e\u043b\u043e
-\u043e\u0441\u0432\u0435\u043d
-\u043e\u0441\u043e\u0431\u0435\u043d\u043e
-\u043e\u0442
-\u043e\u0442\u0433\u043e\u0440\u0435
-\u043e\u0442\u043d\u043e\u0432\u043e
-\u043e\u0449\u0435
-\u043f\u0430\u043a
-\u043f\u043e
-\u043f\u043e\u0432\u0435\u0447\u0435
-\u043f\u043e\u0432\u0435\u0447\u0435\u0442\u043e
-\u043f\u043e\u0434
-\u043f\u043e\u043d\u0435
-\u043f\u043e\u0440\u0430\u0434\u0438
-\u043f\u043e\u0441\u043b\u0435
-\u043f\u043e\u0447\u0442\u0438
-\u043f\u0440\u0430\u0432\u0438
-\u043f\u0440\u0435\u0434
-\u043f\u0440\u0435\u0434\u0438
-\u043f\u0440\u0435\u0437
-\u043f\u0440\u0438
-\u043f\u044a\u043a
-\u043f\u044a\u0440\u0432\u043e
-\u0441
-\u0441\u0430
-\u0441\u0430\u043c\u043e
-\u0441\u0435
-\u0441\u0435\u0433\u0430
-\u0441\u0438
-\u0441\u043a\u043e\u0440\u043e
-\u0441\u043b\u0435\u0434
-\u0441\u043c\u0435
-\u0441\u043f\u043e\u0440\u0435\u0434
-\u0441\u0440\u0435\u0434
-\u0441\u0440\u0435\u0449\u0443
-\u0441\u0442\u0435
-\u0441\u044a\u043c
-\u0441\u044a\u0441
-\u0441\u044a\u0449\u043e
-\u0442
-\u0442\u0430\u0437\u0438
-\u0442\u0430\u043a\u0430
-\u0442\u0430\u043a\u0438\u0432\u0430
-\u0442\u0430\u043a\u044a\u0432
-\u0442\u0430\u043c
-\u0442\u0432\u043e\u0439
-\u0442\u0435
-\u0442\u0435\u0437\u0438
-\u0442\u0438
-\u0442\u043d
-\u0442\u043e
-\u0442\u043e\u0432\u0430
-\u0442\u043e\u0433\u0430\u0432\u0430
-\u0442\u043e\u0437\u0438
-\u0442\u043e\u0439
-\u0442\u043e\u043b\u043a\u043e\u0432\u0430
-\u0442\u043e\u0447\u043d\u043e
-\u0442\u0440\u044f\u0431\u0432\u0430
-\u0442\u0443\u043a
-\u0442\u044a\u0439
-\u0442\u044f
-\u0442\u044f\u0445
-\u0443
-\u0445\u0430\u0440\u0435\u0441\u0432\u0430
-\u0447
-\u0447\u0435
-\u0447\u0435\u0441\u0442\u043e
-\u0447\u0440\u0435\u0437
-\u0449\u0435
-\u0449\u043e\u043c
-\u044f
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/rss/conf/lang/stopwords_ca.txt
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/rss/conf/lang/stopwords_ca.txt b/solr/example/example-DIH/solr/rss/conf/lang/stopwords_ca.txt
deleted file mode 100644
index 3da65de..0000000
--- a/solr/example/example-DIH/solr/rss/conf/lang/stopwords_ca.txt
+++ /dev/null
@@ -1,220 +0,0 @@
-# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed)
-a
-abans
-ací
-ah
-així
-això
-al
-als
-aleshores
-algun
-alguna
-algunes
-alguns
-alhora
-allà
-allí
-allò
-altra
-altre
-altres
-amb
-ambdós
-ambdues
-apa
-aquell
-aquella
-aquelles
-aquells
-aquest
-aquesta
-aquestes
-aquests
-aquí
-baix
-cada
-cadascú
-cadascuna
-cadascunes
-cadascuns
-com
-contra
-d'un
-d'una
-d'unes
-d'uns
-dalt
-de
-del
-dels
-des
-després
-dins
-dintre
-donat
-doncs
-durant
-e
-eh
-el
-els
-em
-en
-encara
-ens
-entre
-érem
-eren
-éreu
-es
-és
-esta
-està
-estàvem
-estaven
-estàveu
-esteu
-et
-etc
-ets
-fins
-fora
-gairebé
-ha
-han
-has
-havia
-he
-hem
-heu
-hi
-ho
-i
-igual
-iguals
-ja
-l'hi
-la
-les
-li
-li'n
-llavors
-m'he
-ma
-mal
-malgrat
-mateix
-mateixa
-mateixes
-mateixos
-me
-mentre
-més
-meu
-meus
-meva
-meves
-molt
-molta
-moltes
-molts
-mon
-mons
-n'he
-n'hi
-ne
-ni
-no
-nogensmenys
-només
-nosaltres
-nostra
-nostre
-nostres
-o
-oh
-oi
-on
-pas
-pel
-pels
-per
-però
-perquè
-poc
-poca
-pocs
-poques
-potser
-propi
-qual
-quals
-quan
-quant
-que
-què
-quelcom
-qui
-quin
-quina
-quines
-quins
-s'ha
-s'han
-sa
-semblant
-semblants
-ses
-seu
-seus
-seva
-seva
-seves
-si
-sobre
-sobretot
-sóc
-solament
-sols
-son
-són
-sons
-sota
-sou
-t'ha
-t'han
-t'he
-ta
-tal
-també
-tampoc
-tan
-tant
-tanta
-tantes
-teu
-teus
-teva
-teves
-ton
-tons
-tot
-tota
-totes
-tots
-un
-una
-unes
-uns
-us
-va
-vaig
-vam
-van
-vas
-veu
-vosaltres
-vostra
-vostre
-vostres
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/rss/conf/lang/stopwords_ckb.txt
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/rss/conf/lang/stopwords_ckb.txt b/solr/example/example-DIH/solr/rss/conf/lang/stopwords_ckb.txt
deleted file mode 100644
index 87abf11..0000000
--- a/solr/example/example-DIH/solr/rss/conf/lang/stopwords_ckb.txt
+++ /dev/null
@@ -1,136 +0,0 @@
-# set of kurdish stopwords
-# note these have been normalized with our scheme (e represented with U+06D5, etc)
-# constructed from:
-# * Fig 5 of "Building A Test Collection For Sorani Kurdish" (Esmaili et al)
-# * "Sorani Kurdish: A Reference Grammar with selected readings" (Thackston)
-# * Corpus-based analysis of 77M word Sorani collection: wikipedia, news, blogs, etc
-
-# and
-\u0648
-# which
-\u06a9\u06d5
-# of
-\u06cc
-# made/did
-\u06a9\u0631\u062f
-# that/which
-\u0626\u06d5\u0648\u06d5\u06cc
-# on/head
-\u0633\u06d5\u0631
-# two
-\u062f\u0648\u0648
-# also
-\u0647\u06d5\u0631\u0648\u06d5\u0647\u0627
-# from/that
-\u0644\u06d5\u0648
-# makes/does
-\u062f\u06d5\u06a9\u0627\u062a
-# some
-\u0686\u06d5\u0646\u062f
-# every
-\u0647\u06d5\u0631
-
-# demonstratives
-# that
-\u0626\u06d5\u0648
-# this
-\u0626\u06d5\u0645
-
-# personal pronouns
-# I
-\u0645\u0646
-# we
-\u0626\u06ce\u0645\u06d5
-# you
-\u062a\u06c6
-# you
-\u0626\u06ce\u0648\u06d5
-# he/she/it
-\u0626\u06d5\u0648
-# they
-\u0626\u06d5\u0648\u0627\u0646
-
-# prepositions
-# to/with/by
-\u0628\u06d5
-\u067e\u06ce
-# without
-\u0628\u06d5\u0628\u06ce
-# along with/while/during
-\u0628\u06d5\u062f\u06d5\u0645
-# in the opinion of
-\u0628\u06d5\u0644\u0627\u06cc
-# according to
-\u0628\u06d5\u067e\u06ce\u06cc
-# before
-\u0628\u06d5\u0631\u0644\u06d5
-# in the direction of
-\u0628\u06d5\u0631\u06d5\u0648\u06cc
-# in front of/toward
-\u0628\u06d5\u0631\u06d5\u0648\u06d5
-# before/in the face of
-\u0628\u06d5\u0631\u062f\u06d5\u0645
-# without
-\u0628\u06ce
-# except for
-\u0628\u06ce\u062c\u06af\u06d5
-# for
-\u0628\u06c6
-# on/in
-\u062f\u06d5
-\u062a\u06ce
-# with
-\u062f\u06d5\u06af\u06d5\u06b5
-# after
-\u062f\u0648\u0627\u06cc
-# except for/aside from
-\u062c\u06af\u06d5
-# in/from
-\u0644\u06d5
-\u0644\u06ce
-# in front of/before/because of
-\u0644\u06d5\u0628\u06d5\u0631
-# between/among
-\u0644\u06d5\u0628\u06d5\u06cc\u0646\u06cc
-# concerning/about
-\u0644\u06d5\u0628\u0627\u0628\u06d5\u062a
-# concerning
-\u0644\u06d5\u0628\u0627\u0631\u06d5\u06cc
-# instead of
-\u0644\u06d5\u0628\u0627\u062a\u06cc
-# beside
-\u0644\u06d5\u0628\u0646
-# instead of
-\u0644\u06d5\u0628\u0631\u06ce\u062a\u06cc
-# behind
-\u0644\u06d5\u062f\u06d5\u0645
-# with/together with
-\u0644\u06d5\u06af\u06d5\u06b5
-# by
-\u0644\u06d5\u0644\u0627\u06cc\u06d5\u0646
-# within
-\u0644\u06d5\u0646\u0627\u0648
-# between/among
-\u0644\u06d5\u0646\u06ce\u0648
-# for the sake of
-\u0644\u06d5\u067e\u06ce\u0646\u0627\u0648\u06cc
-# with respect to
-\u0644\u06d5\u0631\u06d5\u0648\u06cc
-# by means of/for
-\u0644\u06d5\u0631\u06ce
-# for the sake of
-\u0644\u06d5\u0631\u06ce\u06af\u0627
-# on/on top of/according to
-\u0644\u06d5\u0633\u06d5\u0631
-# under
-\u0644\u06d5\u0698\u06ce\u0631
-# between/among
-\u0646\u0627\u0648
-# between/among
-\u0646\u06ce\u0648\u0627\u0646
-# after
-\u067e\u0627\u0634
-# before
-\u067e\u06ce\u0634
-# like
-\u0648\u06d5\u06a9
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/rss/conf/lang/stopwords_cz.txt
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/rss/conf/lang/stopwords_cz.txt b/solr/example/example-DIH/solr/rss/conf/lang/stopwords_cz.txt
deleted file mode 100644
index 53c6097..0000000
--- a/solr/example/example-DIH/solr/rss/conf/lang/stopwords_cz.txt
+++ /dev/null
@@ -1,172 +0,0 @@
-a
-s
-k
-o
-i
-u
-v
-z
-dnes
-cz
-tímto
-bude\u0161
-budem
-byli
-jse\u0161
-m\u016fj
-sv�m
-ta
-tomto
-tohle
-tuto
-tyto
-jej
-zda
-pro\u010d
-máte
-tato
-kam
-tohoto
-kdo
-kte\u0159�
-mi
-n�m
-tom
-tomuto
-m�t
-nic
-proto
-kterou
-byla
-toho
-proto\u017ee
-asi
-ho
-na\u0161i
-napi\u0161te
-re
-co\u017e
-t�m
-tak\u017ee
-sv�ch
-jej�
-sv�mi
-jste
-aj
-tu
-tedy
-teto
-bylo
-kde
-ke
-prav�
-ji
-nad
-nejsou
-\u010di
-pod
-t�ma
-mezi
-p\u0159es
-ty
-pak
-v�m
-ani
-kdy\u017e
-v\u0161ak
-neg
-jsem
-tento
-\u010dl�nku
-\u010dl�nky
-aby
-jsme
-p\u0159ed
-pta
-jejich
-byl
-je\u0161t\u011b
-a\u017e
-bez
-tak�
-pouze
-prvn�
-va\u0161e
-kter�
-n�s
-nov�
-tipy
-pokud
-m\u016f\u017ee
-strana
-jeho
-sv�
-jin�
-zprávy
-nov�
-není
-v�s
-jen
-podle
-zde
-u\u017e
-být
-více
-bude
-ji\u017e
-ne\u017e
-kter�
-by
-kter�
-co
-nebo
-ten
-tak
-m�
-p\u0159i
-od
-po
-jsou
-jak
-dal\u0161�
-ale
-si
-se
-ve
-to
-jako
-za
-zp\u011bt
-ze
-do
-pro
-je
-na
-atd
-atp
-jakmile
-p\u0159i\u010dem\u017e
-j�
-on
-ona
-ono
-oni
-ony
-my
-vy
-j�
-ji
-m\u011b
-mne
-jemu
-tomu
-t\u011bm
-t\u011bmu
-n\u011bmu
-n\u011bmu\u017e
-jeho\u017e
-j�\u017e
-jeliko\u017e
-je\u017e
-jako\u017e
-na\u010de\u017e
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/rss/conf/lang/stopwords_da.txt
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/rss/conf/lang/stopwords_da.txt b/solr/example/example-DIH/solr/rss/conf/lang/stopwords_da.txt
deleted file mode 100644
index 42e6145..0000000
--- a/solr/example/example-DIH/solr/rss/conf/lang/stopwords_da.txt
+++ /dev/null
@@ -1,110 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
- |
- | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
-
- | A Danish stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
- | This is a ranked list (commonest to rarest) of stopwords derived from
- | a large text sample.
-
-
-og | and
-i | in
-jeg | I
-det | that (dem. pronoun)/it (pers. pronoun)
-at | that (in front of a sentence)/to (with infinitive)
-en | a/an
-den | it (pers. pronoun)/that (dem. pronoun)
-til | to/at/for/until/against/by/of/into, more
-er | present tense of "to be"
-som | who, as
-på | on/upon/in/on/at/to/after/of/with/for, on
-de | they
-med | with/by/in, along
-han | he
-af | of/by/from/off/for/in/with/on, off
-for | at/for/to/from/by/of/ago, in front/before, because
-ikke | not
-der | who/which, there/those
-var | past tense of "to be"
-mig | me/myself
-sig | oneself/himself/herself/itself/themselves
-men | but
-et | a/an/one, one (number), someone/somebody/one
-har | present tense of "to have"
-om | round/about/for/in/a, about/around/down, if
-vi | we
-min | my
-havde | past tense of "to have"
-ham | him
-hun | she
-nu | now
-over | over/above/across/by/beyond/past/on/about, over/past
-da | then, when/as/since
-fra | from/off/since, off, since
-du | you
-ud | out
-sin | his/her/its/one's
-dem | them
-os | us/ourselves
-op | up
-man | you/one
-hans | his
-hvor | where
-eller | or
-hvad | what
-skal | must/shall etc.
-selv | myself/yourself/herself/ourselves etc., even
-her | here
-alle | all/everyone/everybody etc.
-vil | will (verb)
-blev | past tense of "to stay/to remain/to get/to become"
-kunne | could
-ind | in
-når | when
-være | present tense of "to be"
-dog | however/yet/after all
-noget | something
-ville | would
-jo | you know/you see (adv), yes
-deres | their/theirs
-efter | after/behind/according to/for/by/from, later/afterwards
-ned | down
-skulle | should
-denne | this
-end | than
-dette | this
-mit | my/mine
-også | also
-under | under/beneath/below/during, below/underneath
-have | have
-dig | you
-anden | other
-hende | her
-mine | my
-alt | everything
-meget | much/very, plenty of
-sit | his, her, its, one's
-sine | his, her, its, one's
-vor | our
-mod | against
-disse | these
-hvis | if
-din | your/yours
-nogle | some
-hos | by/at
-blive | be/become
-mange | many
-ad | by/through
-bliver | present tense of "to be/to become"
-hendes | her/hers
-været | be
-thi | for (conj)
-jer | you
-sådan | such, like this/like that
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/rss/conf/lang/stopwords_de.txt
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/rss/conf/lang/stopwords_de.txt b/solr/example/example-DIH/solr/rss/conf/lang/stopwords_de.txt
deleted file mode 100644
index 86525e7..0000000
--- a/solr/example/example-DIH/solr/rss/conf/lang/stopwords_de.txt
+++ /dev/null
@@ -1,294 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
- |
- | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
-
- | A German stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
- | The number of forms in this list is reduced significantly by passing it
- | through the German stemmer.
-
-
-aber | but
-
-alle | all
-allem
-allen
-aller
-alles
-
-als | than, as
-also | so
-am | an + dem
-an | at
-
-ander | other
-andere
-anderem
-anderen
-anderer
-anderes
-anderm
-andern
-anderr
-anders
-
-auch | also
-auf | on
-aus | out of
-bei | by
-bin | am
-bis | until
-bist | art
-da | there
-damit | with it
-dann | then
-
-der | the
-den
-des
-dem
-die
-das
-
-daß | that
-
-derselbe | the same
-derselben
-denselben
-desselben
-demselben
-dieselbe
-dieselben
-dasselbe
-
-dazu | to that
-
-dein | thy
-deine
-deinem
-deinen
-deiner
-deines
-
-denn | because
-
-derer | of those
-dessen | of him
-
-dich | thee
-dir | to thee
-du | thou
-
-dies | this
-diese
-diesem
-diesen
-dieser
-dieses
-
-
-doch | (several meanings)
-dort | (over) there
-
-
-durch | through
-
-ein | a
-eine
-einem
-einen
-einer
-eines
-
-einig | some
-einige
-einigem
-einigen
-einiger
-einiges
-
-einmal | once
-
-er | he
-ihn | him
-ihm | to him
-
-es | it
-etwas | something
-
-euer | your
-eure
-eurem
-euren
-eurer
-eures
-
-für | for
-gegen | towards
-gewesen | p.p. of sein
-hab | have
-habe | have
-haben | have
-hat | has
-hatte | had
-hatten | had
-hier | here
-hin | there
-hinter | behind
-
-ich | I
-mich | me
-mir | to me
-
-
-ihr | you, to her
-ihre
-ihrem
-ihren
-ihrer
-ihres
-euch | to you
-
-im | in + dem
-in | in
-indem | while
-ins | in + das
-ist | is
-
-jede | each, every
-jedem
-jeden
-jeder
-jedes
-
-jene | that
-jenem
-jenen
-jener
-jenes
-
-jetzt | now
-kann | can
-
-kein | no
-keine
-keinem
-keinen
-keiner
-keines
-
-können | can
-könnte | could
-machen | do
-man | one
-
-manche | some, many a
-manchem
-manchen
-mancher
-manches
-
-mein | my
-meine
-meinem
-meinen
-meiner
-meines
-
-mit | with
-muss | must
-musste | had to
-nach | to(wards)
-nicht | not
-nichts | nothing
-noch | still, yet
-nun | now
-nur | only
-ob | whether
-oder | or
-ohne | without
-sehr | very
-
-sein | his
-seine
-seinem
-seinen
-seiner
-seines
-
-selbst | self
-sich | herself
-
-sie | they, she
-ihnen | to them
-
-sind | are
-so | so
-
-solche | such
-solchem
-solchen
-solcher
-solches
-
-soll | shall
-sollte | should
-sondern | but
-sonst | else
-über | over
-um | about, around
-und | and
-
-uns | us
-unse
-unsem
-unsen
-unser
-unses
-
-unter | under
-viel | much
-vom | von + dem
-von | from
-vor | before
-während | while
-war | was
-waren | were
-warst | wast
-was | what
-weg | away, off
-weil | because
-weiter | further
-
-welche | which
-welchem
-welchen
-welcher
-welches
-
-wenn | when
-werde | will
-werden | will
-wie | how
-wieder | again
-will | want
-wir | we
-wird | will
-wirst | willst
-wo | where
-wollen | want
-wollte | wanted
-würde | would
-würden | would
-zu | to
-zum | zu + dem
-zur | zu + der
-zwar | indeed
-zwischen | between
-
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/rss/conf/lang/stopwords_el.txt
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/rss/conf/lang/stopwords_el.txt b/solr/example/example-DIH/solr/rss/conf/lang/stopwords_el.txt
deleted file mode 100644
index 232681f..0000000
--- a/solr/example/example-DIH/solr/rss/conf/lang/stopwords_el.txt
+++ /dev/null
@@ -1,78 +0,0 @@
-# Lucene Greek Stopwords list
-# Note: by default this file is used after GreekLowerCaseFilter,
-# so when modifying this file use '\u03c3' instead of '\u03c2'
-\u03bf
-\u03b7
-\u03c4\u03bf
-\u03bf\u03b9
-\u03c4\u03b1
-\u03c4\u03bf\u03c5
-\u03c4\u03b7\u03c3
-\u03c4\u03c9\u03bd
-\u03c4\u03bf\u03bd
-\u03c4\u03b7\u03bd
-\u03ba\u03b1\u03b9
-\u03ba\u03b9
-\u03ba
-\u03b5\u03b9\u03bc\u03b1\u03b9
-\u03b5\u03b9\u03c3\u03b1\u03b9
-\u03b5\u03b9\u03bd\u03b1\u03b9
-\u03b5\u03b9\u03bc\u03b1\u03c3\u03c4\u03b5
-\u03b5\u03b9\u03c3\u03c4\u03b5
-\u03c3\u03c4\u03bf
-\u03c3\u03c4\u03bf\u03bd
-\u03c3\u03c4\u03b7
-\u03c3\u03c4\u03b7\u03bd
-\u03bc\u03b1
-\u03b1\u03bb\u03bb\u03b1
-\u03b1\u03c0\u03bf
-\u03b3\u03b9\u03b1
-\u03c0\u03c1\u03bf\u03c3
-\u03bc\u03b5
-\u03c3\u03b5
-\u03c9\u03c3
-\u03c0\u03b1\u03c1\u03b1
-\u03b1\u03bd\u03c4\u03b9
-\u03ba\u03b1\u03c4\u03b1
-\u03bc\u03b5\u03c4\u03b1
-\u03b8\u03b1
-\u03bd\u03b1
-\u03b4\u03b5
-\u03b4\u03b5\u03bd
-\u03bc\u03b7
-\u03bc\u03b7\u03bd
-\u03b5\u03c0\u03b9
-\u03b5\u03bd\u03c9
-\u03b5\u03b1\u03bd
-\u03b1\u03bd
-\u03c4\u03bf\u03c4\u03b5
-\u03c0\u03bf\u03c5
-\u03c0\u03c9\u03c3
-\u03c0\u03bf\u03b9\u03bf\u03c3
-\u03c0\u03bf\u03b9\u03b1
-\u03c0\u03bf\u03b9\u03bf
-\u03c0\u03bf\u03b9\u03bf\u03b9
-\u03c0\u03bf\u03b9\u03b5\u03c3
-\u03c0\u03bf\u03b9\u03c9\u03bd
-\u03c0\u03bf\u03b9\u03bf\u03c5\u03c3
-\u03b1\u03c5\u03c4\u03bf\u03c3
-\u03b1\u03c5\u03c4\u03b7
-\u03b1\u03c5\u03c4\u03bf
-\u03b1\u03c5\u03c4\u03bf\u03b9
-\u03b1\u03c5\u03c4\u03c9\u03bd
-\u03b1\u03c5\u03c4\u03bf\u03c5\u03c3
-\u03b1\u03c5\u03c4\u03b5\u03c3
-\u03b1\u03c5\u03c4\u03b1
-\u03b5\u03ba\u03b5\u03b9\u03bd\u03bf\u03c3
-\u03b5\u03ba\u03b5\u03b9\u03bd\u03b7
-\u03b5\u03ba\u03b5\u03b9\u03bd\u03bf
-\u03b5\u03ba\u03b5\u03b9\u03bd\u03bf\u03b9
-\u03b5\u03ba\u03b5\u03b9\u03bd\u03b5\u03c3
-\u03b5\u03ba\u03b5\u03b9\u03bd\u03b1
-\u03b5\u03ba\u03b5\u03b9\u03bd\u03c9\u03bd
-\u03b5\u03ba\u03b5\u03b9\u03bd\u03bf\u03c5\u03c3
-\u03bf\u03c0\u03c9\u03c3
-\u03bf\u03bc\u03c9\u03c3
-\u03b9\u03c3\u03c9\u03c3
-\u03bf\u03c3\u03bf
-\u03bf\u03c4\u03b9
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/580f6e98/solr/example/example-DIH/solr/rss/conf/lang/stopwords_en.txt
----------------------------------------------------------------------
diff --git a/solr/example/example-DIH/solr/rss/conf/lang/stopwords_en.txt b/solr/example/example-DIH/solr/rss/conf/lang/stopwords_en.txt
deleted file mode 100644
index 2c164c0..0000000
--- a/solr/example/example-DIH/solr/rss/conf/lang/stopwords_en.txt
+++ /dev/null
@@ -1,54 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# a couple of test stopwords to test that the words are really being
-# configured from this file:
-stopworda
-stopwordb
-
-# Standard english stop words taken from Lucene's StopAnalyzer
-a
-an
-and
-are
-as
-at
-be
-but
-by
-for
-if
-in
-into
-is
-it
-no
-not
-of
-on
-or
-such
-that
-the
-their
-then
-there
-these
-they
-this
-to
-was
-will
-with