You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sa...@apache.org on 2017/03/24 16:36:58 UTC
[17/62] lucene-solr:master: SOLR-9221: Remove Solr contribs:
map-reduce, morphlines-core and morphlines-cell
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stoptags_ja.txt
----------------------------------------------------------------------
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stoptags_ja.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stoptags_ja.txt
deleted file mode 100644
index 71b7508..0000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stoptags_ja.txt
+++ /dev/null
@@ -1,420 +0,0 @@
-#
-# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter.
-#
-# Any token with a part-of-speech tag that exactly matches one of those defined in
-# this file is removed from the token stream.
-#
-# Set your own stoptags by uncommenting the lines below. Note that comments are
-# not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists,
-# etc. that can be useful for building your own stoptag set.
-#
-# The entire possible tagset is provided below for convenience.
-#
-#####
-# noun: unclassified nouns
-#\u540d\u8a5e
-#
-# noun-common: Common nouns or nouns where the sub-classification is undefined
-#\u540d\u8a5e-\u4e00\u822c
-#
-# noun-proper: Proper nouns where the sub-classification is undefined
-#\u540d\u8a5e-\u56fa\u6709\u540d\u8a5e
-#
-# noun-proper-misc: miscellaneous proper nouns
-#\u540d\u8a5e-\u56fa\u6709\u540d\u8a5e-\u4e00\u822c
-#
-# noun-proper-person: Personal names where the sub-classification is undefined
-#\u540d\u8a5e-\u56fa\u6709\u540d\u8a5e-\u4eba\u540d
-#
-# noun-proper-person-misc: names that cannot be divided into surname and
-# given name; foreign names; names where the surname or given name is unknown.
-# e.g. \u304a\u5e02\u306e\u65b9
-#\u540d\u8a5e-\u56fa\u6709\u540d\u8a5e-\u4eba\u540d-\u4e00\u822c
-#
-# noun-proper-person-surname: Mainly Japanese surnames.
-# e.g. \u5c71\u7530
-#\u540d\u8a5e-\u56fa\u6709\u540d\u8a5e-\u4eba\u540d-\u59d3
-#
-# noun-proper-person-given_name: Mainly Japanese given names.
-# e.g. \u592a\u90ce
-#\u540d\u8a5e-\u56fa\u6709\u540d\u8a5e-\u4eba\u540d-\u540d
-#
-# noun-proper-organization: Names representing organizations.
-# e.g. \u901a\u7523\u7701, NHK
-#\u540d\u8a5e-\u56fa\u6709\u540d\u8a5e-\u7d44\u7e54
-#
-# noun-proper-place: Place names where the sub-classification is undefined
-#\u540d\u8a5e-\u56fa\u6709\u540d\u8a5e-\u5730\u57df
-#
-# noun-proper-place-misc: Place names excluding countries.
-# e.g. \u30a2\u30b8\u30a2, \u30d0\u30eb\u30bb\u30ed\u30ca, \u4eac\u90fd
-#\u540d\u8a5e-\u56fa\u6709\u540d\u8a5e-\u5730\u57df-\u4e00\u822c
-#
-# noun-proper-place-country: Country names.
-# e.g. \u65e5\u672c, \u30aa\u30fc\u30b9\u30c8\u30e9\u30ea\u30a2
-#\u540d\u8a5e-\u56fa\u6709\u540d\u8a5e-\u5730\u57df-\u56fd
-#
-# noun-pronoun: Pronouns where the sub-classification is undefined
-#\u540d\u8a5e-\u4ee3\u540d\u8a5e
-#
-# noun-pronoun-misc: miscellaneous pronouns:
-# e.g. \u305d\u308c, \u3053\u3053, \u3042\u3044\u3064, \u3042\u306a\u305f, \u3042\u3061\u3053\u3061, \u3044\u304f\u3064, \u3069\u3053\u304b, \u306a\u306b, \u307f\u306a\u3055\u3093, \u307f\u3093\u306a, \u308f\u305f\u304f\u3057, \u308f\u308c\u308f\u308c
-#\u540d\u8a5e-\u4ee3\u540d\u8a5e-\u4e00\u822c
-#
-# noun-pronoun-contraction: Spoken language contraction made by combining a
-# pronoun and the particle 'wa'.
-# e.g. \u3042\u308a\u3083, \u3053\u308a\u3083, \u3053\u308a\u3083\u3042, \u305d\u308a\u3083, \u305d\u308a\u3083\u3042
-#\u540d\u8a5e-\u4ee3\u540d\u8a5e-\u7e2e\u7d04
-#
-# noun-adverbial: Temporal nouns such as names of days or months that behave
-# like adverbs. Nouns that represent amount or ratios and can be used adverbially,
-# e.g. \u91d1\u66dc, \u4e00\u6708, \u5348\u5f8c, \u5c11\u91cf
-#\u540d\u8a5e-\u526f\u8a5e\u53ef\u80fd
-#
-# noun-verbal: Nouns that take arguments with case and can appear followed by
-# 'suru' and related verbs (\u3059\u308b, \u3067\u304d\u308b, \u306a\u3055\u308b, \u304f\u3060\u3055\u308b)
-# e.g. \u30a4\u30f3\u30d7\u30c3\u30c8, \u611b\u7740, \u60aa\u5316, \u60aa\u6226\u82e6\u95d8, \u4e00\u5b89\u5fc3, \u4e0b\u53d6\u308a
-#\u540d\u8a5e-\u30b5\u5909\u63a5\u7d9a
-#
-# noun-adjective-base: The base form of adjectives, words that appear before \u306a ("na")
-# e.g. \u5065\u5eb7, \u5b89\u6613, \u99c4\u76ee, \u3060\u3081
-#\u540d\u8a5e-\u5f62\u5bb9\u52d5\u8a5e\u8a9e\u5e79
-#
-# noun-numeric: Arabic numbers, Chinese numerals, and counters like \u4f55 (\u56de), \u6570.
-# e.g. 0, 1, 2, \u4f55, \u6570, \u5e7e
-#\u540d\u8a5e-\u6570
-#
-# noun-affix: noun affixes where the sub-classification is undefined
-#\u540d\u8a5e-\u975e\u81ea\u7acb
-#
-# noun-affix-misc: Of adnominalizers, the case-marker \u306e ("no"), and words that
-# attach to the base form of inflectional words, words that cannot be classified
-# into any of the other categories below. This category includes indefinite nouns.
-# e.g. \u3042\u304b\u3064\u304d, \u6681, \u304b\u3044, \u7532\u6590, \u6c17, \u304d\u3089\u3044, \u5acc\u3044, \u304f\u305b, \u7656, \u3053\u3068, \u4e8b, \u3054\u3068, \u6bce, \u3057\u3060\u3044, \u6b21\u7b2c,
-# \u9806, \u305b\u3044, \u6240\u70ba, \u3064\u3044\u3067, \u5e8f\u3067, \u3064\u3082\u308a, \u7a4d\u3082\u308a, \u70b9, \u3069\u3053\u308d, \u306e, \u306f\u305a, \u7b48, \u306f\u305a\u307f, \u5f3e\u307f,
-# \u62cd\u5b50, \u3075\u3046, \u3075\u308a, \u632f\u308a, \u307b\u3046, \u65b9, \u65e8, \u3082\u306e, \u7269, \u8005, \u3086\u3048, \u6545, \u3086\u3048\u3093, \u6240\u4ee5, \u308f\u3051, \u8a33,
-# \u308f\u308a, \u5272\u308a, \u5272, \u3093-\u53e3\u8a9e/, \u3082\u3093-\u53e3\u8a9e/
-#\u540d\u8a5e-\u975e\u81ea\u7acb-\u4e00\u822c
-#
-# noun-affix-adverbial: noun affixes that can behave as adverbs.
-# e.g. \u3042\u3044\u3060, \u9593, \u3042\u3052\u304f, \u6319\u3052\u53e5, \u3042\u3068, \u5f8c, \u4f59\u308a, \u4ee5\u5916, \u4ee5\u964d, \u4ee5\u5f8c, \u4ee5\u4e0a, \u4ee5\u524d, \u4e00\u65b9, \u3046\u3048,
-# \u4e0a, \u3046\u3061, \u5185, \u304a\u308a, \u6298\u308a, \u304b\u304e\u308a, \u9650\u308a, \u304d\u308a, \u3063\u304d\u308a, \u7d50\u679c, \u3053\u308d, \u9803, \u3055\u3044, \u969b, \u6700\u4e2d, \u3055\u306a\u304b,
-# \u6700\u4e2d, \u3058\u305f\u3044, \u81ea\u4f53, \u305f\u3073, \u5ea6, \u305f\u3081, \u70ba, \u3064\u3069, \u90fd\u5ea6, \u3068\u304a\u308a, \u901a\u308a, \u3068\u304d, \u6642, \u3068\u3053\u308d, \u6240,
-# \u3068\u305f\u3093, \u9014\u7aef, \u306a\u304b, \u4e2d, \u306e\u3061, \u5f8c, \u3070\u3042\u3044, \u5834\u5408, \u65e5, \u3076\u3093, \u5206, \u307b\u304b, \u4ed6, \u307e\u3048, \u524d, \u307e\u307e,
-# \u5118, \u4fad, \u307f\u304e\u308a, \u77e2\u5148
-#\u540d\u8a5e-\u975e\u81ea\u7acb-\u526f\u8a5e\u53ef\u80fd
-#
-# noun-affix-aux: noun affixes treated as \u52a9\u52d5\u8a5e ("auxiliary verb") in school grammars
-# with the stem \u3088\u3046(\u3060) ("you(da)").
-# e.g. \u3088\u3046, \u3084\u3046, \u69d8 (\u3088\u3046)
-#\u540d\u8a5e-\u975e\u81ea\u7acb-\u52a9\u52d5\u8a5e\u8a9e\u5e79
-#
-# noun-affix-adjective-base: noun affixes that can connect to the indeclinable
-# connection form \u306a (aux "da").
-# e.g. \u307f\u305f\u3044, \u3075\u3046
-#\u540d\u8a5e-\u975e\u81ea\u7acb-\u5f62\u5bb9\u52d5\u8a5e\u8a9e\u5e79
-#
-# noun-special: special nouns where the sub-classification is undefined.
-#\u540d\u8a5e-\u7279\u6b8a
-#
-# noun-special-aux: The \u305d\u3046\u3060 ("souda") stem form that is used for reporting news, is
-# treated as \u52a9\u52d5\u8a5e ("auxiliary verb") in school grammars, and attaches to the base
-# form of inflectional words.
-# e.g. \u305d\u3046
-#\u540d\u8a5e-\u7279\u6b8a-\u52a9\u52d5\u8a5e\u8a9e\u5e79
-#
-# noun-suffix: noun suffixes where the sub-classification is undefined.
-#\u540d\u8a5e-\u63a5\u5c3e
-#
-# noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect
-# to \u30ac\u30eb or \u30bf\u30a4 and can combine into compound nouns, words that cannot be classified into
-# any of the other categories below. In general, this category is more inclusive than
-# \u63a5\u5c3e\u8a9e ("suffix") and is usually the last element in a compound noun.
-# e.g. \u304a\u304d, \u304b\u305f, \u65b9, \u7532\u6590 (\u304c\u3044), \u304c\u304b\u308a, \u304e\u307f, \u6c17\u5473, \u3050\u308b\u307f, (\uff5e\u3057\u305f) \u3055, \u6b21\u7b2c, \u6e08 (\u305a) \u307f,
-# \u3088\u3046, (\u3067\u304d)\u3063\u3053, \u611f, \u89b3, \u6027, \u5b66, \u985e, \u9762, \u7528
-#\u540d\u8a5e-\u63a5\u5c3e-\u4e00\u822c
-#
-# noun-suffix-person: Suffixes that form nouns and attach to person names more often
-# than other nouns.
-# e.g. \u541b, \u69d8, \u8457
-#\u540d\u8a5e-\u63a5\u5c3e-\u4eba\u540d
-#
-# noun-suffix-place: Suffixes that form nouns and attach to place names more often
-# than other nouns.
-# e.g. \u753a, \u5e02, \u770c
-#\u540d\u8a5e-\u63a5\u5c3e-\u5730\u57df
-#
-# noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that
-# can appear before \u30b9\u30eb ("suru").
-# e.g. \u5316, \u8996, \u5206\u3051, \u5165\u308a, \u843d\u3061, \u8cb7\u3044
-#\u540d\u8a5e-\u63a5\u5c3e-\u30b5\u5909\u63a5\u7d9a
-#
-# noun-suffix-aux: The stem form of \u305d\u3046\u3060 (\u69d8\u614b) that is used to indicate conditions,
-# is treated as \u52a9\u52d5\u8a5e ("auxiliary verb") in school grammars, and attaches to the
-# conjunctive form of inflectional words.
-# e.g. \u305d\u3046
-#\u540d\u8a5e-\u63a5\u5c3e-\u52a9\u52d5\u8a5e\u8a9e\u5e79
-#
-# noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive
-# form of inflectional words and appear before the copula \u3060 ("da").
-# e.g. \u7684, \u3052, \u304c\u3061
-#\u540d\u8a5e-\u63a5\u5c3e-\u5f62\u5bb9\u52d5\u8a5e\u8a9e\u5e79
-#
-# noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs.
-# e.g. \u5f8c (\u3054), \u4ee5\u5f8c, \u4ee5\u964d, \u4ee5\u524d, \u524d\u5f8c, \u4e2d, \u672b, \u4e0a, \u6642 (\u3058)
-#\u540d\u8a5e-\u63a5\u5c3e-\u526f\u8a5e\u53ef\u80fd
-#
-# noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category
-# is more inclusive than \u52a9\u6570\u8a5e ("classifier") and includes common nouns that attach
-# to numbers.
-# e.g. \u500b, \u3064, \u672c, \u518a, \u30d1\u30fc\u30bb\u30f3\u30c8, cm, kg, \u30ab\u6708, \u304b\u56fd, \u533a\u753b, \u6642\u9593, \u6642\u534a
-#\u540d\u8a5e-\u63a5\u5c3e-\u52a9\u6570\u8a5e
-#
-# noun-suffix-special: Special suffixes that mainly attach to inflecting words.
-# e.g. (\u697d\u3057) \u3055, (\u8003\u3048) \u65b9
-#\u540d\u8a5e-\u63a5\u5c3e-\u7279\u6b8a
-#
-# noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words
-# together.
-# e.g. (\u65e5\u672c) \u5bfe (\u30a2\u30e1\u30ea\u30ab), \u5bfe (\u30a2\u30e1\u30ea\u30ab), (3) \u5bfe (5), (\u5973\u512a) \u517c (\u4e3b\u5a66)
-#\u540d\u8a5e-\u63a5\u7d9a\u8a5e\u7684
-#
-# noun-verbal_aux: Nouns that attach to the conjunctive particle \u3066 ("te") and are
-# semantically verb-like.
-# e.g. \u3054\u3089\u3093, \u3054\u89a7, \u5fa1\u89a7, \u9802\u6234
-#\u540d\u8a5e-\u52d5\u8a5e\u975e\u81ea\u7acb\u7684
-#
-# noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry,
-# dialects, English, etc. Currently, the only entry for \u540d\u8a5e \u5f15\u7528\u6587\u5b57\u5217 ("noun quotation")
-# is \u3044\u308f\u304f ("iwaku").
-#\u540d\u8a5e-\u5f15\u7528\u6587\u5b57\u5217
-#
-# noun-nai_adjective: Words that appear before the auxiliary verb \u306a\u3044 ("nai") and
-# behave like an adjective.
-# e.g. \u7533\u3057\u8a33, \u4ed5\u65b9, \u3068\u3093\u3067\u3082, \u9055\u3044
-#\u540d\u8a5e-\u30ca\u30a4\u5f62\u5bb9\u8a5e\u8a9e\u5e79
-#
-#####
-# prefix: unclassified prefixes
-#\u63a5\u982d\u8a5e
-#
-# prefix-nominal: Prefixes that attach to nouns (including adjective stem forms)
-# excluding numerical expressions.
-# e.g. \u304a (\u6c34), \u67d0 (\u6c0f), \u540c (\u793e), \u6545 (\uff5e\u6c0f), \u9ad8 (\u54c1\u8cea), \u304a (\u898b\u4e8b), \u3054 (\u7acb\u6d3e)
-#\u63a5\u982d\u8a5e-\u540d\u8a5e\u63a5\u7d9a
-#
-# prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb
-# in conjunctive form followed by \u306a\u308b/\u306a\u3055\u308b/\u304f\u3060\u3055\u308b.
-# e.g. \u304a (\u8aad\u307f\u306a\u3055\u3044), \u304a (\u5ea7\u308a)
-#\u63a5\u982d\u8a5e-\u52d5\u8a5e\u63a5\u7d9a
-#
-# prefix-adjectival: Prefixes that attach to adjectives.
-# e.g. \u304a (\u5bd2\u3044\u3067\u3059\u306d\u3048), \u30d0\u30ab (\u3067\u304b\u3044)
-#\u63a5\u982d\u8a5e-\u5f62\u5bb9\u8a5e\u63a5\u7d9a
-#
-# prefix-numerical: Prefixes that attach to numerical expressions.
-# e.g. \u7d04, \u304a\u3088\u305d, \u6bce\u6642
-#\u63a5\u982d\u8a5e-\u6570\u63a5\u7d9a
-#
-#####
-# verb: unclassified verbs
-#\u52d5\u8a5e
-#
-# verb-main:
-#\u52d5\u8a5e-\u81ea\u7acb
-#
-# verb-auxiliary:
-#\u52d5\u8a5e-\u975e\u81ea\u7acb
-#
-# verb-suffix:
-#\u52d5\u8a5e-\u63a5\u5c3e
-#
-#####
-# adjective: unclassified adjectives
-#\u5f62\u5bb9\u8a5e
-#
-# adjective-main:
-#\u5f62\u5bb9\u8a5e-\u81ea\u7acb
-#
-# adjective-auxiliary:
-#\u5f62\u5bb9\u8a5e-\u975e\u81ea\u7acb
-#
-# adjective-suffix:
-#\u5f62\u5bb9\u8a5e-\u63a5\u5c3e
-#
-#####
-# adverb: unclassified adverbs
-#\u526f\u8a5e
-#
-# adverb-misc: Words that can be segmented into one unit and where adnominal
-# modification is not possible.
-# e.g. \u3042\u3044\u304b\u308f\u3089\u305a, \u591a\u5206
-#\u526f\u8a5e-\u4e00\u822c
-#
-# adverb-particle_conjunction: Adverbs that can be followed by \u306e, \u306f, \u306b,
-# \u306a, \u3059\u308b, \u3060, etc.
-# e.g. \u3053\u3093\u306a\u306b, \u305d\u3093\u306a\u306b, \u3042\u3093\u306a\u306b, \u306a\u306b\u304b, \u306a\u3093\u3067\u3082
-#\u526f\u8a5e-\u52a9\u8a5e\u985e\u63a5\u7d9a
-#
-#####
-# adnominal: Words that only have noun-modifying forms.
-# e.g. \u3053\u306e, \u305d\u306e, \u3042\u306e, \u3069\u306e, \u3044\u308f\u3086\u308b, \u306a\u3093\u3089\u304b\u306e, \u4f55\u3089\u304b\u306e, \u3044\u308d\u3093\u306a, \u3053\u3046\u3044\u3046, \u305d\u3046\u3044\u3046, \u3042\u3042\u3044\u3046,
-# \u3069\u3046\u3044\u3046, \u3053\u3093\u306a, \u305d\u3093\u306a, \u3042\u3093\u306a, \u3069\u3093\u306a, \u5927\u304d\u306a, \u5c0f\u3055\u306a, \u304a\u304b\u3057\u306a, \u307b\u3093\u306e, \u305f\u3044\u3057\u305f,
-# \u300c(, \u3082) \u3055\u308b (\u3053\u3068\u306a\u304c\u3089)\u300d, \u5fae\u3005\u305f\u308b, \u5802\u3005\u305f\u308b, \u5358\u306a\u308b, \u3044\u304b\u306a\u308b, \u6211\u304c\u300d\u300c\u540c\u3058, \u4ea1\u304d
-#\u9023\u4f53\u8a5e
-#
-#####
-# conjunction: Conjunctions that can occur independently.
-# e.g. \u304c, \u3051\u308c\u3069\u3082, \u305d\u3057\u3066, \u3058\u3083\u3042, \u305d\u308c\u3069\u3053\u308d\u304b
-\u63a5\u7d9a\u8a5e
-#
-#####
-# particle: unclassified particles.
-\u52a9\u8a5e
-#
-# particle-case: case particles where the subclassification is undefined.
-\u52a9\u8a5e-\u683c\u52a9\u8a5e
-#
-# particle-case-misc: Case particles.
-# e.g. \u304b\u3089, \u304c, \u3067, \u3068, \u306b, \u3078, \u3088\u308a, \u3092, \u306e, \u306b\u3066
-\u52a9\u8a5e-\u683c\u52a9\u8a5e-\u4e00\u822c
-#
-# particle-case-quote: the "to" that appears after nouns, a person\u2019s speech,
-# quotation marks, expressions of decisions from a meeting, reasons, judgements,
-# conjectures, etc.
-# e.g. ( \u3060) \u3068 (\u8ff0\u3079\u305f.), ( \u3067\u3042\u308b) \u3068 (\u3057\u3066\u57f7\u884c\u7336\u4e88...)
-\u52a9\u8a5e-\u683c\u52a9\u8a5e-\u5f15\u7528
-#
-# particle-case-compound: Compounds of particles and verbs that mainly behave
-# like case particles.
-# e.g. \u3068\u3044\u3046, \u3068\u3044\u3063\u305f, \u3068\u304b\u3044\u3046, \u3068\u3057\u3066, \u3068\u3068\u3082\u306b, \u3068\u5171\u306b, \u3067\u3082\u3063\u3066, \u306b\u3042\u305f\u3063\u3066, \u306b\u5f53\u305f\u3063\u3066, \u306b\u5f53\u3063\u3066,
-# \u306b\u3042\u305f\u308a, \u306b\u5f53\u305f\u308a, \u306b\u5f53\u308a, \u306b\u5f53\u305f\u308b, \u306b\u3042\u305f\u308b, \u306b\u304a\u3044\u3066, \u306b\u65bc\u3044\u3066,\u306b\u65bc\u3066, \u306b\u304a\u3051\u308b, \u306b\u65bc\u3051\u308b,
-# \u306b\u304b\u3051, \u306b\u304b\u3051\u3066, \u306b\u304b\u3093\u3057, \u306b\u95a2\u3057, \u306b\u304b\u3093\u3057\u3066, \u306b\u95a2\u3057\u3066, \u306b\u304b\u3093\u3059\u308b, \u306b\u95a2\u3059\u308b, \u306b\u969b\u3057,
-# \u306b\u969b\u3057\u3066, \u306b\u3057\u305f\u304c\u3044, \u306b\u5f93\u3044, \u306b\u5f93\u3046, \u306b\u3057\u305f\u304c\u3063\u3066, \u306b\u5f93\u3063\u3066, \u306b\u305f\u3044\u3057, \u306b\u5bfe\u3057, \u306b\u305f\u3044\u3057\u3066,
-# \u306b\u5bfe\u3057\u3066, \u306b\u305f\u3044\u3059\u308b, \u306b\u5bfe\u3059\u308b, \u306b\u3064\u3044\u3066, \u306b\u3064\u304d, \u306b\u3064\u3051, \u306b\u3064\u3051\u3066, \u306b\u3064\u308c, \u306b\u3064\u308c\u3066, \u306b\u3068\u3063\u3066,
-# \u306b\u3068\u308a, \u306b\u307e\u3064\u308f\u308b, \u306b\u3088\u3063\u3066, \u306b\u4f9d\u3063\u3066, \u306b\u56e0\u3063\u3066, \u306b\u3088\u308a, \u306b\u4f9d\u308a, \u306b\u56e0\u308a, \u306b\u3088\u308b, \u306b\u4f9d\u308b, \u306b\u56e0\u308b,
-# \u306b\u308f\u305f\u3063\u3066, \u306b\u308f\u305f\u308b, \u3092\u3082\u3063\u3066, \u3092\u4ee5\u3063\u3066, \u3092\u901a\u3058, \u3092\u901a\u3058\u3066, \u3092\u901a\u3057\u3066, \u3092\u3081\u3050\u3063\u3066, \u3092\u3081\u3050\u308a, \u3092\u3081\u3050\u308b,
-# \u3063\u3066-\u53e3\u8a9e/, \u3061\u3085\u3046-\u95a2\u897f\u5f01\u300c\u3068\u3044\u3046\u300d/, (\u4f55) \u3066\u3044\u3046 (\u4eba)-\u53e3\u8a9e/, \u3063\u3066\u3044\u3046-\u53e3\u8a9e/, \u3068\u3044\u3075, \u3068\u304b\u3044\u3075
-\u52a9\u8a5e-\u683c\u52a9\u8a5e-\u9023\u8a9e
-#
-# particle-conjunctive:
-# e.g. \u304b\u3089, \u304b\u3089\u306b\u306f, \u304c, \u3051\u308c\u3069, \u3051\u308c\u3069\u3082, \u3051\u3069, \u3057, \u3064\u3064, \u3066, \u3067, \u3068, \u3068\u3053\u308d\u304c, \u3069\u3053\u308d\u304b, \u3068\u3082, \u3069\u3082,
-# \u306a\u304c\u3089, \u306a\u308a, \u306e\u3067, \u306e\u306b, \u3070, \u3082\u306e\u306e, \u3084 ( \u3057\u305f), \u3084\u3044\u306a\u3084, (\u3053\u308d\u3093) \u3058\u3083(\u3044\u3051\u306a\u3044)-\u53e3\u8a9e/,
-# (\u884c\u3063) \u3061\u3083(\u3044\u3051\u306a\u3044)-\u53e3\u8a9e/, (\u8a00\u3063) \u305f\u3063\u3066 (\u3057\u304b\u305f\u304c\u306a\u3044)-\u53e3\u8a9e/, (\u305d\u308c\u304c\u306a\u304f)\u3063\u305f\u3063\u3066 (\u5e73\u6c17)-\u53e3\u8a9e/
-\u52a9\u8a5e-\u63a5\u7d9a\u52a9\u8a5e
-#
-# particle-dependency:
-# e.g. \u3053\u305d, \u3055\u3048, \u3057\u304b, \u3059\u3089, \u306f, \u3082, \u305e
-\u52a9\u8a5e-\u4fc2\u52a9\u8a5e
-#
-# particle-adverbial:
-# e.g. \u304c\u3066\u3089, \u304b\u3082, \u304f\u3089\u3044, \u4f4d, \u3050\u3089\u3044, \u3057\u3082, (\u5b66\u6821) \u3058\u3083(\u3053\u308c\u304c\u6d41\u884c\u3063\u3066\u3044\u308b)-\u53e3\u8a9e/,
-# (\u305d\u308c)\u3058\u3083\u3042 (\u3088\u304f\u306a\u3044)-\u53e3\u8a9e/, \u305a\u3064, (\u79c1) \u306a\u305e, \u306a\u3069, (\u79c1) \u306a\u308a (\u306b), (\u5148\u751f) \u306a\u3093\u304b (\u5927\u5acc\u3044)-\u53e3\u8a9e/,
-# (\u79c1) \u306a\u3093\u305e, (\u5148\u751f) \u306a\u3093\u3066 (\u5927\u5acc\u3044)-\u53e3\u8a9e/, \u306e\u307f, \u3060\u3051, (\u79c1) \u3060\u3063\u3066-\u53e3\u8a9e/, \u3060\u306b,
-# (\u5f7c)\u3063\u305f\u3089-\u53e3\u8a9e/, (\u304a\u8336) \u3067\u3082 (\u3044\u304b\u304c), \u7b49 (\u3068\u3046), (\u4eca\u5f8c) \u3068\u3082, \u3070\u304b\u308a, \u3070\u3063\u304b-\u53e3\u8a9e/, \u3070\u3063\u304b\u308a-\u53e3\u8a9e/,
-# \u307b\u3069, \u7a0b, \u307e\u3067, \u8fc4, (\u8ab0) \u3082 (\u304c)([\u52a9\u8a5e-\u683c\u52a9\u8a5e] \u304a\u3088\u3073 [\u52a9\u8a5e-\u4fc2\u52a9\u8a5e] \u306e\u524d\u306b\u4f4d\u7f6e\u3059\u308b\u300c\u3082\u300d)
-\u52a9\u8a5e-\u526f\u52a9\u8a5e
-#
-# particle-interjective: particles with interjective grammatical roles.
-# e.g. (\u677e\u5cf6) \u3084
-\u52a9\u8a5e-\u9593\u6295\u52a9\u8a5e
-#
-# particle-coordinate:
-# e.g. \u3068, \u305f\u308a, \u3060\u306e, \u3060\u308a, \u3068\u304b, \u306a\u308a, \u3084, \u3084\u3089
-\u52a9\u8a5e-\u4e26\u7acb\u52a9\u8a5e
-#
-# particle-final:
-# e.g. \u304b\u3044, \u304b\u3057\u3089, \u3055, \u305c, (\u3060)\u3063\u3051-\u53e3\u8a9e/, (\u3068\u307e\u3063\u3066\u308b) \u3067-\u65b9\u8a00/, \u306a, \u30ca, \u306a\u3042-\u53e3\u8a9e/, \u305e, \u306d, \u30cd,
-# \u306d\u3047-\u53e3\u8a9e/, \u306d\u3048-\u53e3\u8a9e/, \u306d\u3093-\u65b9\u8a00/, \u306e, \u306e\u3046-\u53e3\u8a9e/, \u3084, \u3088, \u30e8, \u3088\u3049-\u53e3\u8a9e/, \u308f, \u308f\u3044-\u53e3\u8a9e/
-\u52a9\u8a5e-\u7d42\u52a9\u8a5e
-#
-# particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is
-# adverbial, conjunctive, or sentence final. For example:
-# (a) \u300cA \u304b B \u304b\u300d. Ex:\u300c(\u56fd\u5185\u3067\u904b\u7528\u3059\u308b) \u304b,(\u6d77\u5916\u3067\u904b\u7528\u3059\u308b) \u304b (.)\u300d
-# (b) Inside an adverb phrase. Ex:\u300c(\u5e78\u3044\u3068\u3044\u3046) \u304b (, \u6b7b\u8005\u306f\u3044\u306a\u304b\u3063\u305f.)\u300d
-# \u300c(\u7948\u308a\u304c\u5c4a\u3044\u305f\u305b\u3044) \u304b (, \u8a66\u9a13\u306b\u5408\u683c\u3057\u305f.)\u300d
-# (c) \u300c\u304b\u306e\u3088\u3046\u306b\u300d. Ex:\u300c(\u4f55\u3082\u306a\u304b\u3063\u305f) \u304b (\u306e\u3088\u3046\u306b\u632f\u308b\u821e\u3063\u305f.)\u300d
-# e.g. \u304b
-\u52a9\u8a5e-\u526f\u52a9\u8a5e\uff0f\u4e26\u7acb\u52a9\u8a5e\uff0f\u7d42\u52a9\u8a5e
-#
-# particle-adnominalizer: The "no" that attaches to nouns and modifies
-# non-inflectional words.
-\u52a9\u8a5e-\u9023\u4f53\u5316
-#
-# particle-adverbializer: The "ni" and "to" that appear following nouns and adverbs
-# that are giongo, giseigo, or gitaigo.
-# e.g. \u306b, \u3068
-\u52a9\u8a5e-\u526f\u8a5e\u5316
-#
-# particle-special: A particle that does not fit into one of the above classifications.
-# This includes particles that are used in Tanka, Haiku, and other poetry.
-# e.g. \u304b\u306a, \u3051\u3080, ( \u3057\u305f\u3060\u308d\u3046) \u306b, (\u3042\u3093\u305f) \u306b\u3083(\u308f\u304b\u3089\u3093), (\u4ffa) \u3093 (\u5bb6)
-\u52a9\u8a5e-\u7279\u6b8a
-#
-#####
-# auxiliary-verb:
-\u52a9\u52d5\u8a5e
-#
-#####
-# interjection: Greetings and other exclamations.
-# e.g. \u304a\u306f\u3088\u3046, \u304a\u306f\u3088\u3046\u3054\u3056\u3044\u307e\u3059, \u3053\u3093\u306b\u3061\u306f, \u3053\u3093\u3070\u3093\u306f, \u3042\u308a\u304c\u3068\u3046, \u3069\u3046\u3082\u3042\u308a\u304c\u3068\u3046, \u3042\u308a\u304c\u3068\u3046\u3054\u3056\u3044\u307e\u3059,
-# \u3044\u305f\u3060\u304d\u307e\u3059, \u3054\u3061\u305d\u3046\u3055\u307e, \u3055\u3088\u306a\u3089, \u3055\u3088\u3046\u306a\u3089, \u306f\u3044, \u3044\u3044\u3048, \u3054\u3081\u3093, \u3054\u3081\u3093\u306a\u3055\u3044
-#\u611f\u52d5\u8a5e
-#
-#####
-# symbol: unclassified Symbols.
-\u8a18\u53f7
-#
-# symbol-misc: A general symbol not in one of the categories below.
-# e.g. [\u25cb\u25ce@$\u3012\u2192+]
-\u8a18\u53f7-\u4e00\u822c
-#
-# symbol-comma: Commas
-# e.g. [,\u3001]
-\u8a18\u53f7-\u8aad\u70b9
-#
-# symbol-period: Periods and full stops.
-# e.g. [.\uff0e\u3002]
-\u8a18\u53f7-\u53e5\u70b9
-#
-# symbol-space: Full-width whitespace.
-\u8a18\u53f7-\u7a7a\u767d
-#
-# symbol-open_bracket:
-# e.g. [({\u2018\u201c\u300e\u3010]
-\u8a18\u53f7-\u62ec\u5f27\u958b
-#
-# symbol-close_bracket:
-# e.g. [)}\u2019\u201d\u300f\u300d\u3011]
-\u8a18\u53f7-\u62ec\u5f27\u9589
-#
-# symbol-alphabetic:
-#\u8a18\u53f7-\u30a2\u30eb\u30d5\u30a1\u30d9\u30c3\u30c8
-#
-#####
-# other: unclassified other
-#\u305d\u306e\u4ed6
-#
-# other-interjection: Words that are hard to classify as noun-suffixes or
-# sentence-final particles.
-# e.g. (\u3060)\u30a1
-\u305d\u306e\u4ed6-\u9593\u6295
-#
-#####
-# filler: Aizuchi that occurs during a conversation or sounds inserted as filler.
-# e.g. \u3042\u306e, \u3046\u3093\u3068, \u3048\u3068
-\u30d5\u30a3\u30e9\u30fc
-#
-#####
-# non-verbal: non-verbal sound.
-\u975e\u8a00\u8a9e\u97f3
-#
-#####
-# fragment:
-#\u8a9e\u65ad\u7247
-#
-#####
-# unknown: unknown part of speech.
-#\u672a\u77e5\u8a9e
-#
-##### End of file
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_ar.txt
----------------------------------------------------------------------
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_ar.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_ar.txt
deleted file mode 100644
index 046829d..0000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_ar.txt
+++ /dev/null
@@ -1,125 +0,0 @@
-# This file was created by Jacques Savoy and is distributed under the BSD license.
-# See http://members.unine.ch/jacques.savoy/clef/index.html.
-# Also see http://www.opensource.org/licenses/bsd-license.html
-# Cleaned on October 11, 2009 (not normalized, so use before normalization)
-# This means that when modifying this list, you might need to add some
-# redundant entries, for example containing forms with both \u0623 and \u0627
-\u0645\u0646
-\u0648\u0645\u0646
-\u0645\u0646\u0647\u0627
-\u0645\u0646\u0647
-\u0641\u064a
-\u0648\u0641\u064a
-\u0641\u064a\u0647\u0627
-\u0641\u064a\u0647
-\u0648
-\u0641
-\u062b\u0645
-\u0627\u0648
-\u0623\u0648
-\u0628
-\u0628\u0647\u0627
-\u0628\u0647
-\u0627
-\u0623
-\u0627\u0649
-\u0627\u064a
-\u0623\u064a
-\u0623\u0649
-\u0644\u0627
-\u0648\u0644\u0627
-\u0627\u0644\u0627
-\u0623\u0644\u0627
-\u0625\u0644\u0627
-\u0644\u0643\u0646
-\u0645\u0627
-\u0648\u0645\u0627
-\u0643\u0645\u0627
-\u0641\u0645\u0627
-\u0639\u0646
-\u0645\u0639
-\u0627\u0630\u0627
-\u0625\u0630\u0627
-\u0627\u0646
-\u0623\u0646
-\u0625\u0646
-\u0627\u0646\u0647\u0627
-\u0623\u0646\u0647\u0627
-\u0625\u0646\u0647\u0627
-\u0627\u0646\u0647
-\u0623\u0646\u0647
-\u0625\u0646\u0647
-\u0628\u0627\u0646
-\u0628\u0623\u0646
-\u0641\u0627\u0646
-\u0641\u0623\u0646
-\u0648\u0627\u0646
-\u0648\u0623\u0646
-\u0648\u0625\u0646
-\u0627\u0644\u062a\u0649
-\u0627\u0644\u062a\u064a
-\u0627\u0644\u0630\u0649
-\u0627\u0644\u0630\u064a
-\u0627\u0644\u0630\u064a\u0646
-\u0627\u0644\u0649
-\u0627\u0644\u064a
-\u0625\u0644\u0649
-\u0625\u0644\u064a
-\u0639\u0644\u0649
-\u0639\u0644\u064a\u0647\u0627
-\u0639\u0644\u064a\u0647
-\u0627\u0645\u0627
-\u0623\u0645\u0627
-\u0625\u0645\u0627
-\u0627\u064a\u0636\u0627
-\u0623\u064a\u0636\u0627
-\u0643\u0644
-\u0648\u0643\u0644
-\u0644\u0645
-\u0648\u0644\u0645
-\u0644\u0646
-\u0648\u0644\u0646
-\u0647\u0649
-\u0647\u064a
-\u0647\u0648
-\u0648\u0647\u0649
-\u0648\u0647\u064a
-\u0648\u0647\u0648
-\u0641\u0647\u0649
-\u0641\u0647\u064a
-\u0641\u0647\u0648
-\u0627\u0646\u062a
-\u0623\u0646\u062a
-\u0644\u0643
-\u0644\u0647\u0627
-\u0644\u0647
-\u0647\u0630\u0647
-\u0647\u0630\u0627
-\u062a\u0644\u0643
-\u0630\u0644\u0643
-\u0647\u0646\u0627\u0643
-\u0643\u0627\u0646\u062a
-\u0643\u0627\u0646
-\u064a\u0643\u0648\u0646
-\u062a\u0643\u0648\u0646
-\u0648\u0643\u0627\u0646\u062a
-\u0648\u0643\u0627\u0646
-\u063a\u064a\u0631
-\u0628\u0639\u0636
-\u0642\u062f
-\u0646\u062d\u0648
-\u0628\u064a\u0646
-\u0628\u064a\u0646\u0645\u0627
-\u0645\u0646\u0630
-\u0636\u0645\u0646
-\u062d\u064a\u062b
-\u0627\u0644\u0627\u0646
-\u0627\u0644\u0622\u0646
-\u062e\u0644\u0627\u0644
-\u0628\u0639\u062f
-\u0642\u0628\u0644
-\u062d\u062a\u0649
-\u0639\u0646\u062f
-\u0639\u0646\u062f\u0645\u0627
-\u0644\u062f\u0649
-\u062c\u0645\u064a\u0639
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_bg.txt
----------------------------------------------------------------------
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_bg.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_bg.txt
deleted file mode 100644
index 1ae4ba2..0000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_bg.txt
+++ /dev/null
@@ -1,193 +0,0 @@
-# This file was created by Jacques Savoy and is distributed under the BSD license.
-# See http://members.unine.ch/jacques.savoy/clef/index.html.
-# Also see http://www.opensource.org/licenses/bsd-license.html
-\u0430
-\u0430\u0437
-\u0430\u043a\u043e
-\u0430\u043b\u0430
-\u0431\u0435
-\u0431\u0435\u0437
-\u0431\u0435\u0448\u0435
-\u0431\u0438
-\u0431\u0438\u043b
-\u0431\u0438\u043b\u0430
-\u0431\u0438\u043b\u0438
-\u0431\u0438\u043b\u043e
-\u0431\u043b\u0438\u0437\u043e
-\u0431\u044a\u0434\u0430\u0442
-\u0431\u044a\u0434\u0435
-\u0431\u044f\u0445\u0430
-\u0432
-\u0432\u0430\u0441
-\u0432\u0430\u0448
-\u0432\u0430\u0448\u0430
-\u0432\u0435\u0440\u043e\u044f\u0442\u043d\u043e
-\u0432\u0435\u0447\u0435
-\u0432\u0437\u0435\u043c\u0430
-\u0432\u0438
-\u0432\u0438\u0435
-\u0432\u0438\u043d\u0430\u0433\u0438
-\u0432\u0441\u0435
-\u0432\u0441\u0435\u043a\u0438
-\u0432\u0441\u0438\u0447\u043a\u0438
-\u0432\u0441\u0438\u0447\u043a\u043e
-\u0432\u0441\u044f\u043a\u0430
-\u0432\u044a\u0432
-\u0432\u044a\u043f\u0440\u0435\u043a\u0438
-\u0432\u044a\u0440\u0445\u0443
-\u0433
-\u0433\u0438
-\u0433\u043b\u0430\u0432\u043d\u043e
-\u0433\u043e
-\u0434
-\u0434\u0430
-\u0434\u0430\u043b\u0438
-\u0434\u043e
-\u0434\u043e\u043a\u0430\u0442\u043e
-\u0434\u043e\u043a\u043e\u0433\u0430
-\u0434\u043e\u0440\u0438
-\u0434\u043e\u0441\u0435\u0433\u0430
-\u0434\u043e\u0441\u0442\u0430
-\u0435
-\u0435\u0434\u0432\u0430
-\u0435\u0434\u0438\u043d
-\u0435\u0442\u043e
-\u0437\u0430
-\u0437\u0430\u0434
-\u0437\u0430\u0435\u0434\u043d\u043e
-\u0437\u0430\u0440\u0430\u0434\u0438
-\u0437\u0430\u0441\u0435\u0433\u0430
-\u0437\u0430\u0442\u043e\u0432\u0430
-\u0437\u0430\u0449\u043e
-\u0437\u0430\u0449\u043e\u0442\u043e
-\u0438
-\u0438\u0437
-\u0438\u043b\u0438
-\u0438\u043c
-\u0438\u043c\u0430
-\u0438\u043c\u0430\u0442
-\u0438\u0441\u043a\u0430
-\u0439
-\u043a\u0430\u0437\u0430
-\u043a\u0430\u043a
-\u043a\u0430\u043a\u0432\u0430
-\u043a\u0430\u043a\u0432\u043e
-\u043a\u0430\u043a\u0442\u043e
-\u043a\u0430\u043a\u044a\u0432
-\u043a\u0430\u0442\u043e
-\u043a\u043e\u0433\u0430
-\u043a\u043e\u0433\u0430\u0442\u043e
-\u043a\u043e\u0435\u0442\u043e
-\u043a\u043e\u0438\u0442\u043e
-\u043a\u043e\u0439
-\u043a\u043e\u0439\u0442\u043e
-\u043a\u043e\u043b\u043a\u043e
-\u043a\u043e\u044f\u0442\u043e
-\u043a\u044a\u0434\u0435
-\u043a\u044a\u0434\u0435\u0442\u043e
-\u043a\u044a\u043c
-\u043b\u0438
-\u043c
-\u043c\u0435
-\u043c\u0435\u0436\u0434\u0443
-\u043c\u0435\u043d
-\u043c\u0438
-\u043c\u043d\u043e\u0437\u0438\u043d\u0430
-\u043c\u043e\u0433\u0430
-\u043c\u043e\u0433\u0430\u0442
-\u043c\u043e\u0436\u0435
-\u043c\u043e\u043b\u044f
-\u043c\u043e\u043c\u0435\u043d\u0442\u0430
-\u043c\u0443
-\u043d
-\u043d\u0430
-\u043d\u0430\u0434
-\u043d\u0430\u0437\u0430\u0434
-\u043d\u0430\u0439
-\u043d\u0430\u043f\u0440\u0430\u0432\u0438
-\u043d\u0430\u043f\u0440\u0435\u0434
-\u043d\u0430\u043f\u0440\u0438\u043c\u0435\u0440
-\u043d\u0430\u0441
-\u043d\u0435
-\u043d\u0435\u0433\u043e
-\u043d\u0435\u044f
-\u043d\u0438
-\u043d\u0438\u0435
-\u043d\u0438\u043a\u043e\u0439
-\u043d\u0438\u0442\u043e
-\u043d\u043e
-\u043d\u044f\u043a\u043e\u0438
-\u043d\u044f\u043a\u043e\u0439
-\u043d\u044f\u043c\u0430
-\u043e\u0431\u0430\u0447\u0435
-\u043e\u043a\u043e\u043b\u043e
-\u043e\u0441\u0432\u0435\u043d
-\u043e\u0441\u043e\u0431\u0435\u043d\u043e
-\u043e\u0442
-\u043e\u0442\u0433\u043e\u0440\u0435
-\u043e\u0442\u043d\u043e\u0432\u043e
-\u043e\u0449\u0435
-\u043f\u0430\u043a
-\u043f\u043e
-\u043f\u043e\u0432\u0435\u0447\u0435
-\u043f\u043e\u0432\u0435\u0447\u0435\u0442\u043e
-\u043f\u043e\u0434
-\u043f\u043e\u043d\u0435
-\u043f\u043e\u0440\u0430\u0434\u0438
-\u043f\u043e\u0441\u043b\u0435
-\u043f\u043e\u0447\u0442\u0438
-\u043f\u0440\u0430\u0432\u0438
-\u043f\u0440\u0435\u0434
-\u043f\u0440\u0435\u0434\u0438
-\u043f\u0440\u0435\u0437
-\u043f\u0440\u0438
-\u043f\u044a\u043a
-\u043f\u044a\u0440\u0432\u043e
-\u0441
-\u0441\u0430
-\u0441\u0430\u043c\u043e
-\u0441\u0435
-\u0441\u0435\u0433\u0430
-\u0441\u0438
-\u0441\u043a\u043e\u0440\u043e
-\u0441\u043b\u0435\u0434
-\u0441\u043c\u0435
-\u0441\u043f\u043e\u0440\u0435\u0434
-\u0441\u0440\u0435\u0434
-\u0441\u0440\u0435\u0449\u0443
-\u0441\u0442\u0435
-\u0441\u044a\u043c
-\u0441\u044a\u0441
-\u0441\u044a\u0449\u043e
-\u0442
-\u0442\u0430\u0437\u0438
-\u0442\u0430\u043a\u0430
-\u0442\u0430\u043a\u0438\u0432\u0430
-\u0442\u0430\u043a\u044a\u0432
-\u0442\u0430\u043c
-\u0442\u0432\u043e\u0439
-\u0442\u0435
-\u0442\u0435\u0437\u0438
-\u0442\u0438
-\u0442\u043d
-\u0442\u043e
-\u0442\u043e\u0432\u0430
-\u0442\u043e\u0433\u0430\u0432\u0430
-\u0442\u043e\u0437\u0438
-\u0442\u043e\u0439
-\u0442\u043e\u043b\u043a\u043e\u0432\u0430
-\u0442\u043e\u0447\u043d\u043e
-\u0442\u0440\u044f\u0431\u0432\u0430
-\u0442\u0443\u043a
-\u0442\u044a\u0439
-\u0442\u044f
-\u0442\u044f\u0445
-\u0443
-\u0445\u0430\u0440\u0435\u0441\u0432\u0430
-\u0447
-\u0447\u0435
-\u0447\u0435\u0441\u0442\u043e
-\u0447\u0440\u0435\u0437
-\u0449\u0435
-\u0449\u043e\u043c
-\u044f
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_ca.txt
----------------------------------------------------------------------
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_ca.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_ca.txt
deleted file mode 100644
index 3da65de..0000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_ca.txt
+++ /dev/null
@@ -1,220 +0,0 @@
-# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed)
-a
-abans
-ac�
-ah
-aix�
-aix�
-al
-als
-aleshores
-algun
-alguna
-algunes
-alguns
-alhora
-all�
-all�
-all�
-altra
-altre
-altres
-amb
-ambd�s
-ambdues
-apa
-aquell
-aquella
-aquelles
-aquells
-aquest
-aquesta
-aquestes
-aquests
-aqu�
-baix
-cada
-cadasc�
-cadascuna
-cadascunes
-cadascuns
-com
-contra
-d'un
-d'una
-d'unes
-d'uns
-dalt
-de
-del
-dels
-des
-despr�s
-dins
-dintre
-donat
-doncs
-durant
-e
-eh
-el
-els
-em
-en
-encara
-ens
-entre
-�rem
-eren
-�reu
-es
-�s
-esta
-est�
-est�vem
-estaven
-est�veu
-esteu
-et
-etc
-ets
-fins
-fora
-gaireb�
-ha
-han
-has
-havia
-he
-hem
-heu
-hi
-ho
-i
-igual
-iguals
-ja
-l'hi
-la
-les
-li
-li'n
-llavors
-m'he
-ma
-mal
-malgrat
-mateix
-mateixa
-mateixes
-mateixos
-me
-mentre
-m�s
-meu
-meus
-meva
-meves
-molt
-molta
-moltes
-molts
-mon
-mons
-n'he
-n'hi
-ne
-ni
-no
-nogensmenys
-nom�s
-nosaltres
-nostra
-nostre
-nostres
-o
-oh
-oi
-on
-pas
-pel
-pels
-per
-per�
-perqu�
-poc
-poca
-pocs
-poques
-potser
-propi
-qual
-quals
-quan
-quant
-que
-qu�
-quelcom
-qui
-quin
-quina
-quines
-quins
-s'ha
-s'han
-sa
-semblant
-semblants
-ses
-seu
-seus
-seva
-seva
-seves
-si
-sobre
-sobretot
-s�c
-solament
-sols
-son
-s�n
-sons
-sota
-sou
-t'ha
-t'han
-t'he
-ta
-tal
-tamb�
-tampoc
-tan
-tant
-tanta
-tantes
-teu
-teus
-teva
-teves
-ton
-tons
-tot
-tota
-totes
-tots
-un
-una
-unes
-uns
-us
-va
-vaig
-vam
-van
-vas
-veu
-vosaltres
-vostra
-vostre
-vostres
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_cz.txt
----------------------------------------------------------------------
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_cz.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_cz.txt
deleted file mode 100644
index 53c6097..0000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_cz.txt
+++ /dev/null
@@ -1,172 +0,0 @@
-a
-s
-k
-o
-i
-u
-v
-z
-dnes
-cz
-t�mto
-bude\u0161
-budem
-byli
-jse\u0161
-m\u016fj
-sv�m
-ta
-tomto
-tohle
-tuto
-tyto
-jej
-zda
-pro\u010d
-m�te
-tato
-kam
-tohoto
-kdo
-kte\u0159�
-mi
-n�m
-tom
-tomuto
-m�t
-nic
-proto
-kterou
-byla
-toho
-proto\u017ee
-asi
-ho
-na\u0161i
-napi\u0161te
-re
-co\u017e
-t�m
-tak\u017ee
-sv�ch
-jej�
-sv�mi
-jste
-aj
-tu
-tedy
-teto
-bylo
-kde
-ke
-prav�
-ji
-nad
-nejsou
-\u010di
-pod
-t�ma
-mezi
-p\u0159es
-ty
-pak
-v�m
-ani
-kdy\u017e
-v\u0161ak
-neg
-jsem
-tento
-\u010dl�nku
-\u010dl�nky
-aby
-jsme
-p\u0159ed
-pta
-jejich
-byl
-je\u0161t\u011b
-a\u017e
-bez
-tak�
-pouze
-prvn�
-va\u0161e
-kter�
-n�s
-nov�
-tipy
-pokud
-m\u016f\u017ee
-strana
-jeho
-sv�
-jin�
-zpr�vy
-nov�
-nen�
-v�s
-jen
-podle
-zde
-u\u017e
-b�t
-v�ce
-bude
-ji\u017e
-ne\u017e
-kter�
-by
-kter�
-co
-nebo
-ten
-tak
-m�
-p\u0159i
-od
-po
-jsou
-jak
-dal\u0161�
-ale
-si
-se
-ve
-to
-jako
-za
-zp\u011bt
-ze
-do
-pro
-je
-na
-atd
-atp
-jakmile
-p\u0159i\u010dem\u017e
-j�
-on
-ona
-ono
-oni
-ony
-my
-vy
-j�
-ji
-m\u011b
-mne
-jemu
-tomu
-t\u011bm
-t\u011bmu
-n\u011bmu
-n\u011bmu\u017e
-jeho\u017e
-j�\u017e
-jeliko\u017e
-je\u017e
-jako\u017e
-na\u010de\u017e
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_da.txt
----------------------------------------------------------------------
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_da.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_da.txt
deleted file mode 100644
index a3ff5fe..0000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_da.txt
+++ /dev/null
@@ -1,108 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A Danish stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
- | This is a ranked list (commonest to rarest) of stopwords derived from
- | a large text sample.
-
-
-og | and
-i | in
-jeg | I
-det | that (dem. pronoun)/it (pers. pronoun)
-at | that (in front of a sentence)/to (with infinitive)
-en | a/an
-den | it (pers. pronoun)/that (dem. pronoun)
-til | to/at/for/until/against/by/of/into, more
-er | present tense of "to be"
-som | who, as
-på | on/upon/in/on/at/to/after/of/with/for, on
-de | they
-med | with/by/in, along
-han | he
-af | of/by/from/off/for/in/with/on, off
-for | at/for/to/from/by/of/ago, in front/before, because
-ikke | not
-der | who/which, there/those
-var | past tense of "to be"
-mig | me/myself
-sig | oneself/himself/herself/itself/themselves
-men | but
-et | a/an/one, one (number), someone/somebody/one
-har | present tense of "to have"
-om | round/about/for/in/a, about/around/down, if
-vi | we
-min | my
-havde | past tense of "to have"
-ham | him
-hun | she
-nu | now
-over | over/above/across/by/beyond/past/on/about, over/past
-da | then, when/as/since
-fra | from/off/since, off, since
-du | you
-ud | out
-sin | his/her/its/one's
-dem | them
-os | us/ourselves
-op | up
-man | you/one
-hans | his
-hvor | where
-eller | or
-hvad | what
-skal | must/shall etc.
-selv | myself/yourself/herself/ourselves etc., even
-her | here
-alle | all/everyone/everybody etc.
-vil | will (verb)
-blev | past tense of "to stay/to remain/to get/to become"
-kunne | could
-ind | in
-når | when
-være | present tense of "to be"
-dog | however/yet/after all
-noget | something
-ville | would
-jo | you know/you see (adv), yes
-deres | their/theirs
-efter | after/behind/according to/for/by/from, later/afterwards
-ned | down
-skulle | should
-denne | this
-end | than
-dette | this
-mit | my/mine
-også | also
-under | under/beneath/below/during, below/underneath
-have | have
-dig | you
-anden | other
-hende | her
-mine | my
-alt | everything
-meget | much/very, plenty of
-sit | his, her, its, one's
-sine | his, her, its, one's
-vor | our
-mod | against
-disse | these
-hvis | if
-din | your/yours
-nogle | some
-hos | by/at
-blive | be/become
-mange | many
-ad | by/through
-bliver | present tense of "to be/to become"
-hendes | her/hers
-været | be
-thi | for (conj)
-jer | you
-sådan | such, like this/like that
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_de.txt
----------------------------------------------------------------------
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_de.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_de.txt
deleted file mode 100644
index f770384..0000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_de.txt
+++ /dev/null
@@ -1,292 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A German stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
- | The number of forms in this list is reduced significantly by passing it
- | through the German stemmer.
-
-
-aber | but
-
-alle | all
-allem
-allen
-aller
-alles
-
-als | than, as
-also | so
-am | an + dem
-an | at
-
-ander | other
-andere
-anderem
-anderen
-anderer
-anderes
-anderm
-andern
-anderr
-anders
-
-auch | also
-auf | on
-aus | out of
-bei | by
-bin | am
-bis | until
-bist | art
-da | there
-damit | with it
-dann | then
-
-der | the
-den
-des
-dem
-die
-das
-
-daß | that
-
-derselbe | the same
-derselben
-denselben
-desselben
-demselben
-dieselbe
-dieselben
-dasselbe
-
-dazu | to that
-
-dein | thy
-deine
-deinem
-deinen
-deiner
-deines
-
-denn | because
-
-derer | of those
-dessen | of him
-
-dich | thee
-dir | to thee
-du | thou
-
-dies | this
-diese
-diesem
-diesen
-dieser
-dieses
-
-
-doch | (several meanings)
-dort | (over) there
-
-
-durch | through
-
-ein | a
-eine
-einem
-einen
-einer
-eines
-
-einig | some
-einige
-einigem
-einigen
-einiger
-einiges
-
-einmal | once
-
-er | he
-ihn | him
-ihm | to him
-
-es | it
-etwas | something
-
-euer | your
-eure
-eurem
-euren
-eurer
-eures
-
-für | for
-gegen | towards
-gewesen | p.p. of sein
-hab | have
-habe | have
-haben | have
-hat | has
-hatte | had
-hatten | had
-hier | here
-hin | there
-hinter | behind
-
-ich | I
-mich | me
-mir | to me
-
-
-ihr | you, to her
-ihre
-ihrem
-ihren
-ihrer
-ihres
-euch | to you
-
-im | in + dem
-in | in
-indem | while
-ins | in + das
-ist | is
-
-jede | each, every
-jedem
-jeden
-jeder
-jedes
-
-jene | that
-jenem
-jenen
-jener
-jenes
-
-jetzt | now
-kann | can
-
-kein | no
-keine
-keinem
-keinen
-keiner
-keines
-
-können | can
-könnte | could
-machen | do
-man | one
-
-manche | some, many a
-manchem
-manchen
-mancher
-manches
-
-mein | my
-meine
-meinem
-meinen
-meiner
-meines
-
-mit | with
-muss | must
-musste | had to
-nach | to(wards)
-nicht | not
-nichts | nothing
-noch | still, yet
-nun | now
-nur | only
-ob | whether
-oder | or
-ohne | without
-sehr | very
-
-sein | his
-seine
-seinem
-seinen
-seiner
-seines
-
-selbst | self
-sich | herself
-
-sie | they, she
-ihnen | to them
-
-sind | are
-so | so
-
-solche | such
-solchem
-solchen
-solcher
-solches
-
-soll | shall
-sollte | should
-sondern | but
-sonst | else
-über | over
-um | about, around
-und | and
-
-uns | us
-unse
-unsem
-unsen
-unser
-unses
-
-unter | under
-viel | much
-vom | von + dem
-von | from
-vor | before
-während | while
-war | was
-waren | were
-warst | wast
-was | what
-weg | away, off
-weil | because
-weiter | further
-
-welche | which
-welchem
-welchen
-welcher
-welches
-
-wenn | when
-werde | will
-werden | will
-wie | how
-wieder | again
-will | want
-wir | we
-wird | will
-wirst | willst
-wo | where
-wollen | want
-wollte | wanted
-würde | would
-würden | would
-zu | to
-zum | zu + dem
-zur | zu + der
-zwar | indeed
-zwischen | between
-
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_el.txt
----------------------------------------------------------------------
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_el.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_el.txt
deleted file mode 100644
index 232681f..0000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_el.txt
+++ /dev/null
@@ -1,78 +0,0 @@
-# Lucene Greek Stopwords list
-# Note: by default this file is used after GreekLowerCaseFilter,
-# so when modifying this file use '\u03c3' instead of '\u03c2'
-\u03bf
-\u03b7
-\u03c4\u03bf
-\u03bf\u03b9
-\u03c4\u03b1
-\u03c4\u03bf\u03c5
-\u03c4\u03b7\u03c3
-\u03c4\u03c9\u03bd
-\u03c4\u03bf\u03bd
-\u03c4\u03b7\u03bd
-\u03ba\u03b1\u03b9
-\u03ba\u03b9
-\u03ba
-\u03b5\u03b9\u03bc\u03b1\u03b9
-\u03b5\u03b9\u03c3\u03b1\u03b9
-\u03b5\u03b9\u03bd\u03b1\u03b9
-\u03b5\u03b9\u03bc\u03b1\u03c3\u03c4\u03b5
-\u03b5\u03b9\u03c3\u03c4\u03b5
-\u03c3\u03c4\u03bf
-\u03c3\u03c4\u03bf\u03bd
-\u03c3\u03c4\u03b7
-\u03c3\u03c4\u03b7\u03bd
-\u03bc\u03b1
-\u03b1\u03bb\u03bb\u03b1
-\u03b1\u03c0\u03bf
-\u03b3\u03b9\u03b1
-\u03c0\u03c1\u03bf\u03c3
-\u03bc\u03b5
-\u03c3\u03b5
-\u03c9\u03c3
-\u03c0\u03b1\u03c1\u03b1
-\u03b1\u03bd\u03c4\u03b9
-\u03ba\u03b1\u03c4\u03b1
-\u03bc\u03b5\u03c4\u03b1
-\u03b8\u03b1
-\u03bd\u03b1
-\u03b4\u03b5
-\u03b4\u03b5\u03bd
-\u03bc\u03b7
-\u03bc\u03b7\u03bd
-\u03b5\u03c0\u03b9
-\u03b5\u03bd\u03c9
-\u03b5\u03b1\u03bd
-\u03b1\u03bd
-\u03c4\u03bf\u03c4\u03b5
-\u03c0\u03bf\u03c5
-\u03c0\u03c9\u03c3
-\u03c0\u03bf\u03b9\u03bf\u03c3
-\u03c0\u03bf\u03b9\u03b1
-\u03c0\u03bf\u03b9\u03bf
-\u03c0\u03bf\u03b9\u03bf\u03b9
-\u03c0\u03bf\u03b9\u03b5\u03c3
-\u03c0\u03bf\u03b9\u03c9\u03bd
-\u03c0\u03bf\u03b9\u03bf\u03c5\u03c3
-\u03b1\u03c5\u03c4\u03bf\u03c3
-\u03b1\u03c5\u03c4\u03b7
-\u03b1\u03c5\u03c4\u03bf
-\u03b1\u03c5\u03c4\u03bf\u03b9
-\u03b1\u03c5\u03c4\u03c9\u03bd
-\u03b1\u03c5\u03c4\u03bf\u03c5\u03c3
-\u03b1\u03c5\u03c4\u03b5\u03c3
-\u03b1\u03c5\u03c4\u03b1
-\u03b5\u03ba\u03b5\u03b9\u03bd\u03bf\u03c3
-\u03b5\u03ba\u03b5\u03b9\u03bd\u03b7
-\u03b5\u03ba\u03b5\u03b9\u03bd\u03bf
-\u03b5\u03ba\u03b5\u03b9\u03bd\u03bf\u03b9
-\u03b5\u03ba\u03b5\u03b9\u03bd\u03b5\u03c3
-\u03b5\u03ba\u03b5\u03b9\u03bd\u03b1
-\u03b5\u03ba\u03b5\u03b9\u03bd\u03c9\u03bd
-\u03b5\u03ba\u03b5\u03b9\u03bd\u03bf\u03c5\u03c3
-\u03bf\u03c0\u03c9\u03c3
-\u03bf\u03bc\u03c9\u03c3
-\u03b9\u03c3\u03c9\u03c3
-\u03bf\u03c3\u03bf
-\u03bf\u03c4\u03b9
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_en.txt
----------------------------------------------------------------------
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_en.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_en.txt
deleted file mode 100644
index 2c164c0..0000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_en.txt
+++ /dev/null
@@ -1,54 +0,0 @@
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements. See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License. You may obtain a copy of the License at
-#
-# http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# a couple of test stopwords to test that the words are really being
-# configured from this file:
-stopworda
-stopwordb
-
-# Standard english stop words taken from Lucene's StopAnalyzer
-a
-an
-and
-are
-as
-at
-be
-but
-by
-for
-if
-in
-into
-is
-it
-no
-not
-of
-on
-or
-such
-that
-the
-their
-then
-there
-these
-they
-this
-to
-was
-will
-with
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_es.txt
----------------------------------------------------------------------
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_es.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_es.txt
deleted file mode 100644
index 2db1476..0000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_es.txt
+++ /dev/null
@@ -1,354 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A Spanish stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
-
- | The following is a ranked list (commonest to rarest) of stopwords
- | deriving from a large sample of text.
-
- | Extra words have been added at the end.
-
-de | from, of
-la | the, her
-que | who, that
-el | the
-en | in
-y | and
-a | to
-los | the, them
-del | de + el
-se | himself, from him etc
-las | the, them
-por | for, by, etc
-un | a
-para | for
-con | with
-no | no
-una | a
-su | his, her
-al | a + el
- | es from SER
-lo | him
-como | how
-más | more
-pero | but
-sus | su plural
-le | to him, her
-ya | already
-o | or
- | fue from SER
-este | this
- | ha from HABER
-sí | himself etc
-porque | because
-esta | this
- | son from SER
-entre | between
- | está from ESTAR
-cuando | when
-muy | very
-sin | without
-sobre | on
- | ser from SER
- | tiene from TENER
-también | also
-me | me
-hasta | until
-hay | there is/are
-donde | where
- | han from HABER
-quien | whom, that
- | est�n from ESTAR
- | estado from ESTAR
-desde | from
-todo | all
-nos | us
-durante | during
- | estados from ESTAR
-todos | all
-uno | a
-les | to them
-ni | nor
-contra | against
-otros | other
- | fueron from SER
-ese | that
-eso | that
- | hab�a from HABER
-ante | before
-ellos | they
-e | and (variant of y)
-esto | this
-mí | me
-antes | before
-algunos | some
-qué | what?
-unos | a
-yo | I
-otro | other
-otras | other
-otra | other
-él | he
-tanto | so much, many
-esa | that
-estos | these
-mucho | much, many
-quienes | who
-nada | nothing
-muchos | many
-cual | who
- | sea from SER
-poco | few
-ella | she
-estar | to be
- | haber from HABER
-estas | these
- | estaba from ESTAR
- | estamos from ESTAR
-algunas | some
-algo | something
-nosotros | we
-
- | other forms
-
-mi | me
-mis | mi plural
-tú | thou
-te | thee
-ti | thee
-tu | thy
-tus | tu plural
-ellas | they
-nosotras | we
-vosotros | you
-vosotras | you
-os | you
-mío | mine
-mía |
-míos |
-mías |
-tuyo | thine
-tuya |
-tuyos |
-tuyas |
-suyo | his, hers, theirs
-suya |
-suyos |
-suyas |
-nuestro | ours
-nuestra |
-nuestros |
-nuestras |
-vuestro | yours
-vuestra |
-vuestros |
-vuestras |
-esos | those
-esas | those
-
- | forms of estar, to be (not including the infinitive):
-estoy
-estás
-está
-estamos
-estáis
-están
-esté
-estés
-estemos
-estéis
-estén
-estaré
-estarás
-estará
-estaremos
-estaréis
-estarán
-estaría
-estarías
-estaríamos
-estaríais
-estarían
-estaba
-estabas
-estábamos
-estabais
-estaban
-estuve
-estuviste
-estuvo
-estuvimos
-estuvisteis
-estuvieron
-estuviera
-estuvieras
-estuvi�ramos
-estuvierais
-estuvieran
-estuviese
-estuvieses
-estuvi�semos
-estuvieseis
-estuviesen
-estando
-estado
-estada
-estados
-estadas
-estad
-
- | forms of haber, to have (not including the infinitive):
-he
-has
-ha
-hemos
-hab�is
-han
-haya
-hayas
-hayamos
-hay�is
-hayan
-habr�
-habr�s
-habr�
-habremos
-habr�is
-habr�n
-habr�a
-habr�as
-habr�amos
-habr�ais
-habr�an
-hab�a
-hab�as
-hab�amos
-hab�ais
-hab�an
-hube
-hubiste
-hubo
-hubimos
-hubisteis
-hubieron
-hubiera
-hubieras
-hubi�ramos
-hubierais
-hubieran
-hubiese
-hubieses
-hubi�semos
-hubieseis
-hubiesen
-habiendo
-habido
-habida
-habidos
-habidas
-
- | forms of ser, to be (not including the infinitive):
-soy
-eres
-es
-somos
-sois
-son
-sea
-seas
-seamos
-se�is
-sean
-ser�
-ser�s
-ser�
-seremos
-ser�is
-ser�n
-ser�a
-ser�as
-ser�amos
-ser�ais
-ser�an
-era
-eras
-�ramos
-erais
-eran
-fui
-fuiste
-fue
-fuimos
-fuisteis
-fueron
-fuera
-fueras
-fu�ramos
-fuerais
-fueran
-fuese
-fueses
-fu�semos
-fueseis
-fuesen
-siendo
-sido
- | sed also means 'thirst'
-
- | forms of tener, to have (not including the infinitive):
-tengo
-tienes
-tiene
-tenemos
-ten�is
-tienen
-tenga
-tengas
-tengamos
-teng�is
-tengan
-tendr�
-tendr�s
-tendr�
-tendremos
-tendr�is
-tendr�n
-tendr�a
-tendr�as
-tendr�amos
-tendr�ais
-tendr�an
-ten�a
-ten�as
-ten�amos
-ten�ais
-ten�an
-tuve
-tuviste
-tuvo
-tuvimos
-tuvisteis
-tuvieron
-tuviera
-tuvieras
-tuvi�ramos
-tuvierais
-tuvieran
-tuviese
-tuvieses
-tuvi�semos
-tuvieseis
-tuviesen
-teniendo
-tenido
-tenida
-tenidos
-tenidas
-tened
-
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_eu.txt
----------------------------------------------------------------------
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_eu.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_eu.txt
deleted file mode 100644
index 25f1db9..0000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_eu.txt
+++ /dev/null
@@ -1,99 +0,0 @@
-# example set of basque stopwords
-al
-anitz
-arabera
-asko
-baina
-bat
-batean
-batek
-bati
-batzuei
-batzuek
-batzuetan
-batzuk
-bera
-beraiek
-berau
-berauek
-bere
-berori
-beroriek
-beste
-bezala
-da
-dago
-dira
-ditu
-du
-dute
-edo
-egin
-ere
-eta
-eurak
-ez
-gainera
-gu
-gutxi
-guzti
-haiei
-haiek
-haietan
-hainbeste
-hala
-han
-handik
-hango
-hara
-hari
-hark
-hartan
-hau
-hauei
-hauek
-hauetan
-hemen
-hemendik
-hemengo
-hi
-hona
-honek
-honela
-honetan
-honi
-hor
-hori
-horiei
-horiek
-horietan
-horko
-horra
-horrek
-horrela
-horretan
-horri
-hortik
-hura
-izan
-ni
-noiz
-nola
-non
-nondik
-nongo
-nor
-nora
-ze
-zein
-zen
-zenbait
-zenbat
-zer
-zergatik
-ziren
-zituen
-zu
-zuek
-zuen
-zuten
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_fa.txt
----------------------------------------------------------------------
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_fa.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_fa.txt
deleted file mode 100644
index 723641c..0000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_fa.txt
+++ /dev/null
@@ -1,313 +0,0 @@
-# This file was created by Jacques Savoy and is distributed under the BSD license.
-# See http://members.unine.ch/jacques.savoy/clef/index.html.
-# Also see http://www.opensource.org/licenses/bsd-license.html
-# Note: by default this file is used after normalization, so when adding entries
-# to this file, use the arabic '\u064a' instead of '\u06cc'
-\u0627\u0646\u0627\u0646
-\u0646\u062f\u0627\u0634\u062a\u0647
-\u0633\u0631\u0627\u0633\u0631
-\u062e\u064a\u0627\u0647
-\u0627\u064a\u0634\u0627\u0646
-\u0648\u064a
-\u062a\u0627\u0643\u0646\u0648\u0646
-\u0628\u064a\u0634\u062a\u0631\u064a
-\u062f\u0648\u0645
-\u067e\u0633
-\u0646\u0627\u0634\u064a
-\u0648\u06af\u0648
-\u064a\u0627
-\u062f\u0627\u0634\u062a\u0646\u062f
-\u0633\u067e\u0633
-\u0647\u0646\u06af\u0627\u0645
-\u0647\u0631\u06af\u0632
-\u067e\u0646\u062c
-\u0646\u0634\u0627\u0646
-\u0627\u0645\u0633\u0627\u0644
-\u062f\u064a\u06af\u0631
-\u06af\u0631\u0648\u0647\u064a
-\u0634\u062f\u0646\u062f
-\u0686\u0637\u0648\u0631
-\u062f\u0647
-\u0648
-\u062f\u0648
-\u0646\u062e\u0633\u062a\u064a\u0646
-\u0648\u0644\u064a
-\u0686\u0631\u0627
-\u0686\u0647
-\u0648\u0633\u0637
-\u0647
-\u0643\u062f\u0627\u0645
-\u0642\u0627\u0628\u0644
-\u064a\u0643
-\u0631\u0641\u062a
-\u0647\u0641\u062a
-\u0647\u0645\u0686\u0646\u064a\u0646
-\u062f\u0631
-\u0647\u0632\u0627\u0631
-\u0628\u0644\u0647
-\u0628\u0644\u064a
-\u0634\u0627\u064a\u062f
-\u0627\u0645\u0627
-\u0634\u0646\u0627\u0633\u064a
-\u06af\u0631\u0641\u062a\u0647
-\u062f\u0647\u062f
-\u062f\u0627\u0634\u062a\u0647
-\u062f\u0627\u0646\u0633\u062a
-\u062f\u0627\u0634\u062a\u0646
-\u062e\u0648\u0627\u0647\u064a\u0645
-\u0645\u064a\u0644\u064a\u0627\u0631\u062f
-\u0648\u0642\u062a\u064a\u0643\u0647
-\u0627\u0645\u062f
-\u062e\u0648\u0627\u0647\u062f
-\u062c\u0632
-\u0627\u0648\u0631\u062f\u0647
-\u0634\u062f\u0647
-\u0628\u0644\u0643\u0647
-\u062e\u062f\u0645\u0627\u062a
-\u0634\u062f\u0646
-\u0628\u0631\u062e\u064a
-\u0646\u0628\u0648\u062f
-\u0628\u0633\u064a\u0627\u0631\u064a
-\u062c\u0644\u0648\u06af\u064a\u0631\u064a
-\u062d\u0642
-\u0643\u0631\u062f\u0646\u062f
-\u0646\u0648\u0639\u064a
-\u0628\u0639\u0631\u064a
-\u0646\u0643\u0631\u062f\u0647
-\u0646\u0638\u064a\u0631
-\u0646\u0628\u0627\u064a\u062f
-\u0628\u0648\u062f\u0647
-\u0628\u0648\u062f\u0646
-\u062f\u0627\u062f
-\u0627\u0648\u0631\u062f
-\u0647\u0633\u062a
-\u062c\u0627\u064a\u064a
-\u0634\u0648\u062f
-\u062f\u0646\u0628\u0627\u0644
-\u062f\u0627\u062f\u0647
-\u0628\u0627\u064a\u062f
-\u0633\u0627\u0628\u0642
-\u0647\u064a\u0686
-\u0647\u0645\u0627\u0646
-\u0627\u0646\u062c\u0627
-\u0643\u0645\u062a\u0631
-\u0643\u062c\u0627\u0633\u062a
-\u06af\u0631\u062f\u062f
-\u0643\u0633\u064a
-\u062a\u0631
-\u0645\u0631\u062f\u0645
-\u062a\u0627\u0646
-\u062f\u0627\u062f\u0646
-\u0628\u0648\u062f\u0646\u062f
-\u0633\u0631\u064a
-\u062c\u062f\u0627
-\u0646\u062f\u0627\u0631\u0646\u062f
-\u0645\u06af\u0631
-\u064a\u0643\u062f\u064a\u06af\u0631
-\u062f\u0627\u0631\u062f
-\u062f\u0647\u0646\u062f
-\u0628\u0646\u0627\u0628\u0631\u0627\u064a\u0646
-\u0647\u0646\u06af\u0627\u0645\u064a
-\u0633\u0645\u062a
-\u062c\u0627
-\u0627\u0646\u0686\u0647
-\u062e\u0648\u062f
-\u062f\u0627\u062f\u0646\u062f
-\u0632\u064a\u0627\u062f
-\u062f\u0627\u0631\u0646\u062f
-\u0627\u062b\u0631
-\u0628\u062f\u0648\u0646
-\u0628\u0647\u062a\u0631\u064a\u0646
-\u0628\u064a\u0634\u062a\u0631
-\u0627\u0644\u0628\u062a\u0647
-\u0628\u0647
-\u0628\u0631\u0627\u0633\u0627\u0633
-\u0628\u064a\u0631\u0648\u0646
-\u0643\u0631\u062f
-\u0628\u0639\u0636\u064a
-\u06af\u0631\u0641\u062a
-\u062a\u0648\u064a
-\u0627\u064a
-\u0645\u064a\u0644\u064a\u0648\u0646
-\u0627\u0648
-\u062c\u0631\u064a\u0627\u0646
-\u062a\u0648\u0644
-\u0628\u0631
-\u0645\u0627\u0646\u0646\u062f
-\u0628\u0631\u0627\u0628\u0631
-\u0628\u0627\u0634\u064a\u0645
-\u0645\u062f\u062a\u064a
-\u06af\u0648\u064a\u0646\u062f
-\u0627\u0643\u0646\u0648\u0646
-\u062a\u0627
-\u062a\u0646\u0647\u0627
-\u062c\u062f\u064a\u062f
-\u0686\u0646\u062f
-\u0628\u064a
-\u0646\u0634\u062f\u0647
-\u0643\u0631\u062f\u0646
-\u0643\u0631\u062f\u0645
-\u06af\u0648\u064a\u062f
-\u0643\u0631\u062f\u0647
-\u0643\u0646\u064a\u0645
-\u0646\u0645\u064a
-\u0646\u0632\u062f
-\u0631\u0648\u064a
-\u0642\u0635\u062f
-\u0641\u0642\u0637
-\u0628\u0627\u0644\u0627\u064a
-\u062f\u064a\u06af\u0631\u0627\u0646
-\u0627\u064a\u0646
-\u062f\u064a\u0631\u0648\u0632
-\u062a\u0648\u0633\u0637
-\u0633\u0648\u0645
-\u0627\u064a\u0645
-\u062f\u0627\u0646\u0646\u062f
-\u0633\u0648\u064a
-\u0627\u0633\u062a\u0641\u0627\u062f\u0647
-\u0634\u0645\u0627
-\u0643\u0646\u0627\u0631
-\u062f\u0627\u0631\u064a\u0645
-\u0633\u0627\u062e\u062a\u0647
-\u0637\u0648\u0631
-\u0627\u0645\u062f\u0647
-\u0631\u0641\u062a\u0647
-\u0646\u062e\u0633\u062a
-\u0628\u064a\u0633\u062a
-\u0646\u0632\u062f\u064a\u0643
-\u0637\u064a
-\u0643\u0646\u064a\u062f
-\u0627\u0632
-\u0627\u0646\u0647\u0627
-\u062a\u0645\u0627\u0645\u064a
-\u062f\u0627\u0634\u062a
-\u064a\u0643\u064a
-\u0637\u0631\u064a\u0642
-\u0627\u0634
-\u0686\u064a\u0633\u062a
-\u0631\u0648\u0628
-\u0646\u0645\u0627\u064a\u062f
-\u06af\u0641\u062a
-\u0686\u0646\u062f\u064a\u0646
-\u0686\u064a\u0632\u064a
-\u062a\u0648\u0627\u0646\u062f
-\u0627\u0645
-\u0627\u064a\u0627
-\u0628\u0627
-\u0627\u0646
-\u0627\u064a\u062f
-\u062a\u0631\u064a\u0646
-\u0627\u064a\u0646\u0643\u0647
-\u062f\u064a\u06af\u0631\u064a
-\u0631\u0627\u0647
-\u0647\u0627\u064a\u064a
-\u0628\u0631\u0648\u0632
-\u0647\u0645\u0686\u0646\u0627\u0646
-\u067e\u0627\u0639\u064a\u0646
-\u0643\u0633
-\u062d\u062f\u0648\u062f
-\u0645\u062e\u062a\u0644\u0641
-\u0645\u0642\u0627\u0628\u0644
-\u0686\u064a\u0632
-\u06af\u064a\u0631\u062f
-\u0646\u062f\u0627\u0631\u062f
-\u0636\u062f
-\u0647\u0645\u0686\u0648\u0646
-\u0633\u0627\u0632\u064a
-\u0634\u0627\u0646
-\u0645\u0648\u0631\u062f
-\u0628\u0627\u0631\u0647
-\u0645\u0631\u0633\u064a
-\u062e\u0648\u064a\u0634
-\u0628\u0631\u062e\u0648\u0631\u062f\u0627\u0631
-\u0686\u0648\u0646
-\u062e\u0627\u0631\u062c
-\u0634\u0634
-\u0647\u0646\u0648\u0632
-\u062a\u062d\u062a
-\u0636\u0645\u0646
-\u0647\u0633\u062a\u064a\u0645
-\u06af\u0641\u062a\u0647
-\u0641\u0643\u0631
-\u0628\u0633\u064a\u0627\u0631
-\u067e\u064a\u0634
-\u0628\u0631\u0627\u064a
-\u0631\u0648\u0632\u0647\u0627\u064a
-\u0627\u0646\u0643\u0647
-\u0646\u062e\u0648\u0627\u0647\u062f
-\u0628\u0627\u0644\u0627
-\u0643\u0644
-\u0648\u0642\u062a\u064a
-\u0643\u064a
-\u0686\u0646\u064a\u0646
-\u0643\u0647
-\u06af\u064a\u0631\u064a
-\u0646\u064a\u0633\u062a
-\u0627\u0633\u062a
-\u0643\u062c\u0627
-\u0643\u0646\u062f
-\u0646\u064a\u0632
-\u064a\u0627\u0628\u062f
-\u0628\u0646\u062f\u064a
-\u062d\u062a\u064a
-\u062a\u0648\u0627\u0646\u0646\u062f
-\u0639\u0642\u0628
-\u062e\u0648\u0627\u0633\u062a
-\u0643\u0646\u0646\u062f
-\u0628\u064a\u0646
-\u062a\u0645\u0627\u0645
-\u0647\u0645\u0647
-\u0645\u0627
-\u0628\u0627\u0634\u0646\u062f
-\u0645\u062b\u0644
-\u0634\u062f
-\u0627\u0631\u064a
-\u0628\u0627\u0634\u062f
-\u0627\u0631\u0647
-\u0637\u0628\u0642
-\u0628\u0639\u062f
-\u0627\u06af\u0631
-\u0635\u0648\u0631\u062a
-\u063a\u064a\u0631
-\u062c\u0627\u064a
-\u0628\u064a\u0634
-\u0631\u064a\u0632\u064a
-\u0627\u0646\u062f
-\u0632\u064a\u0631\u0627
-\u0686\u06af\u0648\u0646\u0647
-\u0628\u0627\u0631
-\u0644\u0637\u0641\u0627
-\u0645\u064a
-\u062f\u0631\u0628\u0627\u0631\u0647
-\u0645\u0646
-\u062f\u064a\u062f\u0647
-\u0647\u0645\u064a\u0646
-\u06af\u0630\u0627\u0631\u064a
-\u0628\u0631\u062f\u0627\u0631\u064a
-\u0639\u0644\u062a
-\u06af\u0630\u0627\u0634\u062a\u0647
-\u0647\u0645
-\u0641\u0648\u0642
-\u0646\u0647
-\u0647\u0627
-\u0634\u0648\u0646\u062f
-\u0627\u0628\u0627\u062f
-\u0647\u0645\u0648\u0627\u0631\u0647
-\u0647\u0631
-\u0627\u0648\u0644
-\u062e\u0648\u0627\u0647\u0646\u062f
-\u0686\u0647\u0627\u0631
-\u0646\u0627\u0645
-\u0627\u0645\u0631\u0648\u0632
-\u0645\u0627\u0646
-\u0647\u0627\u064a
-\u0642\u0628\u0644
-\u0643\u0646\u0645
-\u0633\u0639\u064a
-\u062a\u0627\u0632\u0647
-\u0631\u0627
-\u0647\u0633\u062a\u0646\u062f
-\u0632\u064a\u0631
-\u062c\u0644\u0648\u064a
-\u0639\u0646\u0648\u0627\u0646
-\u0628\u0648\u062f
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_fi.txt
----------------------------------------------------------------------
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_fi.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_fi.txt
deleted file mode 100644
index addad79..0000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_fi.txt
+++ /dev/null
@@ -1,95 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/finnish/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
-| forms of BE
-
-olla
-olen
-olet
-on
-olemme
-olette
-ovat
-ole | negative form
-
-oli
-olisi
-olisit
-olisin
-olisimme
-olisitte
-olisivat
-olit
-olin
-olimme
-olitte
-olivat
-ollut
-olleet
-
-en | negation
-et
-ei
-emme
-ette
-eivät
-
-|Nom Gen Acc Part Iness Elat Illat Adess Ablat Allat Ess Trans
-min� minun minut minua minussa minusta minuun minulla minulta minulle | I
-sin� sinun sinut sinua sinussa sinusta sinuun sinulla sinulta sinulle | you
-h�n h�nen h�net h�nt� h�ness� h�nest� h�neen h�nell� h�nelt� h�nelle | he she
-me meid�n meid�t meit� meiss� meist� meihin meill� meilt� meille | we
-te teid�n teid�t teit� teiss� teist� teihin teill� teilt� teille | you
-he heid�n heid�t heit� heiss� heist� heihin heill� heilt� heille | they
-
-t�m� t�m�n t�t� t�ss� t�st� t�h�n tall� t�lt� t�lle t�n� t�ksi | this
-tuo tuon tuot� tuossa tuosta tuohon tuolla tuolta tuolle tuona tuoksi | that
-se sen sit� siin� siit� siihen sill� silt� sille sin� siksi | it
-n�m� n�iden n�it� n�iss� n�ist� n�ihin n�ill� n�ilt� n�ille n�in� n�iksi | these
-nuo noiden noita noissa noista noihin noilla noilta noille noina noiksi | those
-ne niiden niit� niiss� niist� niihin niill� niilt� niille niin� niiksi | they
-
-kuka kenen kenet ket� keness� kenest� keneen kenell� kenelt� kenelle kenen� keneksi| who
-ketk� keiden ketk� keit� keiss� keist� keihin keill� keilt� keille kein� keiksi | (pl)
-mik� mink� mink� mit� miss� mist� mihin mill� milt� mille min� miksi | which what
-mitk� | (pl)
-
-joka jonka jota jossa josta johon jolla jolta jolle jona joksi | who which
-jotka joiden joita joissa joista joihin joilla joilta joille joina joiksi | (pl)
-
-| conjunctions
-
-että | that
-ja | and
-jos | if
-koska | because
-kuin | than
-mutta | but
-niin | so
-sekä | and
-sillä | for
-tai | or
-vaan | but
-vai | or
-vaikka | although
-
-
-| prepositions
-
-kanssa | with
-mukaan | according to
-noin | about
-poikki | across
-yli | over, across
-
-| other
-
-kun | when
-niin | so
-nyt | now
-itse | self
-
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_fr.txt
----------------------------------------------------------------------
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_fr.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_fr.txt
deleted file mode 100644
index c00837ea..0000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_fr.txt
+++ /dev/null
@@ -1,183 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A French stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
-au | a + le
-aux | a + les
-avec | with
-ce | this
-ces | these
-dans | in
-de | of
-des | de + les
-du | de + le
-elle | she
-en | `of them' etc
-et | and
-eux | them
-il | he
-je | I
-la | the
-le | the
-leur | their
-lui | him
-ma | my (fem)
-mais | but
-me | me
-même | same; as in moi-même (myself) etc
-mes | my (pl)
-moi | me
-mon | my (masc)
-ne | not
-nos | our (pl)
-notre | our
-nous | we
-on | one
-ou | where
-par | by
-pas | not
-pour | for
-qu | que before vowel
-que | that
-qui | who
-sa | his, her (fem)
-se | oneself
-ses | his (pl)
-son | his, her (masc)
-sur | on
-ta | thy (fem)
-te | thee
-tes | thy (pl)
-toi | thee
-ton | thy (masc)
-tu | thou
-un | a
-une | a
-vos | your (pl)
-votre | your
-vous | you
-
- | single letter forms
-
-c | c'
-d | d'
-j | j'
-l | l'
-à | to, at
-m | m'
-n | n'
-s | s'
-t | t'
-y | there
-
- | forms of être (not including the infinitive):
-�t�
-�t�e
-�t�es
-�t�s
-�tant
-suis
-es
-est
-sommes
-�tes
-sont
-serai
-seras
-sera
-serons
-serez
-seront
-serais
-serait
-serions
-seriez
-seraient
-�tais
-�tait
-�tions
-�tiez
-�taient
-fus
-fut
-f�mes
-f�tes
-furent
-sois
-soit
-soyons
-soyez
-soient
-fusse
-fusses
-f�t
-fussions
-fussiez
-fussent
-
- | forms of avoir (not including the infinitive):
-ayant
-eu
-eue
-eues
-eus
-ai
-as
-avons
-avez
-ont
-aurai
-auras
-aura
-aurons
-aurez
-auront
-aurais
-aurait
-aurions
-auriez
-auraient
-avais
-avait
-avions
-aviez
-avaient
-eut
-e�mes
-e�tes
-eurent
-aie
-aies
-ait
-ayons
-ayez
-aient
-eusse
-eusses
-e�t
-eussions
-eussiez
-eussent
-
- | Later additions (from Jean-Christophe Deschamps)
-ceci | this
-cel� | that
-cet | this
-cette | this
-ici | here
-ils | they
-les | the (pl)
-leurs | their (pl)
-quel | which
-quels | which
-quelle | which
-quelles | which
-sans | without
-soi | oneself
-
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_ga.txt
----------------------------------------------------------------------
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_ga.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_ga.txt
deleted file mode 100644
index 9ff88d7..0000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_ga.txt
+++ /dev/null
@@ -1,110 +0,0 @@
-
-a
-ach
-ag
-agus
-an
-aon
-ar
-arna
-as
-b'
-ba
-beirt
-bh�r
-caoga
-ceathair
-ceathrar
-chomh
-cht�
-chuig
-chun
-cois
-c�ad
-c�ig
-c�igear
-d'
-daichead
-dar
-de
-deich
-deichni�r
-den
-dh�
-do
-don
-dt�
-d�
-d�r
-d�
-faoi
-faoin
-faoina
-faoin�r
-fara
-fiche
-gach
-gan
-go
-gur
-haon
-hocht
-i
-iad
-idir
-in
-ina
-ins
-in�r
-is
-le
-leis
-lena
-len�r
-m'
-mar
-mo
-m�
-na
-nach
-naoi
-naon�r
-n�
-n�
-n�or
-n�
-n�cha
-ocht
-ochtar
-os
-roimh
-sa
-seacht
-seachtar
-seacht�
-seasca
-seisear
-siad
-sibh
-sinn
-sna
-s�
-s�
-tar
-thar
-th�
-tri�r
-tr�
-tr�na
-tr�n�r
-tr�ocha
-t�
-um
-�r
-�
-�is
-�
-�
-�n
-�na
-�n�r
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_gl.txt
----------------------------------------------------------------------
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_gl.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_gl.txt
deleted file mode 100644
index d8760b1..0000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_gl.txt
+++ /dev/null
@@ -1,161 +0,0 @@
-# Galician stopwords
-a
-a�nda
-al�
-aquel
-aquela
-aquelas
-aqueles
-aquilo
-aqu�
-ao
-aos
-as
-as�
-�
-ben
-cando
-che
-co
-coa
-comigo
-con
-connosco
-contigo
-convosco
-coas
-cos
-cun
-cuns
-cunha
-cunhas
-da
-dalgunha
-dalgunhas
-dalg�n
-dalg�ns
-das
-de
-del
-dela
-delas
-deles
-desde
-deste
-do
-dos
-dun
-duns
-dunha
-dunhas
-e
-el
-ela
-elas
-eles
-en
-era
-eran
-esa
-esas
-ese
-eses
-esta
-estar
-estaba
-est�
-est�n
-este
-estes
-estiven
-estou
-eu
-�
-facer
-foi
-foron
-fun
-hab�a
-hai
-iso
-isto
-la
-las
-lle
-lles
-lo
-los
-mais
-me
-meu
-meus
-min
-mi�a
-mi�as
-moi
-na
-nas
-neste
-nin
-no
-non
-nos
-nosa
-nosas
-noso
-nosos
-n�s
-nun
-nunha
-nuns
-nunhas
-o
-os
-ou
-�
-�s
-para
-pero
-pode
-pois
-pola
-polas
-polo
-polos
-por
-que
-se
-sen�n
-ser
-seu
-seus
-sexa
-sido
-sobre
-s�a
-s�as
-tam�n
-tan
-te
-ten
-te�en
-te�o
-ter
-teu
-teus
-ti
-tido
-ti�a
-tiven
-t�a
-t�as
-un
-unha
-unhas
-uns
-vos
-vosa
-vosas
-voso
-vosos
-v�s
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_hi.txt
----------------------------------------------------------------------
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_hi.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_hi.txt
deleted file mode 100644
index 86286bb..0000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_hi.txt
+++ /dev/null
@@ -1,235 +0,0 @@
-# Also see http://www.opensource.org/licenses/bsd-license.html
-# See http://members.unine.ch/jacques.savoy/clef/index.html.
-# This file was created by Jacques Savoy and is distributed under the BSD license.
-# Note: by default this file also contains forms normalized by HindiNormalizer
-# for spelling variation (see section below), such that it can be used whether or
-# not you enable that feature. When adding additional entries to this list,
-# please add the normalized form as well.
-\u0905\u0902\u0926\u0930
-\u0905\u0924
-\u0905\u092a\u0928\u093e
-\u0905\u092a\u0928\u0940
-\u0905\u092a\u0928\u0947
-\u0905\u092d\u0940
-\u0906\u0926\u093f
-\u0906\u092a
-\u0907\u0924\u094d\u092f\u093e\u0926\u093f
-\u0907\u0928
-\u0907\u0928\u0915\u093e
-\u0907\u0928\u094d\u0939\u0940\u0902
-\u0907\u0928\u094d\u0939\u0947\u0902
-\u0907\u0928\u094d\u0939\u094b\u0902
-\u0907\u0938
-\u0907\u0938\u0915\u093e
-\u0907\u0938\u0915\u0940
-\u0907\u0938\u0915\u0947
-\u0907\u0938\u092e\u0947\u0902
-\u0907\u0938\u0940
-\u0907\u0938\u0947
-\u0909\u0928
-\u0909\u0928\u0915\u093e
-\u0909\u0928\u0915\u0940
-\u0909\u0928\u0915\u0947
-\u0909\u0928\u0915\u094b
-\u0909\u0928\u094d\u0939\u0940\u0902
-\u0909\u0928\u094d\u0939\u0947\u0902
-\u0909\u0928\u094d\u0939\u094b\u0902
-\u0909\u0938
-\u0909\u0938\u0915\u0947
-\u0909\u0938\u0940
-\u0909\u0938\u0947
-\u090f\u0915
-\u090f\u0935\u0902
-\u090f\u0938
-\u0910\u0938\u0947
-\u0914\u0930
-\u0915\u0908
-\u0915\u0930
-\u0915\u0930\u0924\u093e
-\u0915\u0930\u0924\u0947
-\u0915\u0930\u0928\u093e
-\u0915\u0930\u0928\u0947
-\u0915\u0930\u0947\u0902
-\u0915\u0939\u0924\u0947
-\u0915\u0939\u093e
-\u0915\u093e
-\u0915\u093e\u095e\u0940
-\u0915\u093f
-\u0915\u093f\u0924\u0928\u093e
-\u0915\u093f\u0928\u094d\u0939\u0947\u0902
-\u0915\u093f\u0928\u094d\u0939\u094b\u0902
-\u0915\u093f\u092f\u093e
-\u0915\u093f\u0930
-\u0915\u093f\u0938
-\u0915\u093f\u0938\u0940
-\u0915\u093f\u0938\u0947
-\u0915\u0940
-\u0915\u0941\u091b
-\u0915\u0941\u0932
-\u0915\u0947
-\u0915\u094b
-\u0915\u094b\u0908
-\u0915\u094c\u0928
-\u0915\u094c\u0928\u0938\u093e
-\u0917\u092f\u093e
-\u0918\u0930
-\u091c\u092c
-\u091c\u0939\u093e\u0901
-\u091c\u093e
-\u091c\u093f\u0924\u0928\u093e
-\u091c\u093f\u0928
-\u091c\u093f\u0928\u094d\u0939\u0947\u0902
-\u091c\u093f\u0928\u094d\u0939\u094b\u0902
-\u091c\u093f\u0938
-\u091c\u093f\u0938\u0947
-\u091c\u0940\u0927\u0930
-\u091c\u0948\u0938\u093e
-\u091c\u0948\u0938\u0947
-\u091c\u094b
-\u0924\u0915
-\u0924\u092c
-\u0924\u0930\u0939
-\u0924\u093f\u0928
-\u0924\u093f\u0928\u094d\u0939\u0947\u0902
-\u0924\u093f\u0928\u094d\u0939\u094b\u0902
-\u0924\u093f\u0938
-\u0924\u093f\u0938\u0947
-\u0924\u094b
-\u0925\u093e
-\u0925\u0940
-\u0925\u0947
-\u0926\u092c\u093e\u0930\u093e
-\u0926\u093f\u092f\u093e
-\u0926\u0941\u0938\u0930\u093e
-\u0926\u0942\u0938\u0930\u0947
-\u0926\u094b
-\u0926\u094d\u0935\u093e\u0930\u093e
-\u0928
-\u0928\u0939\u0940\u0902
-\u0928\u093e
-\u0928\u093f\u0939\u093e\u092f\u0924
-\u0928\u0940\u091a\u0947
-\u0928\u0947
-\u092a\u0930
-\u092a\u0930
-\u092a\u0939\u0932\u0947
-\u092a\u0942\u0930\u093e
-\u092a\u0947
-\u092b\u093f\u0930
-\u092c\u0928\u0940
-\u092c\u0939\u0940
-\u092c\u0939\u0941\u0924
-\u092c\u093e\u0926
-\u092c\u093e\u0932\u093e
-\u092c\u093f\u0932\u0915\u0941\u0932
-\u092d\u0940
-\u092d\u0940\u0924\u0930
-\u092e\u0917\u0930
-\u092e\u093e\u0928\u094b
-\u092e\u0947
-\u092e\u0947\u0902
-\u092f\u0926\u093f
-\u092f\u0939
-\u092f\u0939\u093e\u0901
-\u092f\u0939\u0940
-\u092f\u093e
-\u092f\u093f\u0939
-\u092f\u0947
-\u0930\u0916\u0947\u0902
-\u0930\u0939\u093e
-\u0930\u0939\u0947
-\u0931\u094d\u0935\u093e\u0938\u093e
-\u0932\u093f\u090f
-\u0932\u093f\u092f\u0947
-\u0932\u0947\u0915\u093f\u0928
-\u0935
-\u0935\u0930\u094d\u0917
-\u0935\u0939
-\u0935\u0939
-\u0935\u0939\u093e\u0901
-\u0935\u0939\u0940\u0902
-\u0935\u093e\u0932\u0947
-\u0935\u0941\u0939
-\u0935\u0947
-\u0935\u095a\u0948\u0930\u0939
-\u0938\u0902\u0917
-\u0938\u0915\u0924\u093e
-\u0938\u0915\u0924\u0947
-\u0938\u092c\u0938\u0947
-\u0938\u092d\u0940
-\u0938\u093e\u0925
-\u0938\u093e\u092c\u0941\u0924
-\u0938\u093e\u092d
-\u0938\u093e\u0930\u093e
-\u0938\u0947
-\u0938\u094b
-\u0939\u0940
-\u0939\u0941\u0906
-\u0939\u0941\u0908
-\u0939\u0941\u090f
-\u0939\u0948
-\u0939\u0948\u0902
-\u0939\u094b
-\u0939\u094b\u0924\u093e
-\u0939\u094b\u0924\u0940
-\u0939\u094b\u0924\u0947
-\u0939\u094b\u0928\u093e
-\u0939\u094b\u0928\u0947
-# additional normalized forms of the above
-\u0905\u092a\u0928\u093f
-\u091c\u0947\u0938\u0947
-\u0939\u094b\u0924\u093f
-\u0938\u092d\u093f
-\u0924\u093f\u0902\u0939\u094b\u0902
-\u0907\u0902\u0939\u094b\u0902
-\u0926\u0935\u093e\u0930\u093e
-\u0907\u0938\u093f
-\u0915\u093f\u0902\u0939\u0947\u0902
-\u0925\u093f
-\u0909\u0902\u0939\u094b\u0902
-\u0913\u0930
-\u091c\u093f\u0902\u0939\u0947\u0902
-\u0935\u0939\u093f\u0902
-\u0905\u092d\u093f
-\u092c\u0928\u093f
-\u0939\u093f
-\u0909\u0902\u0939\u093f\u0902
-\u0909\u0902\u0939\u0947\u0902
-\u0939\u0947\u0902
-\u0935\u0917\u0947\u0930\u0939
-\u090f\u0938\u0947
-\u0930\u0935\u093e\u0938\u093e
-\u0915\u094b\u0928
-\u0928\u093f\u091a\u0947
-\u0915\u093e\u092b\u093f
-\u0909\u0938\u093f
-\u092a\u0941\u0930\u093e
-\u092d\u093f\u0924\u0930
-\u0939\u0947
-\u092c\u0939\u093f
-\u0935\u0939\u093e\u0902
-\u0915\u094b\u0907
-\u092f\u0939\u093e\u0902
-\u091c\u093f\u0902\u0939\u094b\u0902
-\u0924\u093f\u0902\u0939\u0947\u0902
-\u0915\u093f\u0938\u093f
-\u0915\u0907
-\u092f\u0939\u093f
-\u0907\u0902\u0939\u093f\u0902
-\u091c\u093f\u0927\u0930
-\u0907\u0902\u0939\u0947\u0902
-\u0905\u0926\u093f
-\u0907\u0924\u092f\u093e\u0926\u093f
-\u0939\u0941\u0907
-\u0915\u094b\u0928\u0938\u093e
-\u0907\u0938\u0915\u093f
-\u0926\u0941\u0938\u0930\u0947
-\u091c\u0939\u093e\u0902
-\u0905\u092a
-\u0915\u093f\u0902\u0939\u094b\u0902
-\u0909\u0928\u0915\u093f
-\u092d\u093f
-\u0935\u0930\u0917
-\u0939\u0941\u0905
-\u091c\u0947\u0938\u093e
-\u0928\u0939\u093f\u0902
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_hu.txt
----------------------------------------------------------------------
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_hu.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_hu.txt
deleted file mode 100644
index 1a96f1d..0000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_hu.txt
+++ /dev/null
@@ -1,209 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
-| Hungarian stop word list
-| prepared by Anna Tordai
-
-a
-ahogy
-ahol
-aki
-akik
-akkor
-alatt
-�ltal
-�ltal�ban
-amely
-amelyek
-amelyekben
-amelyeket
-amelyet
-amelynek
-ami
-amit
-amolyan
-am�g
-amikor
-�t
-abban
-ahhoz
-annak
-arra
-arr�l
-az
-azok
-azon
-azt
-azzal
-az�rt
-azt�n
-azut�n
-azonban
-b�r
-be
-bel�l
-benne
-cikk
-cikkek
-cikkeket
-csak
-de
-e
-eddig
-eg�sz
-egy
-egyes
-egyetlen
-egy�b
-egyik
-egyre
-ekkor
-el
-el�g
-ellen
-el\u0151
-el\u0151sz�r
-el\u0151tt
-els\u0151
-�n
-�ppen
-ebben
-ehhez
-emilyen
-ennek
-erre
-ez
-ezt
-ezek
-ezen
-ezzel
-ez�rt
-�s
-fel
-fel�
-hanem
-hiszen
-hogy
-hogyan
-igen
-�gy
-illetve
-ill.
-ill
-ilyen
-ilyenkor
-ison
-ism�t
-itt
-j�
-j�l
-jobban
-kell
-kellett
-kereszt�l
-keress�nk
-ki
-k�v�l
-k�z�tt
-k�z�l
-legal�bb
-lehet
-lehetett
-legyen
-lenne
-lenni
-lesz
-lett
-maga
-mag�t
-majd
-majd
-m�r
-m�s
-m�sik
-meg
-m�g
-mellett
-mert
-mely
-melyek
-mi
-mit
-m�g
-mi�rt
-milyen
-mikor
-minden
-mindent
-mindenki
-mindig
-mint
-mintha
-mivel
-most
-nagy
-nagyobb
-nagyon
-ne
-n�ha
-nekem
-neki
-nem
-n�h�ny
-n�lk�l
-nincs
-olyan
-ott
-�ssze
-\u0151
-\u0151k
-\u0151ket
-pedig
-persze
-r�
-s
-saj�t
-sem
-semmi
-sok
-sokat
-sokkal
-sz�m�ra
-szemben
-szerint
-szinte
-tal�n
-teh�t
-teljes
-tov�bb
-tov�bb�
-t�bb
-�gy
-ugyanis
-�j
-�jabb
-�jra
-ut�n
-ut�na
-utols�
-vagy
-vagyis
-valaki
-valami
-valamint
-val�
-vagyok
-van
-vannak
-volt
-voltam
-voltak
-voltunk
-vissza
-vele
-viszont
-volna
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_hy.txt
----------------------------------------------------------------------
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_hy.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_hy.txt
deleted file mode 100644
index 60c1c50..0000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_hy.txt
+++ /dev/null
@@ -1,46 +0,0 @@
-# example set of Armenian stopwords.
-\u0561\u0575\u0564
-\u0561\u0575\u056c
-\u0561\u0575\u0576
-\u0561\u0575\u057d
-\u0564\u0578\u0582
-\u0564\u0578\u0582\u0584
-\u0565\u0574
-\u0565\u0576
-\u0565\u0576\u0584
-\u0565\u057d
-\u0565\u0584
-\u0567
-\u0567\u056b
-\u0567\u056b\u0576
-\u0567\u056b\u0576\u0584
-\u0567\u056b\u0580
-\u0567\u056b\u0584
-\u0567\u0580
-\u0568\u057d\u057f
-\u0569
-\u056b
-\u056b\u0576
-\u056b\u057d\u056f
-\u056b\u0580
-\u056f\u0561\u0574
-\u0570\u0561\u0574\u0561\u0580
-\u0570\u0565\u057f
-\u0570\u0565\u057f\u0578
-\u0574\u0565\u0576\u0584
-\u0574\u0565\u057b
-\u0574\u056b
-\u0576
-\u0576\u0561
-\u0576\u0561\u0587
-\u0576\u0580\u0561
-\u0576\u0580\u0561\u0576\u0584
-\u0578\u0580
-\u0578\u0580\u0568
-\u0578\u0580\u0578\u0576\u0584
-\u0578\u0580\u057a\u0565\u057d
-\u0578\u0582
-\u0578\u0582\u0574
-\u057a\u056b\u057f\u056b
-\u057e\u0580\u0561
-\u0587
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_id.txt
----------------------------------------------------------------------
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_id.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_id.txt
deleted file mode 100644
index 4617f83..0000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_id.txt
+++ /dev/null
@@ -1,359 +0,0 @@
-# from appendix D of: A Study of Stemming Effects on Information
-# Retrieval in Bahasa Indonesia
-ada
-adanya
-adalah
-adapun
-agak
-agaknya
-agar
-akan
-akankah
-akhirnya
-aku
-akulah
-amat
-amatlah
-anda
-andalah
-antar
-diantaranya
-antara
-antaranya
-diantara
-apa
-apaan
-mengapa
-apabila
-apakah
-apalagi
-apatah
-atau
-ataukah
-ataupun
-bagai
-bagaikan
-sebagai
-sebagainya
-bagaimana
-bagaimanapun
-sebagaimana
-bagaimanakah
-bagi
-bahkan
-bahwa
-bahwasanya
-sebaliknya
-banyak
-sebanyak
-beberapa
-seberapa
-begini
-beginian
-beginikah
-beginilah
-sebegini
-begitu
-begitukah
-begitulah
-begitupun
-sebegitu
-belum
-belumlah
-sebelum
-sebelumnya
-sebenarnya
-berapa
-berapakah
-berapalah
-berapapun
-betulkah
-sebetulnya
-biasa
-biasanya
-bila
-bilakah
-bisa
-bisakah
-sebisanya
-boleh
-bolehkah
-bolehlah
-buat
-bukan
-bukankah
-bukanlah
-bukannya
-cuma
-percuma
-dahulu
-dalam
-dan
-dapat
-dari
-daripada
-dekat
-demi
-demikian
-demikianlah
-sedemikian
-dengan
-depan
-di
-dia
-dialah
-dini
-diri
-dirinya
-terdiri
-dong
-dulu
-enggak
-enggaknya
-entah
-entahlah
-terhadap
-terhadapnya
-hal
-hampir
-hanya
-hanyalah
-harus
-haruslah
-harusnya
-seharusnya
-hendak
-hendaklah
-hendaknya
-hingga
-sehingga
-ia
-ialah
-ibarat
-ingin
-inginkah
-inginkan
-ini
-inikah
-inilah
-itu
-itukah
-itulah
-jangan
-jangankan
-janganlah
-jika
-jikalau
-juga
-justru
-kala
-kalau
-kalaulah
-kalaupun
-kalian
-kami
-kamilah
-kamu
-kamulah
-kan
-kapan
-kapankah
-kapanpun
-dikarenakan
-karena
-karenanya
-ke
-kecil
-kemudian
-kenapa
-kepada
-kepadanya
-ketika
-seketika
-khususnya
-kini
-kinilah
-kiranya
-sekiranya
-kita
-kitalah
-kok
-lagi
-lagian
-selagi
-lah
-lain
-lainnya
-melainkan
-selaku
-lalu
-melalui
-terlalu
-lama
-lamanya
-selama
-selama
-selamanya
-lebih
-terlebih
-bermacam
-macam
-semacam
-maka
-makanya
-makin
-malah
-malahan
-mampu
-mampukah
-mana
-manakala
-manalagi
-masih
-masihkah
-semasih
-masing
-mau
-maupun
-semaunya
-memang
-mereka
-merekalah
-meski
-meskipun
-semula
-mungkin
-mungkinkah
-nah
-namun
-nanti
-nantinya
-nyaris
-oleh
-olehnya
-seorang
-seseorang
-pada
-padanya
-padahal
-paling
-sepanjang
-pantas
-sepantasnya
-sepantasnyalah
-para
-pasti
-pastilah
-per
-pernah
-pula
-pun
-merupakan
-rupanya
-serupa
-saat
-saatnya
-sesaat
-saja
-sajalah
-saling
-bersama
-sama
-sesama
-sambil
-sampai
-sana
-sangat
-sangatlah
-saya
-sayalah
-se
-sebab
-sebabnya
-sebuah
-tersebut
-tersebutlah
-sedang
-sedangkan
-sedikit
-sedikitnya
-segala
-segalanya
-segera
-sesegera
-sejak
-sejenak
-sekali
-sekalian
-sekalipun
-sesekali
-sekaligus
-sekarang
-sekarang
-sekitar
-sekitarnya
-sela
-selain
-selalu
-seluruh
-seluruhnya
-semakin
-sementara
-sempat
-semua
-semuanya
-sendiri
-sendirinya
-seolah
-seperti
-sepertinya
-sering
-seringnya
-serta
-siapa
-siapakah
-siapapun
-disini
-disinilah
-sini
-sinilah
-sesuatu
-sesuatunya
-suatu
-sesudah
-sesudahnya
-sudah
-sudahkah
-sudahlah
-supaya
-tadi
-tadinya
-tak
-tanpa
-setelah
-telah
-tentang
-tentu
-tentulah
-tentunya
-tertentu
-seterusnya
-tapi
-tetapi
-setiap
-tiap
-setidaknya
-tidak
-tidakkah
-tidaklah
-toh
-waduh
-wah
-wahai
-sewaktu
-walau
-walaupun
-wong
-yaitu
-yakni
-yang
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_it.txt
----------------------------------------------------------------------
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_it.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_it.txt
deleted file mode 100644
index 4cb5b08..0000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_it.txt
+++ /dev/null
@@ -1,301 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | An Italian stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
-ad | a (to) before vowel
-al | a + il
-allo | a + lo
-ai | a + i
-agli | a + gli
-all | a + l'
-agl | a + gl'
-alla | a + la
-alle | a + le
-con | with
-col | con + il
-coi | con + i (forms collo, cogli etc are now very rare)
-da | from
-dal | da + il
-dallo | da + lo
-dai | da + i
-dagli | da + gli
-dall | da + l'
-dagl | da + gl'
-dalla | da + la
-dalle | da + le
-di | of
-del | di + il
-dello | di + lo
-dei | di + i
-degli | di + gli
-dell | di + l'
-degl | di + gl'
-della | di + la
-delle | di + le
-in | in
-nel | in + il
-nello | in + lo
-nei | in + i
-negli | in + gli
-nell | in + l'
-negl | in + gl'
-nella | in + la
-nelle | in + le
-su | on
-sul | su + il
-sullo | su + lo
-sui | su + i
-sugli | su + gli
-sull | su + l'
-sugl | su + gl'
-sulla | su + la
-sulle | su + le
-per | through, by
-tra | among
-contro | against
-io | I
-tu | thou
-lui | he
-lei | she
-noi | we
-voi | you
-loro | they
-mio | my
-mia |
-miei |
-mie |
-tuo |
-tua |
-tuoi | thy
-tue |
-suo |
-sua |
-suoi | his, her
-sue |
-nostro | our
-nostra |
-nostri |
-nostre |
-vostro | your
-vostra |
-vostri |
-vostre |
-mi | me
-ti | thee
-ci | us, there
-vi | you, there
-lo | him, the
-la | her, the
-li | them
-le | them, the
-gli | to him, the
-ne | from there etc
-il | the
-un | a
-uno | a
-una | a
-ma | but
-ed | and
-se | if
-perché | why, because
-anche | also
-come | how
-dov | where (as dov')
-dove | where
-che | who, that
-chi | who
-cui | whom
-non | not
-più | more
-quale | who, that
-quanto | how much
-quanti |
-quanta |
-quante |
-quello | that
-quelli |
-quella |
-quelle |
-questo | this
-questi |
-questa |
-queste |
-si | yes
-tutto | all
-tutti | all
-
- | single letter forms:
-
-a | at
-c | as c' for ce or ci
-e | and
-i | the
-l | as l'
-o | or
-
- | forms of avere, to have (not including the infinitive):
-
-ho
-hai
-ha
-abbiamo
-avete
-hanno
-abbia
-abbiate
-abbiano
-avr�
-avrai
-avr�
-avremo
-avrete
-avranno
-avrei
-avresti
-avrebbe
-avremmo
-avreste
-avrebbero
-avevo
-avevi
-aveva
-avevamo
-avevate
-avevano
-ebbi
-avesti
-ebbe
-avemmo
-aveste
-ebbero
-avessi
-avesse
-avessimo
-avessero
-avendo
-avuto
-avuta
-avuti
-avute
-
- | forms of essere, to be (not including the infinitive):
-sono
-sei
-�
-siamo
-siete
-sia
-siate
-siano
-sar�
-sarai
-sar�
-saremo
-sarete
-saranno
-sarei
-saresti
-sarebbe
-saremmo
-sareste
-sarebbero
-ero
-eri
-era
-eravamo
-eravate
-erano
-fui
-fosti
-fu
-fummo
-foste
-furono
-fossi
-fosse
-fossimo
-fossero
-essendo
-
- | forms of fare, to do (not including the infinitive, fa, fat-):
-faccio
-fai
-facciamo
-fanno
-faccia
-facciate
-facciano
-far�
-farai
-far�
-faremo
-farete
-faranno
-farei
-faresti
-farebbe
-faremmo
-fareste
-farebbero
-facevo
-facevi
-faceva
-facevamo
-facevate
-facevano
-feci
-facesti
-fece
-facemmo
-faceste
-fecero
-facessi
-facesse
-facessimo
-facessero
-facendo
-
- | forms of stare, to be (not including the infinitive):
-sto
-stai
-sta
-stiamo
-stanno
-stia
-stiate
-stiano
-star�
-starai
-star�
-staremo
-starete
-staranno
-starei
-staresti
-starebbe
-staremmo
-stareste
-starebbero
-stavo
-stavi
-stava
-stavamo
-stavate
-stavano
-stetti
-stesti
-stette
-stemmo
-steste
-stettero
-stessi
-stesse
-stessimo
-stessero
-stando
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_ja.txt
----------------------------------------------------------------------
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_ja.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_ja.txt
deleted file mode 100644
index d4321be..0000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_ja.txt
+++ /dev/null
@@ -1,127 +0,0 @@
-#
-# This file defines a stopword set for Japanese.
-#
-# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia.
-# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745
-# for frequency lists, etc. that can be useful for making your own set (if desired)
-#
-# Note that there is an overlap between these stopwords and the terms stopped when used
-# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note
-# that comments are not allowed on the same line as stopwords.
-#
-# Also note that stopping is done in a case-insensitive manner. Change your StopFilter
-# configuration if you need case-sensitive stopping. Lastly, note that stopping is done
-# using the same character width as the entries in this file. Since this StopFilter is
-# normally done after a CJKWidthFilter in your chain, you would usually want your romaji
-# entries to be in half-width and your kana entries to be in full-width.
-#
-\u306e
-\u306b
-\u306f
-\u3092
-\u305f
-\u304c
-\u3067
-\u3066
-\u3068
-\u3057
-\u308c
-\u3055
-\u3042\u308b
-\u3044\u308b
-\u3082
-\u3059\u308b
-\u304b\u3089
-\u306a
-\u3053\u3068
-\u3068\u3057\u3066
-\u3044
-\u3084
-\u308c\u308b
-\u306a\u3069
-\u306a\u3063
-\u306a\u3044
-\u3053\u306e
-\u305f\u3081
-\u305d\u306e
-\u3042\u3063
-\u3088\u3046
-\u307e\u305f
-\u3082\u306e
-\u3068\u3044\u3046
-\u3042\u308a
-\u307e\u3067
-\u3089\u308c
-\u306a\u308b
-\u3078
-\u304b
-\u3060
-\u3053\u308c
-\u306b\u3088\u3063\u3066
-\u306b\u3088\u308a
-\u304a\u308a
-\u3088\u308a
-\u306b\u3088\u308b
-\u305a
-\u306a\u308a
-\u3089\u308c\u308b
-\u306b\u304a\u3044\u3066
-\u3070
-\u306a\u304b\u3063
-\u306a\u304f
-\u3057\u304b\u3057
-\u306b\u3064\u3044\u3066
-\u305b
-\u3060\u3063
-\u305d\u306e\u5f8c
-\u3067\u304d\u308b
-\u305d\u308c
-\u3046
-\u306e\u3067
-\u306a\u304a
-\u306e\u307f
-\u3067\u304d
-\u304d
-\u3064
-\u306b\u304a\u3051\u308b
-\u304a\u3088\u3073
-\u3044\u3046
-\u3055\u3089\u306b
-\u3067\u3082
-\u3089
-\u305f\u308a
-\u305d\u306e\u4ed6
-\u306b\u95a2\u3059\u308b
-\u305f\u3061
-\u307e\u3059
-\u3093
-\u306a\u3089
-\u306b\u5bfe\u3057\u3066
-\u7279\u306b
-\u305b\u308b
-\u53ca\u3073
-\u3053\u308c\u3089
-\u3068\u304d
-\u3067\u306f
-\u306b\u3066
-\u307b\u304b
-\u306a\u304c\u3089
-\u3046\u3061
-\u305d\u3057\u3066
-\u3068\u3068\u3082\u306b
-\u305f\u3060\u3057
-\u304b\u3064\u3066
-\u305d\u308c\u305e\u308c
-\u307e\u305f\u306f
-\u304a
-\u307b\u3069
-\u3082\u306e\u306e
-\u306b\u5bfe\u3059\u308b
-\u307b\u3068\u3093\u3069
-\u3068\u5171\u306b
-\u3068\u3044\u3063\u305f
-\u3067\u3059
-\u3068\u3082
-\u3068\u3053\u308d
-\u3053\u3053
-##### End of file
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_lv.txt
----------------------------------------------------------------------
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_lv.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_lv.txt
deleted file mode 100644
index e21a23c..0000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_lv.txt
+++ /dev/null
@@ -1,172 +0,0 @@
-# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins
-# the original list of over 800 forms was refined:
-# pronouns, adverbs, interjections were removed
-#
-# prepositions
-aiz
-ap
-ar
-apak\u0161
-\u0101rpus
-aug\u0161pus
-bez
-caur
-d\u0113\u013c
-gar
-iek\u0161
-iz
-kop\u0161
-labad
-lejpus
-l\u012bdz
-no
-otrpus
-pa
-par
-p\u0101r
-p\u0113c
-pie
-pirms
-pret
-priek\u0161
-starp
-\u0161aipus
-uz
-vi\u0146pus
-virs
-virspus
-zem
-apak\u0161pus
-# Conjunctions
-un
-bet
-jo
-ja
-ka
-lai
-tom\u0113r
-tikko
-turpret\u012b
-ar\u012b
-kaut
-gan
-t\u0101d\u0113\u013c
-t\u0101
-ne
-tikvien
-vien
-k\u0101
-ir
-te
-vai
-kam\u0113r
-# Particles
-ar
-diezin
-dro\u0161i
-diem\u017e\u0113l
-neb\u016bt
-ik
-it
-ta\u010du
-nu
-pat
-tiklab
-iek\u0161pus
-nedz
-tik
-nevis
-turpretim
-jeb
-iekam
-iek\u0101m
-iek\u0101ms
-kol\u012bdz
-l\u012bdzko
-tikl\u012bdz
-jeb\u0161u
-t\u0101lab
-t\u0101p\u0113c
-nek\u0101
-itin
-j\u0101
-jau
-jel
-n\u0113
-nezin
-tad
-tikai
-vis
-tak
-iekams
-vien
-# modal verbs
-b\u016bt
-biju
-biji
-bija
-bij\u0101m
-bij\u0101t
-esmu
-esi
-esam
-esat
-b\u016b\u0161u
-b\u016bsi
-b\u016bs
-b\u016bsim
-b\u016bsiet
-tikt
-tiku
-tiki
-tika
-tik\u0101m
-tik\u0101t
-tieku
-tiec
-tiek
-tiekam
-tiekat
-tik\u0161u
-tiks
-tiksim
-tiksiet
-tapt
-tapi
-tap\u0101t
-topat
-tap\u0161u
-tapsi
-taps
-tapsim
-tapsiet
-k\u013c\u016bt
-k\u013cuvu
-k\u013cuvi
-k\u013cuva
-k\u013cuv\u0101m
-k\u013cuv\u0101t
-k\u013c\u016bstu
-k\u013c\u016bsti
-k\u013c\u016bst
-k\u013c\u016bstam
-k\u013c\u016bstat
-k\u013c\u016b\u0161u
-k\u013c\u016bsi
-k\u013c\u016bs
-k\u013c\u016bsim
-k\u013c\u016bsiet
-# verbs
-var\u0113t
-var\u0113ju
-var\u0113j\u0101m
-var\u0113\u0161u
-var\u0113sim
-var
-var\u0113ji
-var\u0113j\u0101t
-var\u0113si
-var\u0113siet
-varat
-var\u0113ja
-var\u0113s
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/53e5f34f/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_nl.txt
----------------------------------------------------------------------
diff --git a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_nl.txt b/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_nl.txt
deleted file mode 100644
index f4d61f5..0000000
--- a/solr/contrib/morphlines-core/src/test-files/solr/mrunit/conf/lang/stopwords_nl.txt
+++ /dev/null
@@ -1,117 +0,0 @@
- | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt
- | This file is distributed under the BSD License.
- | See http://snowball.tartarus.org/license.php
- | Also see http://www.opensource.org/licenses/bsd-license.html
- | - Encoding was converted to UTF-8.
- | - This notice was added.
-
- | A Dutch stop word list. Comments begin with vertical bar. Each stop
- | word is at the start of a line.
-
- | This is a ranked list (commonest to rarest) of stopwords derived from
- | a large sample of Dutch text.
-
- | Dutch stop words frequently exhibit homonym clashes. These are indicated
- | clearly below.
-
-de | the
-en | and
-van | of, from
-ik | I, the ego
-te | (1) chez, at etc, (2) to, (3) too
-dat | that, which
-die | that, those, who, which
-in | in, inside
-een | a, an, one
-hij | he
-het | the, it
-niet | not, nothing, naught
-zijn | (1) to be, being, (2) his, one's, its
-is | is
-was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river
-op | on, upon, at, in, up, used up
-aan | on, upon, to (as dative)
-met | with, by
-als | like, such as, when
-voor | (1) before, in front of, (2) furrow
-had | had, past tense all persons sing. of 'hebben' (have)
-er | there
-maar | but, only
-om | round, about, for etc
-hem | him
-dan | then
-zou | should/would, past tense all persons sing. of 'zullen'
-of | or, whether, if
-wat | what, something, anything
-mijn | possessive and noun 'mine'
-men | people, 'one'
-dit | this
-zo | so, thus, in this way
-door           |  through, by
-over | over, across
-ze | she, her, they, them
-zich | oneself
-bij | (1) a bee, (2) by, near, at
-ook | also, too
-tot | till, until
-je | you
-mij | me
-uit | out of, from
-der | Old Dutch form of 'van der' still found in surnames
-daar | (1) there, (2) because
-haar | (1) her, their, them, (2) hair
-naar | (1) unpleasant, unwell etc, (2) towards, (3) as
-heb | present first person sing. of 'to have'
-hoe | how, why
-heeft | present third person sing. of 'to have'
-hebben | 'to have' and various parts thereof
-deze | this
-u | you
-want | (1) for, (2) mitten, (3) rigging
-nog | yet, still
-zal | 'shall', first and third person sing. of verb 'zullen' (will)
-me | me
-zij | she, they
-nu | now
-ge | 'thou', still used in Belgium and south Netherlands
-geen | none
-omdat | because
-iets | something, somewhat
-worden | to become, grow, get
-toch | yet, still
-al | all, every, each
-waren          |  (1) 'were', (2) to wander, (3) wares
-veel | much, many
-meer | (1) more, (2) lake
-doen | to do, to make
-toen | then, when
-moet           |  noun 'spot/mote' and present form of 'must' (to have to)
-ben | (1) am, (2) 'are' in interrogative second person singular of 'to be'
-zonder | without
-kan | noun 'can' and present form of 'to be able'
-hun | their, them
-dus | so, consequently
-alles | all, everything, anything
-onder | under, beneath
-ja | yes, of course
-eens | once, one day
-hier | here
-wie | who
-werd | imperfect third person sing. of 'become'
-altijd | always
-doch | yet, but etc
-wordt | present third person sing. of 'become'
-wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans
-kunnen | to be able
-ons | us/our
-zelf | self
-tegen | against, towards, at
-na | after, near
-reeds | already
-wil | (1) present tense of 'want', (2) 'will', noun, (3) fender
-kon | could; past tense of 'to be able'
-niets | nothing
-uw | your
-iemand | somebody
-geweest | been; past participle of 'be'
-andere | other