You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by yo...@apache.org on 2016/07/08 19:45:59 UTC
[3/4] lucene-solr:branch_6x: SOLR-9163: sync basic_configs w/
data_driven_schema_configs
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1a53346c/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_lv.txt
----------------------------------------------------------------------
diff --git a/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_lv.txt b/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_lv.txt
new file mode 100644
index 0000000..e21a23c
--- /dev/null
+++ b/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_lv.txt
@@ -0,0 +1,172 @@
+# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins
+# the original list of over 800 forms was refined:
+# pronouns, adverbs, interjections were removed
+#
+# prepositions
+aiz
+ap
+ar
+apak\u0161
+\u0101rpus
+aug\u0161pus
+bez
+caur
+d\u0113\u013c
+gar
+iek\u0161
+iz
+kop\u0161
+labad
+lejpus
+l\u012bdz
+no
+otrpus
+pa
+par
+p\u0101r
+p\u0113c
+pie
+pirms
+pret
+priek\u0161
+starp
+\u0161aipus
+uz
+vi\u0146pus
+virs
+virspus
+zem
+apak\u0161pus
+# Conjunctions
+un
+bet
+jo
+ja
+ka
+lai
+tom\u0113r
+tikko
+turpret\u012b
+ar\u012b
+kaut
+gan
+t\u0101d\u0113\u013c
+t\u0101
+ne
+tikvien
+vien
+k\u0101
+ir
+te
+vai
+kam\u0113r
+# Particles
+ar
+diezin
+dro\u0161i
+diem\u017e\u0113l
+neb\u016bt
+ik
+it
+ta\u010du
+nu
+pat
+tiklab
+iek\u0161pus
+nedz
+tik
+nevis
+turpretim
+jeb
+iekam
+iek\u0101m
+iek\u0101ms
+kol\u012bdz
+l\u012bdzko
+tikl\u012bdz
+jeb\u0161u
+t\u0101lab
+t\u0101p\u0113c
+nek\u0101
+itin
+j\u0101
+jau
+jel
+n\u0113
+nezin
+tad
+tikai
+vis
+tak
+iekams
+vien
+# modal verbs
+b\u016bt
+biju
+biji
+bija
+bij\u0101m
+bij\u0101t
+esmu
+esi
+esam
+esat
+b\u016b\u0161u
+b\u016bsi
+b\u016bs
+b\u016bsim
+b\u016bsiet
+tikt
+tiku
+tiki
+tika
+tik\u0101m
+tik\u0101t
+tieku
+tiec
+tiek
+tiekam
+tiekat
+tik\u0161u
+tiks
+tiksim
+tiksiet
+tapt
+tapi
+tap\u0101t
+topat
+tap\u0161u
+tapsi
+taps
+tapsim
+tapsiet
+k\u013c\u016bt
+k\u013cuvu
+k\u013cuvi
+k\u013cuva
+k\u013cuv\u0101m
+k\u013cuv\u0101t
+k\u013c\u016bstu
+k\u013c\u016bsti
+k\u013c\u016bst
+k\u013c\u016bstam
+k\u013c\u016bstat
+k\u013c\u016b\u0161u
+k\u013c\u016bsi
+k\u013c\u016bs
+k\u013c\u016bsim
+k\u013c\u016bsiet
+# verbs
+var\u0113t
+var\u0113ju
+var\u0113j\u0101m
+var\u0113\u0161u
+var\u0113sim
+var
+var\u0113ji
+var\u0113j\u0101t
+var\u0113si
+var\u0113siet
+varat
+var\u0113ja
+var\u0113s
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1a53346c/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_nl.txt
----------------------------------------------------------------------
diff --git a/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_nl.txt b/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_nl.txt
new file mode 100644
index 0000000..47a2aea
--- /dev/null
+++ b/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_nl.txt
@@ -0,0 +1,119 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+ |
+ | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
+
+ | A Dutch stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | This is a ranked list (commonest to rarest) of stopwords derived from
+ | a large sample of Dutch text.
+
+ | Dutch stop words frequently exhibit homonym clashes. These are indicated
+ | clearly below.
+
+de | the
+en | and
+van | of, from
+ik | I, the ego
+te | (1) chez, at etc, (2) to, (3) too
+dat | that, which
+die | that, those, who, which
+in | in, inside
+een | a, an, one
+hij | he
+het | the, it
+niet | not, nothing, naught
+zijn | (1) to be, being, (2) his, one's, its
+is | is
+was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river
+op | on, upon, at, in, up, used up
+aan | on, upon, to (as dative)
+met | with, by
+als | like, such as, when
+voor | (1) before, in front of, (2) furrow
+had | had, past tense all persons sing. of 'hebben' (have)
+er | there
+maar | but, only
+om | round, about, for etc
+hem | him
+dan | then
+zou | should/would, past tense all persons sing. of 'zullen'
+of | or, whether, if
+wat | what, something, anything
+mijn | possessive and noun 'mine'
+men | people, 'one'
+dit | this
+zo | so, thus, in this way
+door | through by
+over | over, across
+ze | she, her, they, them
+zich | oneself
+bij | (1) a bee, (2) by, near, at
+ook | also, too
+tot | till, until
+je | you
+mij | me
+uit | out of, from
+der | Old Dutch form of 'van der' still found in surnames
+daar | (1) there, (2) because
+haar | (1) her, their, them, (2) hair
+naar | (1) unpleasant, unwell etc, (2) towards, (3) as
+heb | present first person sing. of 'to have'
+hoe | how, why
+heeft | present third person sing. of 'to have'
+hebben | 'to have' and various parts thereof
+deze | this
+u | you
+want | (1) for, (2) mitten, (3) rigging
+nog | yet, still
+zal | 'shall', first and third person sing. of verb 'zullen' (will)
+me | me
+zij | she, they
+nu | now
+ge | 'thou', still used in Belgium and south Netherlands
+geen | none
+omdat | because
+iets | something, somewhat
+worden | to become, grow, get
+toch | yet, still
+al | all, every, each
+waren | (1) 'were' (2) to wander, (3) wares, (3)
+veel | much, many
+meer | (1) more, (2) lake
+doen | to do, to make
+toen | then, when
+moet | noun 'spot/mote' and present form of 'to must'
+ben | (1) am, (2) 'are' in interrogative second person singular of 'to be'
+zonder | without
+kan | noun 'can' and present form of 'to be able'
+hun | their, them
+dus | so, consequently
+alles | all, everything, anything
+onder | under, beneath
+ja | yes, of course
+eens | once, one day
+hier | here
+wie | who
+werd | imperfect third person sing. of 'become'
+altijd | always
+doch | yet, but etc
+wordt | present third person sing. of 'become'
+wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans
+kunnen | to be able
+ons | us/our
+zelf | self
+tegen | against, towards, at
+na | after, near
+reeds | already
+wil | (1) present tense of 'want', (2) 'will', noun, (3) fender
+kon | could; past tense of 'to be able'
+niets | nothing
+uw | your
+iemand | somebody
+geweest | been; past participle of 'be'
+andere | other
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1a53346c/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_no.txt
----------------------------------------------------------------------
diff --git a/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_no.txt b/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_no.txt
new file mode 100644
index 0000000..a7a2c28
--- /dev/null
+++ b/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_no.txt
@@ -0,0 +1,194 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+ |
+ | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
+
+ | A Norwegian stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | This stop word list is for the dominant bokmål dialect. Words unique
+ | to nynorsk are marked *.
+
+ | Revised by Jan Bruusgaard <Ja...@ssb.no>, Jan 2005
+
+og | and
+i | in
+jeg | I
+det | it/this/that
+at | to (w. inf.)
+en | a/an
+et | a/an
+den | it/this/that
+til | to
+er | is/am/are
+som | who/that
+på | on
+de | they / you(formal)
+med | with
+han | he
+av | of
+ikke | not
+ikkje | not *
+der | there
+så | so
+var | was/were
+meg | me
+seg | you
+men | but
+ett | one
+har | have
+om | about
+vi | we
+min | my
+mitt | my
+ha | have
+hadde | had
+hun | she
+nå | now
+over | over
+da | when/as
+ved | by/know
+fra | from
+du | you
+ut | out
+sin | your
+dem | them
+oss | us
+opp | up
+man | you/one
+kan | can
+hans | his
+hvor | where
+eller | or
+hva | what
+skal | shall/must
+selv | self (reflective)
+sjøl | self (reflective)
+her | here
+alle | all
+vil | will
+bli | become
+ble | became
+blei | became *
+blitt | have become
+kunne | could
+inn | in
+når | when
+være | be
+kom | come
+noen | some
+noe | some
+ville | would
+dere | you
+som | who/which/that
+deres | their/theirs
+kun | only/just
+ja | yes
+etter | after
+ned | down
+skulle | should
+denne | this
+for | for/because
+deg | you
+si | hers/his
+sine | hers/his
+sitt | hers/his
+mot | against
+å | to
+meget | much
+hvorfor | why
+dette | this
+disse | these/those
+uten | without
+hvordan | how
+ingen | none
+din | your
+ditt | your
+blir | become
+samme | same
+hvilken | which
+hvilke | which (plural)
+sånn | such a
+inni | inside/within
+mellom | between
+vår | our
+hver | each
+hvem | who
+vors | us/ours
+hvis | whose
+både | both
+bare | only/just
+enn | than
+fordi | as/because
+før | before
+mange | many
+også | also
+slik | just
+vært | been
+være | to be
+båe | both *
+begge | both
+siden | since
+dykk | your *
+dykkar | yours *
+dei | they *
+deira | them *
+deires | theirs *
+deim | them *
+di | your (fem.) *
+då | as/when *
+eg | I *
+ein | a/an *
+eit | a/an *
+eitt | a/an *
+elles | or *
+honom | he *
+hjå | at *
+ho | she *
+hoe | she *
+henne | her
+hennar | her/hers
+hennes | hers
+hoss | how *
+hossen | how *
+ikkje | not *
+ingi | noone *
+inkje | noone *
+korleis | how *
+korso | how *
+kva | what/which *
+kvar | where *
+kvarhelst | where *
+kven | who/whom *
+kvi | why *
+kvifor | why *
+me | we *
+medan | while *
+mi | my *
+mine | my *
+mykje | much *
+no | now *
+nokon | some (masc./neut.) *
+noka | some (fem.) *
+nokor | some *
+noko | some *
+nokre | some *
+si | his/hers *
+sia | since *
+sidan | since *
+so | so *
+somt | some *
+somme | some *
+um | about*
+upp | up *
+vere | be *
+vore | was *
+verte | become *
+vort | become *
+varte | became *
+vart | became *
+
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1a53346c/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_pt.txt
----------------------------------------------------------------------
diff --git a/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_pt.txt b/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_pt.txt
new file mode 100644
index 0000000..acfeb01
--- /dev/null
+++ b/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_pt.txt
@@ -0,0 +1,253 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+ |
+ | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
+
+ | A Portuguese stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+
+ | The following is a ranked list (commonest to rarest) of stopwords
+ | deriving from a large sample of text.
+
+ | Extra words have been added at the end.
+
+de | of, from
+a | the; to, at; her
+o | the; him
+que | who, that
+e | and
+do | de + o
+da | de + a
+em | in
+um | a
+para | for
+ | é from SER
+com | with
+não | not, no
+uma | a
+os | the; them
+no | em + o
+se | himself etc
+na | em + a
+por | for
+mais | more
+as | the; them
+dos | de + os
+como | as, like
+mas | but
+ | foi from SER
+ao | a + o
+ele | he
+das | de + as
+ | tem from TER
+à | a + a
+seu | his
+sua | her
+ou | or
+ | ser from SER
+quando | when
+muito | much
+ | há from HAV
+nos | em + os; us
+já | already, now
+ | está from EST
+eu | I
+também | also
+só | only, just
+pelo | per + o
+pela | per + a
+até | up to
+isso | that
+ela | she
+entre | between
+ | era from SER
+depois | after
+sem | without
+mesmo | same
+aos | a + os
+ | ter from TER
+seus | his
+quem | whom
+nas | em + as
+me | me
+esse | that
+eles | they
+ | estão from EST
+você | you
+ | tinha from TER
+ | foram from SER
+essa | that
+num | em + um
+nem | nor
+suas | her
+meu | my
+às | a + as
+minha | my
+ | têm from TER
+numa | em + uma
+pelos | per + os
+elas | they
+ | havia from HAV
+ | seja from SER
+qual | which
+ | será from SER
+nós | we
+ | tenho from TER
+lhe | to him, her
+deles | of them
+essas | those
+esses | those
+pelas | per + as
+este | this
+ | fosse from SER
+dele | of him
+
+ | other words. There are many contractions such as naquele = em+aquele,
+ | mo = me+o, but they are rare.
+ | Indefinite article plural forms are also rare.
+
+tu | thou
+te | thee
+vocês | you (plural)
+vos | you
+lhes | to them
+meus | my
+minhas
+teu | thy
+tua
+teus
+tuas
+nosso | our
+nossa
+nossos
+nossas
+
+dela | of her
+delas | of them
+
+esta | this
+estes | these
+estas | these
+aquele | that
+aquela | that
+aqueles | those
+aquelas | those
+isto | this
+aquilo | that
+
+ | forms of estar, to be (not including the infinitive):
+estou
+está
+estamos
+estão
+estive
+esteve
+estivemos
+estiveram
+estava
+estávamos
+estavam
+estivera
+estivéramos
+esteja
+estejamos
+estejam
+estivesse
+estivéssemos
+estivessem
+estiver
+estivermos
+estiverem
+
+ | forms of haver, to have (not including the infinitive):
+hei
+há
+havemos
+hão
+houve
+houvemos
+houveram
+houvera
+houvéramos
+haja
+hajamos
+hajam
+houvesse
+houvéssemos
+houvessem
+houver
+houvermos
+houverem
+houverei
+houverá
+houveremos
+houverão
+houveria
+houveríamos
+houveriam
+
+ | forms of ser, to be (not including the infinitive):
+sou
+somos
+são
+era
+éramos
+eram
+fui
+foi
+fomos
+foram
+fora
+fôramos
+seja
+sejamos
+sejam
+fosse
+fôssemos
+fossem
+for
+formos
+forem
+serei
+será
+seremos
+serão
+seria
+seríamos
+seriam
+
+  | forms of ter, to have (not including the infinitive):
+tenho
+tem
+temos
+têm
+tinha
+tínhamos
+tinham
+tive
+teve
+tivemos
+tiveram
+tivera
+tivéramos
+tenha
+tenhamos
+tenham
+tivesse
+tivéssemos
+tivessem
+tiver
+tivermos
+tiverem
+terei
+terá
+teremos
+terão
+teria
+teríamos
+teriam
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1a53346c/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_ro.txt
----------------------------------------------------------------------
diff --git a/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_ro.txt b/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_ro.txt
new file mode 100644
index 0000000..4fdee90
--- /dev/null
+++ b/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_ro.txt
@@ -0,0 +1,233 @@
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# Also see http://www.opensource.org/licenses/bsd-license.html
+acea
+aceasta
+aceast\u0103
+aceea
+acei
+aceia
+acel
+acela
+acele
+acelea
+acest
+acesta
+aceste
+acestea
+ace\u015fti
+ace\u015ftia
+acolo
+acum
+ai
+aia
+aib\u0103
+aici
+al
+\u0103la
+ale
+alea
+\u0103lea
+altceva
+altcineva
+am
+ar
+are
+a\u015f
+a\u015fadar
+asemenea
+asta
+\u0103sta
+ast\u0103zi
+astea
+\u0103stea
+\u0103\u015ftia
+asupra
+a\u0163i
+au
+avea
+avem
+ave\u0163i
+azi
+bine
+bucur
+bun\u0103
+ca
+c\u0103
+c\u0103ci
+când
+care
+c\u0103rei
+c\u0103ror
+c\u0103rui
+cât
+câte
+câ\u0163i
+c\u0103tre
+câtva
+ce
+cel
+ceva
+chiar
+cînd
+cine
+cineva
+cît
+cîte
+cî\u0163i
+cîtva
+contra
+cu
+cum
+cumva
+curând
+curînd
+da
+d\u0103
+dac\u0103
+dar
+datorit\u0103
+de
+deci
+deja
+deoarece
+departe
+de\u015fi
+din
+dinaintea
+dintr
+dintre
+drept
+dup\u0103
+ea
+ei
+el
+ele
+eram
+este
+e\u015fti
+eu
+face
+f\u0103r\u0103
+fi
+fie
+fiecare
+fii
+fim
+fi\u0163i
+iar
+ieri
+îi
+îl
+îmi
+împotriva
+în
+înainte
+înaintea
+încât
+încît
+încotro
+între
+întrucât
+întrucît
+î\u0163i
+la
+lâng\u0103
+le
+li
+lîng\u0103
+lor
+lui
+m\u0103
+mâine
+mea
+mei
+mele
+mereu
+meu
+mi
+mine
+mult
+mult\u0103
+mul\u0163i
+ne
+nic\u0103ieri
+nici
+nimeni
+ni\u015fte
+noastr\u0103
+noastre
+noi
+no\u015ftri
+nostru
+nu
+ori
+oricând
+oricare
+oricât
+orice
+oricînd
+oricine
+oricît
+oricum
+oriunde
+pân\u0103
+pe
+pentru
+peste
+pîn\u0103
+poate
+pot
+prea
+prima
+primul
+prin
+printr
+sa
+s\u0103
+s\u0103i
+sale
+sau
+s\u0103u
+se
+\u015fi
+sînt
+sîntem
+sînte\u0163i
+spre
+sub
+sunt
+suntem
+sunte\u0163i
+ta
+t\u0103i
+tale
+t\u0103u
+te
+\u0163i
+\u0163ie
+tine
+toat\u0103
+toate
+tot
+to\u0163i
+totu\u015fi
+tu
+un
+una
+unde
+undeva
+unei
+unele
+uneori
+unor
+v\u0103
+vi
+voastr\u0103
+voastre
+voi
+vo\u015ftri
+vostru
+vou\u0103
+vreo
+vreun
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1a53346c/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_ru.txt
----------------------------------------------------------------------
diff --git a/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_ru.txt b/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_ru.txt
new file mode 100644
index 0000000..5527140
--- /dev/null
+++ b/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_ru.txt
@@ -0,0 +1,243 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+ |
+ | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
+
+ | a russian stop word list. comments begin with vertical bar. each stop
+ | word is at the start of a line.
+
+ | this is a ranked list (commonest to rarest) of stopwords derived from
+ | a large text sample.
+
+ | letter `\u0451' is translated to `\u0435'.
+
+\u0438 | and
+\u0432 | in/into
+\u0432\u043e | alternative form
+\u043d\u0435 | not
+\u0447\u0442\u043e | what/that
+\u043e\u043d | he
+\u043d\u0430 | on/onto
+\u044f | i
+\u0441 | from
+\u0441\u043e | alternative form
+\u043a\u0430\u043a | how
+\u0430 | milder form of `no' (but)
+\u0442\u043e | conjunction and form of `that'
+\u0432\u0441\u0435 | all
+\u043e\u043d\u0430 | she
+\u0442\u0430\u043a | so, thus
+\u0435\u0433\u043e | him
+\u043d\u043e | but
+\u0434\u0430 | yes/and
+\u0442\u044b | thou
+\u043a | towards, by
+\u0443 | around, chez
+\u0436\u0435 | intensifier particle
+\u0432\u044b | you
+\u0437\u0430 | beyond, behind
+\u0431\u044b | conditional/subj. particle
+\u043f\u043e | up to, along
+\u0442\u043e\u043b\u044c\u043a\u043e | only
+\u0435\u0435 | her
+\u043c\u043d\u0435 | to me
+\u0431\u044b\u043b\u043e | it was
+\u0432\u043e\u0442 | here is/are, particle
+\u043e\u0442 | away from
+\u043c\u0435\u043d\u044f | me
+\u0435\u0449\u0435 | still, yet, more
+\u043d\u0435\u0442 | no, there isnt/arent
+\u043e | about
+\u0438\u0437 | out of
+\u0435\u043c\u0443 | to him
+\u0442\u0435\u043f\u0435\u0440\u044c | now
+\u043a\u043e\u0433\u0434\u0430 | when
+\u0434\u0430\u0436\u0435 | even
+\u043d\u0443 | so, well
+\u0432\u0434\u0440\u0443\u0433 | suddenly
+\u043b\u0438 | interrogative particle
+\u0435\u0441\u043b\u0438 | if
+\u0443\u0436\u0435 | already, but homonym of `narrower'
+\u0438\u043b\u0438 | or
+\u043d\u0438 | neither
+\u0431\u044b\u0442\u044c | to be
+\u0431\u044b\u043b | he was
+\u043d\u0435\u0433\u043e | prepositional form of \u0435\u0433\u043e
+\u0434\u043e | up to
+\u0432\u0430\u0441 | you accusative
+\u043d\u0438\u0431\u0443\u0434\u044c | indef. suffix preceded by hyphen
+\u043e\u043f\u044f\u0442\u044c | again
+\u0443\u0436 | already, but homonym of `adder'
+\u0432\u0430\u043c | to you
+\u0441\u043a\u0430\u0437\u0430\u043b | he said
+\u0432\u0435\u0434\u044c | particle `after all'
+\u0442\u0430\u043c | there
+\u043f\u043e\u0442\u043e\u043c | then
+\u0441\u0435\u0431\u044f | oneself
+\u043d\u0438\u0447\u0435\u0433\u043e | nothing
+\u0435\u0439 | to her
+\u043c\u043e\u0436\u0435\u0442 | usually with `\u0431\u044b\u0442\u044c' as `maybe'
+\u043e\u043d\u0438 | they
+\u0442\u0443\u0442 | here
+\u0433\u0434\u0435 | where
+\u0435\u0441\u0442\u044c | there is/are
+\u043d\u0430\u0434\u043e | got to, must
+\u043d\u0435\u0439 | prepositional form of \u0435\u0439
+\u0434\u043b\u044f | for
+\u043c\u044b | we
+\u0442\u0435\u0431\u044f | thee
+\u0438\u0445 | them, their
+\u0447\u0435\u043c | than
+\u0431\u044b\u043b\u0430 | she was
+\u0441\u0430\u043c | self
+\u0447\u0442\u043e\u0431 | in order to
+\u0431\u0435\u0437 | without
+\u0431\u0443\u0434\u0442\u043e | as if
+\u0447\u0435\u043b\u043e\u0432\u0435\u043a | man, person, one
+\u0447\u0435\u0433\u043e | genitive form of `what'
+\u0440\u0430\u0437 | once
+\u0442\u043e\u0436\u0435 | also
+\u0441\u0435\u0431\u0435 | to oneself
+\u043f\u043e\u0434 | beneath
+\u0436\u0438\u0437\u043d\u044c | life
+\u0431\u0443\u0434\u0435\u0442 | will be
+\u0436 | short form of intensifer particle `\u0436\u0435'
+\u0442\u043e\u0433\u0434\u0430 | then
+\u043a\u0442\u043e | who
+\u044d\u0442\u043e\u0442 | this
+\u0433\u043e\u0432\u043e\u0440\u0438\u043b | was saying
+\u0442\u043e\u0433\u043e | genitive form of `that'
+\u043f\u043e\u0442\u043e\u043c\u0443 | for that reason
+\u044d\u0442\u043e\u0433\u043e | genitive form of `this'
+\u043a\u0430\u043a\u043e\u0439 | which
+\u0441\u043e\u0432\u0441\u0435\u043c | altogether
+\u043d\u0438\u043c | prepositional form of `\u0435\u0433\u043e', `\u043e\u043d\u0438'
+\u0437\u0434\u0435\u0441\u044c | here
+\u044d\u0442\u043e\u043c | prepositional form of `\u044d\u0442\u043e\u0442'
+\u043e\u0434\u0438\u043d | one
+\u043f\u043e\u0447\u0442\u0438 | almost
+\u043c\u043e\u0439 | my
+\u0442\u0435\u043c | instrumental/dative plural of `\u0442\u043e\u0442', `\u0442\u043e'
+\u0447\u0442\u043e\u0431\u044b | full form of `in order that'
+\u043d\u0435\u0435 | her (acc.)
+\u043a\u0430\u0436\u0435\u0442\u0441\u044f | it seems
+\u0441\u0435\u0439\u0447\u0430\u0441 | now
+\u0431\u044b\u043b\u0438 | they were
+\u043a\u0443\u0434\u0430 | where to
+\u0437\u0430\u0447\u0435\u043c | why
+\u0441\u043a\u0430\u0437\u0430\u0442\u044c | to say
+\u0432\u0441\u0435\u0445 | all (acc., gen. preposn. plural)
+\u043d\u0438\u043a\u043e\u0433\u0434\u0430 | never
+\u0441\u0435\u0433\u043e\u0434\u043d\u044f | today
+\u043c\u043e\u0436\u043d\u043e | possible, one can
+\u043f\u0440\u0438 | by
+\u043d\u0430\u043a\u043e\u043d\u0435\u0446 | finally
+\u0434\u0432\u0430 | two
+\u043e\u0431 | alternative form of `\u043e', about
+\u0434\u0440\u0443\u0433\u043e\u0439 | another
+\u0445\u043e\u0442\u044c | even
+\u043f\u043e\u0441\u043b\u0435 | after
+\u043d\u0430\u0434 | above
+\u0431\u043e\u043b\u044c\u0448\u0435 | more
+\u0442\u043e\u0442 | that one (masc.)
+\u0447\u0435\u0440\u0435\u0437 | across, in
+\u044d\u0442\u0438 | these
+\u043d\u0430\u0441 | us
+\u043f\u0440\u043e | about
+\u0432\u0441\u0435\u0433\u043e | in all, only, of all
+\u043d\u0438\u0445 | prepositional form of `\u043e\u043d\u0438' (they)
+\u043a\u0430\u043a\u0430\u044f | which, feminine
+\u043c\u043d\u043e\u0433\u043e | lots
+\u0440\u0430\u0437\u0432\u0435 | interrogative particle
+\u0441\u043a\u0430\u0437\u0430\u043b\u0430 | she said
+\u0442\u0440\u0438 | three
+\u044d\u0442\u0443 | this, acc. fem. sing.
+\u043c\u043e\u044f | my, feminine
+\u0432\u043f\u0440\u043e\u0447\u0435\u043c | moreover, besides
+\u0445\u043e\u0440\u043e\u0448\u043e | good
+\u0441\u0432\u043e\u044e | ones own, acc. fem. sing.
+\u044d\u0442\u043e\u0439 | oblique form of `\u044d\u0442\u0430', fem. `this'
+\u043f\u0435\u0440\u0435\u0434 | in front of
+\u0438\u043d\u043e\u0433\u0434\u0430 | sometimes
+\u043b\u0443\u0447\u0448\u0435 | better
+\u0447\u0443\u0442\u044c | a little
+\u0442\u043e\u043c | preposn. form of `that one'
+\u043d\u0435\u043b\u044c\u0437\u044f | one must not
+\u0442\u0430\u043a\u043e\u0439 | such a one
+\u0438\u043c | to them
+\u0431\u043e\u043b\u0435\u0435 | more
+\u0432\u0441\u0435\u0433\u0434\u0430 | always
+\u043a\u043e\u043d\u0435\u0447\u043d\u043e | of course
+\u0432\u0441\u044e | acc. fem. sing of `all'
+\u043c\u0435\u0436\u0434\u0443 | between
+
+
+ | b: some paradigms
+ |
+ | personal pronouns
+ |
+ | \u044f \u043c\u0435\u043d\u044f \u043c\u043d\u0435 \u043c\u043d\u043e\u0439 [\u043c\u043d\u043e\u044e]
+ | \u0442\u044b \u0442\u0435\u0431\u044f \u0442\u0435\u0431\u0435 \u0442\u043e\u0431\u043e\u0439 [\u0442\u043e\u0431\u043e\u044e]
+ | \u043e\u043d \u0435\u0433\u043e \u0435\u043c\u0443 \u0438\u043c [\u043d\u0435\u0433\u043e, \u043d\u0435\u043c\u0443, \u043d\u0438\u043c]
+ | \u043e\u043d\u0430 \u0435\u0435 \u044d\u0438 \u0435\u044e [\u043d\u0435\u0435, \u043d\u044d\u0438, \u043d\u0435\u044e]
+ | \u043e\u043d\u043e \u0435\u0433\u043e \u0435\u043c\u0443 \u0438\u043c [\u043d\u0435\u0433\u043e, \u043d\u0435\u043c\u0443, \u043d\u0438\u043c]
+ |
+ | \u043c\u044b \u043d\u0430\u0441 \u043d\u0430\u043c \u043d\u0430\u043c\u0438
+ | \u0432\u044b \u0432\u0430\u0441 \u0432\u0430\u043c \u0432\u0430\u043c\u0438
+ | \u043e\u043d\u0438 \u0438\u0445 \u0438\u043c \u0438\u043c\u0438 [\u043d\u0438\u0445, \u043d\u0438\u043c, \u043d\u0438\u043c\u0438]
+ |
+ | \u0441\u0435\u0431\u044f \u0441\u0435\u0431\u0435 \u0441\u043e\u0431\u043e\u0439 [\u0441\u043e\u0431\u043e\u044e]
+ |
+ | demonstrative pronouns: \u044d\u0442\u043e\u0442 (this), \u0442\u043e\u0442 (that)
+ |
+ | \u044d\u0442\u043e\u0442 \u044d\u0442\u0430 \u044d\u0442\u043e \u044d\u0442\u0438
+ | \u044d\u0442\u043e\u0433\u043e \u044d\u0442\u044b \u044d\u0442\u043e \u044d\u0442\u0438
+ | \u044d\u0442\u043e\u0433\u043e \u044d\u0442\u043e\u0439 \u044d\u0442\u043e\u0433\u043e \u044d\u0442\u0438\u0445
+ | \u044d\u0442\u043e\u043c\u0443 \u044d\u0442\u043e\u0439 \u044d\u0442\u043e\u043c\u0443 \u044d\u0442\u0438\u043c
+ | \u044d\u0442\u0438\u043c \u044d\u0442\u043e\u0439 \u044d\u0442\u0438\u043c [\u044d\u0442\u043e\u044e] \u044d\u0442\u0438\u043c\u0438
+ | \u044d\u0442\u043e\u043c \u044d\u0442\u043e\u0439 \u044d\u0442\u043e\u043c \u044d\u0442\u0438\u0445
+ |
+ | \u0442\u043e\u0442 \u0442\u0430 \u0442\u043e \u0442\u0435
+ | \u0442\u043e\u0433\u043e \u0442\u0443 \u0442\u043e \u0442\u0435
+ | \u0442\u043e\u0433\u043e \u0442\u043e\u0439 \u0442\u043e\u0433\u043e \u0442\u0435\u0445
+ | \u0442\u043e\u043c\u0443 \u0442\u043e\u0439 \u0442\u043e\u043c\u0443 \u0442\u0435\u043c
+ | \u0442\u0435\u043c \u0442\u043e\u0439 \u0442\u0435\u043c [\u0442\u043e\u044e] \u0442\u0435\u043c\u0438
+ | \u0442\u043e\u043c \u0442\u043e\u0439 \u0442\u043e\u043c \u0442\u0435\u0445
+ |
+ | determinative pronouns
+ |
+ | (a) \u0432\u0435\u0441\u044c (all)
+ |
+ | \u0432\u0435\u0441\u044c \u0432\u0441\u044f \u0432\u0441\u0435 \u0432\u0441\u0435
+ | \u0432\u0441\u0435\u0433\u043e \u0432\u0441\u044e \u0432\u0441\u0435 \u0432\u0441\u0435
+ | \u0432\u0441\u0435\u0433\u043e \u0432\u0441\u0435\u0439 \u0432\u0441\u0435\u0433\u043e \u0432\u0441\u0435\u0445
+ | \u0432\u0441\u0435\u043c\u0443 \u0432\u0441\u0435\u0439 \u0432\u0441\u0435\u043c\u0443 \u0432\u0441\u0435\u043c
+ | \u0432\u0441\u0435\u043c \u0432\u0441\u0435\u0439 \u0432\u0441\u0435\u043c [\u0432\u0441\u0435\u044e] \u0432\u0441\u0435\u043c\u0438
+ | \u0432\u0441\u0435\u043c \u0432\u0441\u0435\u0439 \u0432\u0441\u0435\u043c \u0432\u0441\u0435\u0445
+ |
+ | (b) \u0441\u0430\u043c (himself etc)
+ |
+ | \u0441\u0430\u043c \u0441\u0430\u043c\u0430 \u0441\u0430\u043c\u043e \u0441\u0430\u043c\u0438
+ | \u0441\u0430\u043c\u043e\u0433\u043e \u0441\u0430\u043c\u0443 \u0441\u0430\u043c\u043e \u0441\u0430\u043c\u0438\u0445
+ | \u0441\u0430\u043c\u043e\u0433\u043e \u0441\u0430\u043c\u043e\u0439 \u0441\u0430\u043c\u043e\u0433\u043e \u0441\u0430\u043c\u0438\u0445
+ | \u0441\u0430\u043c\u043e\u043c\u0443 \u0441\u0430\u043c\u043e\u0439 \u0441\u0430\u043c\u043e\u043c\u0443 \u0441\u0430\u043c\u0438\u043c
+ | \u0441\u0430\u043c\u0438\u043c \u0441\u0430\u043c\u043e\u0439 \u0441\u0430\u043c\u0438\u043c [\u0441\u0430\u043c\u043e\u044e] \u0441\u0430\u043c\u0438\u043c\u0438
+ | \u0441\u0430\u043c\u043e\u043c \u0441\u0430\u043c\u043e\u0439 \u0441\u0430\u043c\u043e\u043c \u0441\u0430\u043c\u0438\u0445
+ |
+ | stems of verbs `to be', `to have', `to do' and modal
+ |
+ | \u0431\u044b\u0442\u044c \u0431\u044b \u0431\u0443\u0434 \u0431\u044b\u0432 \u0435\u0441\u0442\u044c \u0441\u0443\u0442\u044c
+ | \u0438\u043c\u0435
+ | \u0434\u0435\u043b
+ | \u043c\u043e\u0433 \u043c\u043e\u0436 \u043c\u043e\u0447\u044c
+ | \u0443\u043c\u0435
+ | \u0445\u043e\u0447 \u0445\u043e\u0442
+ | \u0434\u043e\u043b\u0436
+ | \u043c\u043e\u0436\u043d
+ | \u043d\u0443\u0436\u043d
+ | \u043d\u0435\u043b\u044c\u0437\u044f
+
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1a53346c/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_sv.txt
----------------------------------------------------------------------
diff --git a/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_sv.txt b/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_sv.txt
new file mode 100644
index 0000000..096f87f
--- /dev/null
+++ b/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_sv.txt
@@ -0,0 +1,133 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+ |
+ | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
+
+ | A Swedish stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | This is a ranked list (commonest to rarest) of stopwords derived from
+ | a large text sample.
+
+ | Swedish stop words occasionally exhibit homonym clashes. For example
+ | så = so, but also seed. These are indicated clearly below.
+
+och | and
+det | it, this/that
+att | to (with infinitive)
+i | in, at
+en | a
+jag | I
+hon | she
+som | who, that
+han | he
+på | on
+den | it, this/that
+med | with
+var | where, each
+sig | him(self) etc
+för | for
+så | so (also: seed)
+till | to
+är | is
+men | but
+ett | a
+om | if; around, about
+hade | had
+de | they, these/those
+av | of
+icke | not, no
+mig | me
+du | you
+henne | her
+då | then, when
+sin | his
+nu | now
+har | have
+inte | inte någon = no one
+hans | his
+honom | him
+skulle | 'sake'
+hennes | her
+där | there
+min | my
+man | one (pronoun)
+ej | nor
+vid | at, by, on (also: vast)
+kunde | could
+något | some etc
+från | from, off
+ut | out
+när | when
+efter | after, behind
+upp | up
+vi | we
+dem | them
+vara | be
+vad | what
+över | over
+än | than
+dig | you
+kan | can
+sina | his
+här | here
+ha | have
+mot | towards
+alla | all
+under | under (also: wonder)
+någon | some etc
+eller | or (else)
+allt | all
+mycket | much
+sedan | since
+ju | why
+denna | this/that
+själv | myself, yourself etc
+detta | this/that
+åt | to
+utan | without
+varit | was
+hur | how
+ingen | no
+mitt | my
+ni | you
+bli | to be, become
+blev | from bli
+oss | us
+din | thy
+dessa | these/those
+några | some etc
+deras | their
+blir | from bli
+mina | my
+samma | (the) same
+vilken | who, that
+er | you, your
+sådan | such a
+vår | our
+blivit | from bli
+dess | its
+inom | within
+mellan | between
+sådant | such a
+varför | why
+varje | each
+vilka | who, that
+ditt | thy
+vem | who
+vilket | who, that
+sitta | his
+sådana | such a
+vart | each
+dina | thy
+vars | whose
+vårt | our
+våra | our
+ert | your
+era | your
+vilkas | whose
+
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1a53346c/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_th.txt
----------------------------------------------------------------------
diff --git a/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_th.txt b/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_th.txt
new file mode 100644
index 0000000..07f0fab
--- /dev/null
+++ b/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_th.txt
@@ -0,0 +1,119 @@
+# Thai stopwords from:
+# "Opinion Detection in Thai Political News Columns
+# Based on Subjectivity Analysis"
+# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak
+\u0e44\u0e27\u0e49
+\u0e44\u0e21\u0e48
+\u0e44\u0e1b
+\u0e44\u0e14\u0e49
+\u0e43\u0e2b\u0e49
+\u0e43\u0e19
+\u0e42\u0e14\u0e22
+\u0e41\u0e2b\u0e48\u0e07
+\u0e41\u0e25\u0e49\u0e27
+\u0e41\u0e25\u0e30
+\u0e41\u0e23\u0e01
+\u0e41\u0e1a\u0e1a
+\u0e41\u0e15\u0e48
+\u0e40\u0e2d\u0e07
+\u0e40\u0e2b\u0e47\u0e19
+\u0e40\u0e25\u0e22
+\u0e40\u0e23\u0e34\u0e48\u0e21
+\u0e40\u0e23\u0e32
+\u0e40\u0e21\u0e37\u0e48\u0e2d
+\u0e40\u0e1e\u0e37\u0e48\u0e2d
+\u0e40\u0e1e\u0e23\u0e32\u0e30
+\u0e40\u0e1b\u0e47\u0e19\u0e01\u0e32\u0e23
+\u0e40\u0e1b\u0e47\u0e19
+\u0e40\u0e1b\u0e34\u0e14\u0e40\u0e1c\u0e22
+\u0e40\u0e1b\u0e34\u0e14
+\u0e40\u0e19\u0e37\u0e48\u0e2d\u0e07\u0e08\u0e32\u0e01
+\u0e40\u0e14\u0e35\u0e22\u0e27\u0e01\u0e31\u0e19
+\u0e40\u0e14\u0e35\u0e22\u0e27
+\u0e40\u0e0a\u0e48\u0e19
+\u0e40\u0e09\u0e1e\u0e32\u0e30
+\u0e40\u0e04\u0e22
+\u0e40\u0e02\u0e49\u0e32
+\u0e40\u0e02\u0e32
+\u0e2d\u0e35\u0e01
+\u0e2d\u0e32\u0e08
+\u0e2d\u0e30\u0e44\u0e23
+\u0e2d\u0e2d\u0e01
+\u0e2d\u0e22\u0e48\u0e32\u0e07
+\u0e2d\u0e22\u0e39\u0e48
+\u0e2d\u0e22\u0e32\u0e01
+\u0e2b\u0e32\u0e01
+\u0e2b\u0e25\u0e32\u0e22
+\u0e2b\u0e25\u0e31\u0e07\u0e08\u0e32\u0e01
+\u0e2b\u0e25\u0e31\u0e07
+\u0e2b\u0e23\u0e37\u0e2d
+\u0e2b\u0e19\u0e36\u0e48\u0e07
+\u0e2a\u0e48\u0e27\u0e19
+\u0e2a\u0e48\u0e07
+\u0e2a\u0e38\u0e14
+\u0e2a\u0e4d\u0e32\u0e2b\u0e23\u0e31\u0e1a
+\u0e27\u0e48\u0e32
+\u0e27\u0e31\u0e19
+\u0e25\u0e07
+\u0e23\u0e48\u0e27\u0e21
+\u0e23\u0e32\u0e22
+\u0e23\u0e31\u0e1a
+\u0e23\u0e30\u0e2b\u0e27\u0e48\u0e32\u0e07
+\u0e23\u0e27\u0e21
+\u0e22\u0e31\u0e07
+\u0e21\u0e35
+\u0e21\u0e32\u0e01
+\u0e21\u0e32
+\u0e1e\u0e23\u0e49\u0e2d\u0e21
+\u0e1e\u0e1a
+\u0e1c\u0e48\u0e32\u0e19
+\u0e1c\u0e25
+\u0e1a\u0e32\u0e07
+\u0e19\u0e48\u0e32
+\u0e19\u0e35\u0e49
+\u0e19\u0e4d\u0e32
+\u0e19\u0e31\u0e49\u0e19
+\u0e19\u0e31\u0e01
+\u0e19\u0e2d\u0e01\u0e08\u0e32\u0e01
+\u0e17\u0e38\u0e01
+\u0e17\u0e35\u0e48\u0e2a\u0e38\u0e14
+\u0e17\u0e35\u0e48
+\u0e17\u0e4d\u0e32\u0e43\u0e2b\u0e49
+\u0e17\u0e4d\u0e32
+\u0e17\u0e32\u0e07
+\u0e17\u0e31\u0e49\u0e07\u0e19\u0e35\u0e49
+\u0e17\u0e31\u0e49\u0e07
+\u0e16\u0e49\u0e32
+\u0e16\u0e39\u0e01
+\u0e16\u0e36\u0e07
+\u0e15\u0e49\u0e2d\u0e07
+\u0e15\u0e48\u0e32\u0e07\u0e46
+\u0e15\u0e48\u0e32\u0e07
+\u0e15\u0e48\u0e2d
+\u0e15\u0e32\u0e21
+\u0e15\u0e31\u0e49\u0e07\u0e41\u0e15\u0e48
+\u0e15\u0e31\u0e49\u0e07
+\u0e14\u0e49\u0e32\u0e19
+\u0e14\u0e49\u0e27\u0e22
+\u0e14\u0e31\u0e07
+\u0e0b\u0e36\u0e48\u0e07
+\u0e0a\u0e48\u0e27\u0e07
+\u0e08\u0e36\u0e07
+\u0e08\u0e32\u0e01
+\u0e08\u0e31\u0e14
+\u0e08\u0e30
+\u0e04\u0e37\u0e2d
+\u0e04\u0e27\u0e32\u0e21
+\u0e04\u0e23\u0e31\u0e49\u0e07
+\u0e04\u0e07
+\u0e02\u0e36\u0e49\u0e19
+\u0e02\u0e2d\u0e07
+\u0e02\u0e2d
+\u0e02\u0e13\u0e30
+\u0e01\u0e48\u0e2d\u0e19
+\u0e01\u0e47
+\u0e01\u0e32\u0e23
+\u0e01\u0e31\u0e1a
+\u0e01\u0e31\u0e19
+\u0e01\u0e27\u0e48\u0e32
+\u0e01\u0e25\u0e48\u0e32\u0e27
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1a53346c/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_tr.txt
----------------------------------------------------------------------
diff --git a/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_tr.txt b/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_tr.txt
new file mode 100644
index 0000000..84d9408
--- /dev/null
+++ b/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_tr.txt
@@ -0,0 +1,212 @@
+# Turkish stopwords from LUCENE-559
+# merged with the list from "Information Retrieval on Turkish Texts"
+# (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf)
+acaba
+altm\u0131\u015f
+alt\u0131
+ama
+ancak
+arada
+asl\u0131nda
+ayr\u0131ca
+bana
+baz\u0131
+belki
+ben
+benden
+beni
+benim
+beri
+be\u015f
+bile
+bin
+bir
+birçok
+biri
+birkaç
+birkez
+bir\u015fey
+bir\u015feyi
+biz
+bize
+bizden
+bizi
+bizim
+böyle
+böylece
+bu
+buna
+bunda
+bundan
+bunlar
+bunlar\u0131
+bunlar\u0131n
+bunu
+bunun
+burada
+çok
+çünkü
+da
+daha
+dahi
+de
+defa
+de\u011fil
+di\u011fer
+diye
+doksan
+dokuz
+dolay\u0131
+dolay\u0131s\u0131yla
+dört
+edecek
+eden
+ederek
+edilecek
+ediliyor
+edilmesi
+ediyor
+e\u011fer
+elli
+en
+etmesi
+etti
+etti\u011fi
+etti\u011fini
+gibi
+göre
+halen
+hangi
+hatta
+hem
+henüz
+hep
+hepsi
+her
+herhangi
+herkesin
+hiç
+hiçbir
+için
+iki
+ile
+ilgili
+ise
+i\u015fte
+itibaren
+itibariyle
+kadar
+kar\u015f\u0131n
+katrilyon
+kendi
+kendilerine
+kendini
+kendisi
+kendisine
+kendisini
+kez
+ki
+kim
+kimden
+kime
+kimi
+kimse
+k\u0131rk
+milyar
+milyon
+mu
+mü
+m\u0131
+nas\u0131l
+ne
+neden
+nedenle
+nerde
+nerede
+nereye
+niye
+niçin
+o
+olan
+olarak
+oldu
+oldu\u011fu
+oldu\u011funu
+olduklar\u0131n\u0131
+olmad\u0131
+olmad\u0131\u011f\u0131
+olmak
+olmas\u0131
+olmayan
+olmaz
+olsa
+olsun
+olup
+olur
+olursa
+oluyor
+on
+ona
+ondan
+onlar
+onlardan
+onlar\u0131
+onlar\u0131n
+onu
+onun
+otuz
+oysa
+öyle
+pek
+ra\u011fmen
+sadece
+sanki
+sekiz
+seksen
+sen
+senden
+seni
+senin
+siz
+sizden
+sizi
+sizin
+\u015fey
+\u015feyden
+\u015feyi
+\u015feyler
+\u015föyle
+\u015fu
+\u015funa
+\u015funda
+\u015fundan
+\u015funlar\u0131
+\u015funu
+taraf\u0131ndan
+trilyon
+tüm
+üç
+üzere
+var
+vard\u0131
+ve
+veya
+ya
+yani
+yapacak
+yap\u0131lan
+yap\u0131lmas\u0131
+yap\u0131yor
+yapmak
+yapt\u0131
+yapt\u0131\u011f\u0131
+yapt\u0131\u011f\u0131n\u0131
+yapt\u0131klar\u0131
+yedi
+yerine
+yetmi\u015f
+yine
+yirmi
+yoksa
+yüz
+zaten
http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1a53346c/solr/server/solr/configsets/basic_configs/conf/lang/userdict_ja.txt
----------------------------------------------------------------------
diff --git a/solr/server/solr/configsets/basic_configs/conf/lang/userdict_ja.txt b/solr/server/solr/configsets/basic_configs/conf/lang/userdict_ja.txt
new file mode 100644
index 0000000..6f0368e
--- /dev/null
+++ b/solr/server/solr/configsets/basic_configs/conf/lang/userdict_ja.txt
@@ -0,0 +1,29 @@
+#
+# This is a sample user dictionary for Kuromoji (JapaneseTokenizer)
+#
+# Add entries to this file in order to override the statistical model in terms
+# of segmentation, readings and part-of-speech tags. Notice that entries do
+# not have weights since they are always used when found. This is by design
+# in order to maximize ease-of-use.
+#
+# Entries are defined using the following CSV format:
+# <text>,<token 1> ... <token n>,<reading 1> ... <reading n>,<part-of-speech tag>
+#
+# Notice that a single half-width space separates tokens and readings, and
+# that the number of tokens and the number of readings must match exactly.
+#
+# Also notice that the behavior of multiple entries with the same <text> is undefined.
+#
+# Whitespace only lines are ignored. Comments are not allowed on entry lines.
+#
+
+# Custom segmentation for kanji compounds
+\u65e5\u672c\u7d4c\u6e08\u65b0\u805e,\u65e5\u672c \u7d4c\u6e08 \u65b0\u805e,\u30cb\u30db\u30f3 \u30b1\u30a4\u30b6\u30a4 \u30b7\u30f3\u30d6\u30f3,\u30ab\u30b9\u30bf\u30e0\u540d\u8a5e
+\u95a2\u897f\u56fd\u969b\u7a7a\u6e2f,\u95a2\u897f \u56fd\u969b \u7a7a\u6e2f,\u30ab\u30f3\u30b5\u30a4 \u30b3\u30af\u30b5\u30a4 \u30af\u30a6\u30b3\u30a6,\u30ab\u30b9\u30bf\u30e0\u540d\u8a5e
+
+# Custom segmentation for compound katakana
+\u30c8\u30fc\u30c8\u30d0\u30c3\u30b0,\u30c8\u30fc\u30c8 \u30d0\u30c3\u30b0,\u30c8\u30fc\u30c8 \u30d0\u30c3\u30b0,\u304b\u305a\u30ab\u30ca\u540d\u8a5e
+\u30b7\u30e7\u30eb\u30c0\u30fc\u30d0\u30c3\u30b0,\u30b7\u30e7\u30eb\u30c0\u30fc \u30d0\u30c3\u30b0,\u30b7\u30e7\u30eb\u30c0\u30fc \u30d0\u30c3\u30b0,\u304b\u305a\u30ab\u30ca\u540d\u8a5e
+
+# Custom reading for former sumo wrestler
+\u671d\u9752\u9f8d,\u671d\u9752\u9f8d,\u30a2\u30b5\u30b7\u30e7\u30a6\u30ea\u30e5\u30a6,\u30ab\u30b9\u30bf\u30e0\u4eba\u540d