You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by yo...@apache.org on 2016/07/08 19:45:59 UTC

[3/4] lucene-solr:branch_6x: SOLR-9163: sync basic_configs w/ data_driven_schema_configs

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1a53346c/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_lv.txt
----------------------------------------------------------------------
diff --git a/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_lv.txt b/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_lv.txt
new file mode 100644
index 0000000..e21a23c
--- /dev/null
+++ b/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_lv.txt
@@ -0,0 +1,172 @@
+# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins
+# the original list of over 800 forms was refined: 
+#   pronouns, adverbs, interjections were removed
+# 
+# prepositions
+aiz
+ap
+ar
+apak\u0161
+\u0101rpus
+aug\u0161pus
+bez
+caur
+d\u0113\u013c
+gar
+iek\u0161
+iz
+kop\u0161
+labad
+lejpus
+l\u012bdz
+no
+otrpus
+pa
+par
+p\u0101r
+p\u0113c
+pie
+pirms
+pret
+priek\u0161
+starp
+\u0161aipus
+uz
+vi\u0146pus
+virs
+virspus
+zem
+apak\u0161pus
+# Conjunctions
+un
+bet
+jo
+ja
+ka
+lai
+tom\u0113r
+tikko
+turpret\u012b
+ar\u012b
+kaut
+gan
+t\u0101d\u0113\u013c
+t\u0101
+ne
+tikvien
+vien
+k\u0101
+ir
+te
+vai
+kam\u0113r
+# Particles
+ar
+diezin
+dro\u0161i
+diem\u017e\u0113l
+neb\u016bt
+ik
+it
+ta\u010du
+nu
+pat
+tiklab
+iek\u0161pus
+nedz
+tik
+nevis
+turpretim
+jeb
+iekam
+iek\u0101m
+iek\u0101ms
+kol\u012bdz
+l\u012bdzko
+tikl\u012bdz
+jeb\u0161u
+t\u0101lab
+t\u0101p\u0113c
+nek\u0101
+itin
+j\u0101
+jau
+jel
+n\u0113
+nezin
+tad
+tikai
+vis
+tak
+iekams
+vien
+# modal verbs
+b\u016bt  
+biju 
+biji
+bija
+bij\u0101m
+bij\u0101t
+esmu
+esi
+esam
+esat 
+b\u016b\u0161u     
+b\u016bsi
+b\u016bs
+b\u016bsim
+b\u016bsiet
+tikt
+tiku
+tiki
+tika
+tik\u0101m
+tik\u0101t
+tieku
+tiec
+tiek
+tiekam
+tiekat
+tik\u0161u
+tiks
+tiksim
+tiksiet
+tapt
+tapi
+tap\u0101t
+topat
+tap\u0161u
+tapsi
+taps
+tapsim
+tapsiet
+k\u013c\u016bt
+k\u013cuvu
+k\u013cuvi
+k\u013cuva
+k\u013cuv\u0101m
+k\u013cuv\u0101t
+k\u013c\u016bstu
+k\u013c\u016bsti
+k\u013c\u016bst
+k\u013c\u016bstam
+k\u013c\u016bstat
+k\u013c\u016b\u0161u
+k\u013c\u016bsi
+k\u013c\u016bs
+k\u013c\u016bsim
+k\u013c\u016bsiet
+# verbs
+var\u0113t
+var\u0113ju
+var\u0113j\u0101m
+var\u0113\u0161u
+var\u0113sim
+var
+var\u0113ji
+var\u0113j\u0101t
+var\u0113si
+var\u0113siet
+varat
+var\u0113ja
+var\u0113s

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1a53346c/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_nl.txt
----------------------------------------------------------------------
diff --git a/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_nl.txt b/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_nl.txt
new file mode 100644
index 0000000..47a2aea
--- /dev/null
+++ b/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_nl.txt
@@ -0,0 +1,119 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ |  - Encoding was converted to UTF-8.
+ |  - This notice was added.
+ |
+ | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
+
+ | A Dutch stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | This is a ranked list (commonest to rarest) of stopwords derived from
+ | a large sample of Dutch text.
+
+ | Dutch stop words frequently exhibit homonym clashes. These are indicated
+ | clearly below.
+
+de             |  the
+en             |  and
+van            |  of, from
+ik             |  I, the ego
+te             |  (1) chez, at etc, (2) to, (3) too
+dat            |  that, which
+die            |  that, those, who, which
+in             |  in, inside
+een            |  a, an, one
+hij            |  he
+het            |  the, it
+niet           |  not, nothing, naught
+zijn           |  (1) to be, being, (2) his, one's, its
+is             |  is
+was            |  (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river
+op             |  on, upon, at, in, up, used up
+aan            |  on, upon, to (as dative)
+met            |  with, by
+als            |  like, such as, when
+voor           |  (1) before, in front of, (2) furrow
+had            |  had, past tense all persons sing. of 'hebben' (have)
+er             |  there
+maar           |  but, only
+om             |  round, about, for etc
+hem            |  him
+dan            |  then
+zou            |  should/would, past tense all persons sing. of 'zullen'
+of             |  or, whether, if
+wat            |  what, something, anything
+mijn           |  possessive and noun 'mine'
+men            |  people, 'one'
+dit            |  this
+zo             |  so, thus, in this way
+door           |  through, by
+over           |  over, across
+ze             |  she, her, they, them
+zich           |  oneself
+bij            |  (1) a bee, (2) by, near, at
+ook            |  also, too
+tot            |  till, until
+je             |  you
+mij            |  me
+uit            |  out of, from
+der            |  Old Dutch form of 'van der' still found in surnames
+daar           |  (1) there, (2) because
+haar           |  (1) her, their, them, (2) hair
+naar           |  (1) unpleasant, unwell etc, (2) towards, (3) as
+heb            |  present first person sing. of 'to have'
+hoe            |  how, why
+heeft          |  present third person sing. of 'to have'
+hebben         |  'to have' and various parts thereof
+deze           |  this
+u              |  you
+want           |  (1) for, (2) mitten, (3) rigging
+nog            |  yet, still
+zal            |  'shall', first and third person sing. of verb 'zullen' (will)
+me             |  me
+zij            |  she, they
+nu             |  now
+ge             |  'thou', still used in Belgium and south Netherlands
+geen           |  none
+omdat          |  because
+iets           |  something, somewhat
+worden         |  to become, grow, get
+toch           |  yet, still
+al             |  all, every, each
+waren          |  (1) 'were', (2) to wander, (3) wares
+veel           |  much, many
+meer           |  (1) more, (2) lake
+doen           |  to do, to make
+toen           |  then, when
+moet           |  noun 'spot/mote' and present form of 'to must'
+ben            |  (1) am, (2) 'are' in interrogative second person singular of 'to be'
+zonder         |  without
+kan            |  noun 'can' and present form of 'to be able'
+hun            |  their, them
+dus            |  so, consequently
+alles          |  all, everything, anything
+onder          |  under, beneath
+ja             |  yes, of course
+eens           |  once, one day
+hier           |  here
+wie            |  who
+werd           |  imperfect third person sing. of 'become'
+altijd         |  always
+doch           |  yet, but etc
+wordt          |  present third person sing. of 'become'
+wezen          |  (1) to be, (2) 'been' as in 'been fishing', (3) orphans
+kunnen         |  to be able
+ons            |  us/our
+zelf           |  self
+tegen          |  against, towards, at
+na             |  after, near
+reeds          |  already
+wil            |  (1) present tense of 'want', (2) 'will', noun, (3) fender
+kon            |  could; past tense of 'to be able'
+niets          |  nothing
+uw             |  your
+iemand         |  somebody
+geweest        |  been; past participle of 'be'
+andere         |  other

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1a53346c/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_no.txt
----------------------------------------------------------------------
diff --git a/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_no.txt b/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_no.txt
new file mode 100644
index 0000000..a7a2c28
--- /dev/null
+++ b/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_no.txt
@@ -0,0 +1,194 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ |  - Encoding was converted to UTF-8.
+ |  - This notice was added.
+ |
+ | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
+
+ | A Norwegian stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | This stop word list is for the dominant bokmål dialect. Words unique
+ | to nynorsk are marked *.
+
+ | Revised by Jan Bruusgaard <Ja...@ssb.no>, Jan 2005
+
+og             | and
+i              | in
+jeg            | I
+det            | it/this/that
+at             | to (w. inf.)
+en             | a/an
+et             | a/an
+den            | it/this/that
+til            | to
+er             | is/am/are
+som            | who/that
+på             | on
+de             | they / you(formal)
+med            | with
+han            | he
+av             | of
+ikke           | not
+ikkje          | not *
+der            | there
+så             | so
+var            | was/were
+meg            | me
+seg            | oneself
+men            | but
+ett            | one
+har            | have
+om             | about
+vi             | we
+min            | my
+mitt           | my
+ha             | have
+hadde          | had
+hun            | she
+nå             | now
+over           | over
+da             | when/as
+ved            | by/know
+fra            | from
+du             | you
+ut             | out
+sin            | your
+dem            | them
+oss            | us
+opp            | up
+man            | you/one
+kan            | can
+hans           | his
+hvor           | where
+eller          | or
+hva            | what
+skal           | shall/must
+selv           | self (reflective)
+sjøl           | self (reflective)
+her            | here
+alle           | all
+vil            | will
+bli            | become
+ble            | became
+blei           | became *
+blitt          | have become
+kunne          | could
+inn            | in
+når            | when
+være           | be
+kom            | come
+noen           | some
+noe            | some
+ville          | would
+dere           | you
+som            | who/which/that
+deres          | their/theirs
+kun            | only/just
+ja             | yes
+etter          | after
+ned            | down
+skulle         | should
+denne          | this
+for            | for/because
+deg            | you
+si             | hers/his
+sine           | hers/his
+sitt           | hers/his
+mot            | against
+å              | to
+meget          | much
+hvorfor        | why
+dette          | this
+disse          | these/those
+uten           | without
+hvordan        | how
+ingen          | none
+din            | your
+ditt           | your
+blir           | become
+samme          | same
+hvilken        | which
+hvilke         | which (plural)
+sånn           | such a
+inni           | inside/within
+mellom         | between
+vår            | our
+hver           | each
+hvem           | who
+vors           | us/ours
+hvis           | whose
+både           | both
+bare           | only/just
+enn            | than
+fordi          | as/because
+før            | before
+mange          | many
+også           | also
+slik           | just
+vært           | been
+være           | to be
+båe            | both *
+begge          | both
+siden          | since
+dykk           | your *
+dykkar         | yours *
+dei            | they *
+deira          | them *
+deires         | theirs *
+deim           | them *
+di             | your (fem.) *
+då             | as/when *
+eg             | I *
+ein            | a/an *
+eit            | a/an *
+eitt           | a/an *
+elles          | or *
+honom          | he *
+hjå            | at *
+ho             | she *
+hoe            | she *
+henne          | her
+hennar         | her/hers
+hennes         | hers
+hoss           | how *
+hossen         | how *
+ikkje          | not *
+ingi           | noone *
+inkje          | noone *
+korleis        | how *
+korso          | how *
+kva            | what/which *
+kvar           | where *
+kvarhelst      | where *
+kven           | who/whom *
+kvi            | why *
+kvifor         | why *
+me             | we *
+medan          | while *
+mi             | my *
+mine           | my *
+mykje          | much *
+no             | now *
+nokon          | some (masc./neut.) *
+noka           | some (fem.) *
+nokor          | some *
+noko           | some *
+nokre          | some *
+si             | his/hers *
+sia            | since *
+sidan          | since *
+so             | so *
+somt           | some *
+somme          | some *
+um             | about *
+upp            | up *
+vere           | be *
+vore           | was *
+verte          | become *
+vort           | become *
+varte          | became *
+vart           | became *
+

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1a53346c/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_pt.txt
----------------------------------------------------------------------
diff --git a/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_pt.txt b/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_pt.txt
new file mode 100644
index 0000000..acfeb01
--- /dev/null
+++ b/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_pt.txt
@@ -0,0 +1,253 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ |  - Encoding was converted to UTF-8.
+ |  - This notice was added.
+ |
+ | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
+
+ | A Portuguese stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+
+ | The following is a ranked list (commonest to rarest) of stopwords
+ | deriving from a large sample of text.
+
+ | Extra words have been added at the end.
+
+de             |  of, from
+a              |  the; to, at; her
+o              |  the; him
+que            |  who, that
+e              |  and
+do             |  de + o
+da             |  de + a
+em             |  in
+um             |  a
+para           |  for
+  | é          from SER
+com            |  with
+não            |  not, no
+uma            |  a
+os             |  the; them
+no             |  em + o
+se             |  himself etc
+na             |  em + a
+por            |  for
+mais           |  more
+as             |  the; them
+dos            |  de + os
+como           |  as, like
+mas            |  but
+  | foi        from SER
+ao             |  a + o
+ele            |  he
+das            |  de + as
+  | tem        from TER
+à              |  a + a
+seu            |  his
+sua            |  her
+ou             |  or
+  | ser        from SER
+quando         |  when
+muito          |  much
+  | há         from HAV
+nos            |  em + os; us
+já             |  already, now
+  | está       from EST
+eu             |  I
+também         |  also
+só             |  only, just
+pelo           |  per + o
+pela           |  per + a
+até            |  up to
+isso           |  that
+ela            |  she
+entre          |  between
+  | era        from SER
+depois         |  after
+sem            |  without
+mesmo          |  same
+aos            |  a + os
+  | ter        from TER
+seus           |  his
+quem           |  whom
+nas            |  em + as
+me             |  me
+esse           |  that
+eles           |  they
+  | estão      from EST
+você           |  you
+  | tinha      from TER
+  | foram      from SER
+essa           |  that
+num            |  em + um
+nem            |  nor
+suas           |  her
+meu            |  my
+às             |  a + as
+minha          |  my
+  | têm        from TER
+numa           |  em + uma
+pelos          |  per + os
+elas           |  they
+  | havia      from HAV
+  | seja       from SER
+qual           |  which
+  | será       from SER
+nós            |  we
+  | tenho      from TER
+lhe            |  to him, her
+deles          |  of them
+essas          |  those
+esses          |  those
+pelas          |  per + as
+este           |  this
+  | fosse      from SER
+dele           |  of him
+
+ | other words. There are many contractions such as naquele = em+aquele,
+ | mo = me+o, but they are rare.
+ | Indefinite article plural forms are also rare.
+
+tu             |  thou
+te             |  thee
+vocês          |  you (plural)
+vos            |  you
+lhes           |  to them
+meus           |  my
+minhas
+teu            |  thy
+tua
+teus
+tuas
+nosso          | our
+nossa
+nossos
+nossas
+
+dela           |  of her
+delas          |  of them
+
+esta           |  this
+estes          |  these
+estas          |  these
+aquele         |  that
+aquela         |  that
+aqueles        |  those
+aquelas        |  those
+isto           |  this
+aquilo         |  that
+
+               | forms of estar, to be (not including the infinitive):
+estou
+está
+estamos
+estão
+estive
+esteve
+estivemos
+estiveram
+estava
+estávamos
+estavam
+estivera
+estivéramos
+esteja
+estejamos
+estejam
+estivesse
+estivéssemos
+estivessem
+estiver
+estivermos
+estiverem
+
+               | forms of haver, to have (not including the infinitive):
+hei
+há
+havemos
+hão
+houve
+houvemos
+houveram
+houvera
+houvéramos
+haja
+hajamos
+hajam
+houvesse
+houvéssemos
+houvessem
+houver
+houvermos
+houverem
+houverei
+houverá
+houveremos
+houverão
+houveria
+houveríamos
+houveriam
+
+               | forms of ser, to be (not including the infinitive):
+sou
+somos
+são
+era
+éramos
+eram
+fui
+foi
+fomos
+foram
+fora
+fôramos
+seja
+sejamos
+sejam
+fosse
+fôssemos
+fossem
+for
+formos
+forem
+serei
+será
+seremos
+serão
+seria
+seríamos
+seriam
+
+               | forms of ter, to have (not including the infinitive):
+tenho
+tem
+temos
+têm
+tinha
+tínhamos
+tinham
+tive
+teve
+tivemos
+tiveram
+tivera
+tivéramos
+tenha
+tenhamos
+tenham
+tivesse
+tivéssemos
+tivessem
+tiver
+tivermos
+tiverem
+terei
+terá
+teremos
+terão
+teria
+teríamos
+teriam

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1a53346c/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_ro.txt
----------------------------------------------------------------------
diff --git a/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_ro.txt b/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_ro.txt
new file mode 100644
index 0000000..4fdee90
--- /dev/null
+++ b/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_ro.txt
@@ -0,0 +1,233 @@
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# Also see http://www.opensource.org/licenses/bsd-license.html
+acea
+aceasta
+aceast\u0103
+aceea
+acei
+aceia
+acel
+acela
+acele
+acelea
+acest
+acesta
+aceste
+acestea
+ace\u015fti
+ace\u015ftia
+acolo
+acum
+ai
+aia
+aib\u0103
+aici
+al
+\u0103la
+ale
+alea
+\u0103lea
+altceva
+altcineva
+am
+ar
+are
+a\u015f
+a\u015fadar
+asemenea
+asta
+\u0103sta
+ast\u0103zi
+astea
+\u0103stea
+\u0103\u015ftia
+asupra
+a\u0163i
+au
+avea
+avem
+ave\u0163i
+azi
+bine
+bucur
+bun\u0103
+ca
+c\u0103
+c\u0103ci
+când
+care
+c\u0103rei
+c\u0103ror
+c\u0103rui
+cât
+câte
+câ\u0163i
+c\u0103tre
+câtva
+ce
+cel
+ceva
+chiar
+cînd
+cine
+cineva
+cît
+cîte
+cî\u0163i
+cîtva
+contra
+cu
+cum
+cumva
+curând
+curînd
+da
+d\u0103
+dac\u0103
+dar
+datorit\u0103
+de
+deci
+deja
+deoarece
+departe
+de\u015fi
+din
+dinaintea
+dintr
+dintre
+drept
+dup\u0103
+ea
+ei
+el
+ele
+eram
+este
+e\u015fti
+eu
+face
+f\u0103r\u0103
+fi
+fie
+fiecare
+fii
+fim
+fi\u0163i
+iar
+ieri
+îi
+îl
+îmi
+împotriva
+în 
+înainte
+înaintea
+încât
+încît
+încotro
+între
+întrucât
+întrucît
+î\u0163i
+la
+lâng\u0103
+le
+li
+lîng\u0103
+lor
+lui
+m\u0103
+mâine
+mea
+mei
+mele
+mereu
+meu
+mi
+mine
+mult
+mult\u0103
+mul\u0163i
+ne
+nic\u0103ieri
+nici
+nimeni
+ni\u015fte
+noastr\u0103
+noastre
+noi
+no\u015ftri
+nostru
+nu
+ori
+oricând
+oricare
+oricât
+orice
+oricînd
+oricine
+oricît
+oricum
+oriunde
+pân\u0103
+pe
+pentru
+peste
+pîn\u0103
+poate
+pot
+prea
+prima
+primul
+prin
+printr
+sa
+s\u0103
+s\u0103i
+sale
+sau
+s\u0103u
+se
+\u015fi
+sînt
+sîntem
+sînte\u0163i
+spre
+sub
+sunt
+suntem
+sunte\u0163i
+ta
+t\u0103i
+tale
+t\u0103u
+te
+\u0163i
+\u0163ie
+tine
+toat\u0103
+toate
+tot
+to\u0163i
+totu\u015fi
+tu
+un
+una
+unde
+undeva
+unei
+unele
+uneori
+unor
+v\u0103
+vi
+voastr\u0103
+voastre
+voi
+vo\u015ftri
+vostru
+vou\u0103
+vreo
+vreun

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1a53346c/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_ru.txt
----------------------------------------------------------------------
diff --git a/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_ru.txt b/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_ru.txt
new file mode 100644
index 0000000..5527140
--- /dev/null
+++ b/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_ru.txt
@@ -0,0 +1,243 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ |  - Encoding was converted to UTF-8.
+ |  - This notice was added.
+ |
+ | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
+
+ | a russian stop word list. comments begin with vertical bar. each stop
+ | word is at the start of a line.
+
+ | this is a ranked list (commonest to rarest) of stopwords derived from
+ | a large text sample.
+
+ | letter `\u0451' is translated to `\u0435'.
+
+\u0438              | and
+\u0432              | in/into
+\u0432\u043e             | alternative form
+\u043d\u0435             | not
+\u0447\u0442\u043e            | what/that
+\u043e\u043d             | he
+\u043d\u0430             | on/onto
+\u044f              | i
+\u0441              | from
+\u0441\u043e             | alternative form
+\u043a\u0430\u043a            | how
+\u0430              | milder form of `no' (but)
+\u0442\u043e             | conjunction and form of `that'
+\u0432\u0441\u0435            | all
+\u043e\u043d\u0430            | she
+\u0442\u0430\u043a            | so, thus
+\u0435\u0433\u043e            | him
+\u043d\u043e             | but
+\u0434\u0430             | yes/and
+\u0442\u044b             | thou
+\u043a              | towards, by
+\u0443              | around, chez
+\u0436\u0435             | intensifier particle
+\u0432\u044b             | you
+\u0437\u0430             | beyond, behind
+\u0431\u044b             | conditional/subj. particle
+\u043f\u043e             | up to, along
+\u0442\u043e\u043b\u044c\u043a\u043e         | only
+\u0435\u0435             | her
+\u043c\u043d\u0435            | to me
+\u0431\u044b\u043b\u043e           | it was
+\u0432\u043e\u0442            | here is/are, particle
+\u043e\u0442             | away from
+\u043c\u0435\u043d\u044f           | me
+\u0435\u0449\u0435            | still, yet, more
+\u043d\u0435\u0442            | no, there isnt/arent
+\u043e              | about
+\u0438\u0437             | out of
+\u0435\u043c\u0443            | to him
+\u0442\u0435\u043f\u0435\u0440\u044c         | now
+\u043a\u043e\u0433\u0434\u0430          | when
+\u0434\u0430\u0436\u0435           | even
+\u043d\u0443             | so, well
+\u0432\u0434\u0440\u0443\u0433          | suddenly
+\u043b\u0438             | interrogative particle
+\u0435\u0441\u043b\u0438           | if
+\u0443\u0436\u0435            | already, but homonym of `narrower'
+\u0438\u043b\u0438            | or
+\u043d\u0438             | neither
+\u0431\u044b\u0442\u044c           | to be
+\u0431\u044b\u043b            | he was
+\u043d\u0435\u0433\u043e           | prepositional form of \u0435\u0433\u043e
+\u0434\u043e             | up to
+\u0432\u0430\u0441            | you accusative
+\u043d\u0438\u0431\u0443\u0434\u044c         | indef. suffix preceded by hyphen
+\u043e\u043f\u044f\u0442\u044c          | again
+\u0443\u0436             | already, but homonym of `adder'
+\u0432\u0430\u043c            | to you
+\u0441\u043a\u0430\u0437\u0430\u043b         | he said
+\u0432\u0435\u0434\u044c           | particle `after all'
+\u0442\u0430\u043c            | there
+\u043f\u043e\u0442\u043e\u043c          | then
+\u0441\u0435\u0431\u044f           | oneself
+\u043d\u0438\u0447\u0435\u0433\u043e         | nothing
+\u0435\u0439             | to her
+\u043c\u043e\u0436\u0435\u0442          | usually with `\u0431\u044b\u0442\u044c' as `maybe'
+\u043e\u043d\u0438            | they
+\u0442\u0443\u0442            | here
+\u0433\u0434\u0435            | where
+\u0435\u0441\u0442\u044c           | there is/are
+\u043d\u0430\u0434\u043e           | got to, must
+\u043d\u0435\u0439            | prepositional form of  \u0435\u0439
+\u0434\u043b\u044f            | for
+\u043c\u044b             | we
+\u0442\u0435\u0431\u044f           | thee
+\u0438\u0445             | them, their
+\u0447\u0435\u043c            | than
+\u0431\u044b\u043b\u0430           | she was
+\u0441\u0430\u043c            | self
+\u0447\u0442\u043e\u0431           | in order to
+\u0431\u0435\u0437            | without
+\u0431\u0443\u0434\u0442\u043e          | as if
+\u0447\u0435\u043b\u043e\u0432\u0435\u043a        | man, person, one
+\u0447\u0435\u0433\u043e           | genitive form of `what'
+\u0440\u0430\u0437            | once
+\u0442\u043e\u0436\u0435           | also
+\u0441\u0435\u0431\u0435           | to oneself
+\u043f\u043e\u0434            | beneath
+\u0436\u0438\u0437\u043d\u044c          | life
+\u0431\u0443\u0434\u0435\u0442          | will be
+\u0436              | short form of intensifier particle `\u0436\u0435'
+\u0442\u043e\u0433\u0434\u0430          | then
+\u043a\u0442\u043e            | who
+\u044d\u0442\u043e\u0442           | this
+\u0433\u043e\u0432\u043e\u0440\u0438\u043b        | was saying
+\u0442\u043e\u0433\u043e           | genitive form of `that'
+\u043f\u043e\u0442\u043e\u043c\u0443         | for that reason
+\u044d\u0442\u043e\u0433\u043e          | genitive form of `this'
+\u043a\u0430\u043a\u043e\u0439          | which
+\u0441\u043e\u0432\u0441\u0435\u043c         | altogether
+\u043d\u0438\u043c            | prepositional form of `\u0435\u0433\u043e', `\u043e\u043d\u0438'
+\u0437\u0434\u0435\u0441\u044c          | here
+\u044d\u0442\u043e\u043c           | prepositional form of `\u044d\u0442\u043e\u0442'
+\u043e\u0434\u0438\u043d           | one
+\u043f\u043e\u0447\u0442\u0438          | almost
+\u043c\u043e\u0439            | my
+\u0442\u0435\u043c            | instrumental/dative plural of `\u0442\u043e\u0442', `\u0442\u043e'
+\u0447\u0442\u043e\u0431\u044b          | full form of `in order that'
+\u043d\u0435\u0435            | her (acc.)
+\u043a\u0430\u0436\u0435\u0442\u0441\u044f        | it seems
+\u0441\u0435\u0439\u0447\u0430\u0441         | now
+\u0431\u044b\u043b\u0438           | they were
+\u043a\u0443\u0434\u0430           | where to
+\u0437\u0430\u0447\u0435\u043c          | why
+\u0441\u043a\u0430\u0437\u0430\u0442\u044c        | to say
+\u0432\u0441\u0435\u0445           | all (acc., gen. preposn. plural)
+\u043d\u0438\u043a\u043e\u0433\u0434\u0430        | never
+\u0441\u0435\u0433\u043e\u0434\u043d\u044f        | today
+\u043c\u043e\u0436\u043d\u043e          | possible, one can
+\u043f\u0440\u0438            | by
+\u043d\u0430\u043a\u043e\u043d\u0435\u0446        | finally
+\u0434\u0432\u0430            | two
+\u043e\u0431             | alternative form of `\u043e', about
+\u0434\u0440\u0443\u0433\u043e\u0439         | another
+\u0445\u043e\u0442\u044c           | even
+\u043f\u043e\u0441\u043b\u0435          | after
+\u043d\u0430\u0434            | above
+\u0431\u043e\u043b\u044c\u0448\u0435         | more
+\u0442\u043e\u0442            | that one (masc.)
+\u0447\u0435\u0440\u0435\u0437          | across, in
+\u044d\u0442\u0438            | these
+\u043d\u0430\u0441            | us
+\u043f\u0440\u043e            | about
+\u0432\u0441\u0435\u0433\u043e          | in all, only, of all
+\u043d\u0438\u0445            | prepositional form of `\u043e\u043d\u0438' (they)
+\u043a\u0430\u043a\u0430\u044f          | which, feminine
+\u043c\u043d\u043e\u0433\u043e          | lots
+\u0440\u0430\u0437\u0432\u0435          | interrogative particle
+\u0441\u043a\u0430\u0437\u0430\u043b\u0430        | she said
+\u0442\u0440\u0438            | three
+\u044d\u0442\u0443            | this, acc. fem. sing.
+\u043c\u043e\u044f            | my, feminine
+\u0432\u043f\u0440\u043e\u0447\u0435\u043c        | moreover, besides
+\u0445\u043e\u0440\u043e\u0448\u043e         | good
+\u0441\u0432\u043e\u044e           | ones own, acc. fem. sing.
+\u044d\u0442\u043e\u0439           | oblique form of `\u044d\u0442\u0430', fem. `this'
+\u043f\u0435\u0440\u0435\u0434          | in front of
+\u0438\u043d\u043e\u0433\u0434\u0430         | sometimes
+\u043b\u0443\u0447\u0448\u0435          | better
+\u0447\u0443\u0442\u044c           | a little
+\u0442\u043e\u043c            | preposn. form of `that one'
+\u043d\u0435\u043b\u044c\u0437\u044f         | one must not
+\u0442\u0430\u043a\u043e\u0439          | such a one
+\u0438\u043c             | to them
+\u0431\u043e\u043b\u0435\u0435          | more
+\u0432\u0441\u0435\u0433\u0434\u0430         | always
+\u043a\u043e\u043d\u0435\u0447\u043d\u043e        | of course
+\u0432\u0441\u044e            | acc. fem. sing of `all'
+\u043c\u0435\u0436\u0434\u0443          | between
+
+
+  | b: some paradigms
+  |
+  | personal pronouns
+  |
+  | \u044f  \u043c\u0435\u043d\u044f  \u043c\u043d\u0435  \u043c\u043d\u043e\u0439  [\u043c\u043d\u043e\u044e]
+  | \u0442\u044b  \u0442\u0435\u0431\u044f  \u0442\u0435\u0431\u0435  \u0442\u043e\u0431\u043e\u0439  [\u0442\u043e\u0431\u043e\u044e]
+  | \u043e\u043d  \u0435\u0433\u043e  \u0435\u043c\u0443  \u0438\u043c  [\u043d\u0435\u0433\u043e, \u043d\u0435\u043c\u0443, \u043d\u0438\u043c]
+  | \u043e\u043d\u0430  \u0435\u0435  \u044d\u0438  \u0435\u044e  [\u043d\u0435\u0435, \u043d\u044d\u0438, \u043d\u0435\u044e]
+  | \u043e\u043d\u043e  \u0435\u0433\u043e  \u0435\u043c\u0443  \u0438\u043c  [\u043d\u0435\u0433\u043e, \u043d\u0435\u043c\u0443, \u043d\u0438\u043c]
+  |
+  | \u043c\u044b  \u043d\u0430\u0441  \u043d\u0430\u043c  \u043d\u0430\u043c\u0438
+  | \u0432\u044b  \u0432\u0430\u0441  \u0432\u0430\u043c  \u0432\u0430\u043c\u0438
+  | \u043e\u043d\u0438  \u0438\u0445  \u0438\u043c  \u0438\u043c\u0438  [\u043d\u0438\u0445, \u043d\u0438\u043c, \u043d\u0438\u043c\u0438]
+  |
+  |   \u0441\u0435\u0431\u044f  \u0441\u0435\u0431\u0435  \u0441\u043e\u0431\u043e\u0439   [\u0441\u043e\u0431\u043e\u044e]
+  |
+  | demonstrative pronouns: \u044d\u0442\u043e\u0442 (this), \u0442\u043e\u0442 (that)
+  |
+  | \u044d\u0442\u043e\u0442  \u044d\u0442\u0430  \u044d\u0442\u043e  \u044d\u0442\u0438
+  | \u044d\u0442\u043e\u0433\u043e  \u044d\u0442\u044b  \u044d\u0442\u043e  \u044d\u0442\u0438
+  | \u044d\u0442\u043e\u0433\u043e  \u044d\u0442\u043e\u0439  \u044d\u0442\u043e\u0433\u043e  \u044d\u0442\u0438\u0445
+  | \u044d\u0442\u043e\u043c\u0443  \u044d\u0442\u043e\u0439  \u044d\u0442\u043e\u043c\u0443  \u044d\u0442\u0438\u043c
+  | \u044d\u0442\u0438\u043c  \u044d\u0442\u043e\u0439  \u044d\u0442\u0438\u043c  [\u044d\u0442\u043e\u044e]  \u044d\u0442\u0438\u043c\u0438
+  | \u044d\u0442\u043e\u043c  \u044d\u0442\u043e\u0439  \u044d\u0442\u043e\u043c  \u044d\u0442\u0438\u0445
+  |
+  | \u0442\u043e\u0442  \u0442\u0430  \u0442\u043e  \u0442\u0435
+  | \u0442\u043e\u0433\u043e  \u0442\u0443  \u0442\u043e  \u0442\u0435
+  | \u0442\u043e\u0433\u043e  \u0442\u043e\u0439  \u0442\u043e\u0433\u043e  \u0442\u0435\u0445
+  | \u0442\u043e\u043c\u0443  \u0442\u043e\u0439  \u0442\u043e\u043c\u0443  \u0442\u0435\u043c
+  | \u0442\u0435\u043c  \u0442\u043e\u0439  \u0442\u0435\u043c  [\u0442\u043e\u044e]  \u0442\u0435\u043c\u0438
+  | \u0442\u043e\u043c  \u0442\u043e\u0439  \u0442\u043e\u043c  \u0442\u0435\u0445
+  |
+  | determinative pronouns
+  |
+  | (a) \u0432\u0435\u0441\u044c (all)
+  |
+  | \u0432\u0435\u0441\u044c  \u0432\u0441\u044f  \u0432\u0441\u0435  \u0432\u0441\u0435
+  | \u0432\u0441\u0435\u0433\u043e  \u0432\u0441\u044e  \u0432\u0441\u0435  \u0432\u0441\u0435
+  | \u0432\u0441\u0435\u0433\u043e  \u0432\u0441\u0435\u0439  \u0432\u0441\u0435\u0433\u043e  \u0432\u0441\u0435\u0445
+  | \u0432\u0441\u0435\u043c\u0443  \u0432\u0441\u0435\u0439  \u0432\u0441\u0435\u043c\u0443  \u0432\u0441\u0435\u043c
+  | \u0432\u0441\u0435\u043c  \u0432\u0441\u0435\u0439  \u0432\u0441\u0435\u043c  [\u0432\u0441\u0435\u044e]  \u0432\u0441\u0435\u043c\u0438
+  | \u0432\u0441\u0435\u043c  \u0432\u0441\u0435\u0439  \u0432\u0441\u0435\u043c  \u0432\u0441\u0435\u0445
+  |
+  | (b) \u0441\u0430\u043c (himself etc)
+  |
+  | \u0441\u0430\u043c  \u0441\u0430\u043c\u0430  \u0441\u0430\u043c\u043e  \u0441\u0430\u043c\u0438
+  | \u0441\u0430\u043c\u043e\u0433\u043e \u0441\u0430\u043c\u0443  \u0441\u0430\u043c\u043e  \u0441\u0430\u043c\u0438\u0445
+  | \u0441\u0430\u043c\u043e\u0433\u043e \u0441\u0430\u043c\u043e\u0439 \u0441\u0430\u043c\u043e\u0433\u043e  \u0441\u0430\u043c\u0438\u0445
+  | \u0441\u0430\u043c\u043e\u043c\u0443 \u0441\u0430\u043c\u043e\u0439 \u0441\u0430\u043c\u043e\u043c\u0443  \u0441\u0430\u043c\u0438\u043c
+  | \u0441\u0430\u043c\u0438\u043c  \u0441\u0430\u043c\u043e\u0439  \u0441\u0430\u043c\u0438\u043c  [\u0441\u0430\u043c\u043e\u044e]  \u0441\u0430\u043c\u0438\u043c\u0438
+  | \u0441\u0430\u043c\u043e\u043c \u0441\u0430\u043c\u043e\u0439 \u0441\u0430\u043c\u043e\u043c  \u0441\u0430\u043c\u0438\u0445
+  |
+  | stems of verbs `to be', `to have', `to do' and modal
+  |
+  | \u0431\u044b\u0442\u044c  \u0431\u044b  \u0431\u0443\u0434  \u0431\u044b\u0432  \u0435\u0441\u0442\u044c  \u0441\u0443\u0442\u044c
+  | \u0438\u043c\u0435
+  | \u0434\u0435\u043b
+  | \u043c\u043e\u0433   \u043c\u043e\u0436  \u043c\u043e\u0447\u044c
+  | \u0443\u043c\u0435
+  | \u0445\u043e\u0447  \u0445\u043e\u0442
+  | \u0434\u043e\u043b\u0436
+  | \u043c\u043e\u0436\u043d
+  | \u043d\u0443\u0436\u043d
+  | \u043d\u0435\u043b\u044c\u0437\u044f
+

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1a53346c/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_sv.txt
----------------------------------------------------------------------
diff --git a/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_sv.txt b/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_sv.txt
new file mode 100644
index 0000000..096f87f
--- /dev/null
+++ b/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_sv.txt
@@ -0,0 +1,133 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ |  - Encoding was converted to UTF-8.
+ |  - This notice was added.
+ |
+ | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
+
+ | A Swedish stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | This is a ranked list (commonest to rarest) of stopwords derived from
+ | a large text sample.
+
+ | Swedish stop words occasionally exhibit homonym clashes. For example
+ |  så = so, but also seed. These are indicated clearly below.
+
+och            | and
+det            | it, this/that
+att            | to (with infinitive)
+i              | in, at
+en             | a
+jag            | I
+hon            | she
+som            | who, that
+han            | he
+på             | on
+den            | it, this/that
+med            | with
+var            | where, each
+sig            | him(self) etc
+för            | for
+så             | so (also: seed)
+till           | to
+är             | is
+men            | but
+ett            | a
+om             | if; around, about
+hade           | had
+de             | they, these/those
+av             | of
+icke           | not, no
+mig            | me
+du             | you
+henne          | her
+då             | then, when
+sin            | his
+nu             | now
+har            | have
+inte           | inte någon = no one
+hans           | his
+honom          | him
+skulle         | 'sake'
+hennes         | her
+där            | there
+min            | my
+man            | one (pronoun)
+ej             | nor
+vid            | at, by, on (also: vast)
+kunde          | could
+något          | some etc
+från           | from, off
+ut             | out
+när            | when
+efter          | after, behind
+upp            | up
+vi             | we
+dem            | them
+vara           | be
+vad            | what
+över           | over
+än             | than
+dig            | you
+kan            | can
+sina           | his
+här            | here
+ha             | have
+mot            | towards
+alla           | all
+under          | under (also: wonder)
+någon          | some etc
+eller          | or (else)
+allt           | all
+mycket         | much
+sedan          | since
+ju             | why
+denna          | this/that
+själv          | myself, yourself etc
+detta          | this/that
+åt             | to
+utan           | without
+varit          | was
+hur            | how
+ingen          | no
+mitt           | my
+ni             | you
+bli            | to be, become
+blev           | from bli
+oss            | us
+din            | thy
+dessa          | these/those
+några          | some etc
+deras          | their
+blir           | from bli
+mina           | my
+samma          | (the) same
+vilken         | who, that
+er             | you, your
+sådan          | such a
+vår            | our
+blivit         | from bli
+dess           | its
+inom           | within
+mellan         | between
+sådant         | such a
+varför         | why
+varje          | each
+vilka          | who, that
+ditt           | thy
+vem            | who
+vilket         | who, that
+sitta          | his
+sådana         | such a
+vart           | each
+dina           | thy
+vars           | whose
+vårt           | our
+våra           | our
+ert            | your
+era            | your
+vilkas         | whose
+

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1a53346c/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_th.txt
----------------------------------------------------------------------
diff --git a/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_th.txt b/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_th.txt
new file mode 100644
index 0000000..07f0fab
--- /dev/null
+++ b/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_th.txt
@@ -0,0 +1,119 @@
+# Thai stopwords from:
+# "Opinion Detection in Thai Political News Columns
+# Based on Subjectivity Analysis"
+# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak
+\u0e44\u0e27\u0e49
+\u0e44\u0e21\u0e48
+\u0e44\u0e1b
+\u0e44\u0e14\u0e49
+\u0e43\u0e2b\u0e49
+\u0e43\u0e19
+\u0e42\u0e14\u0e22
+\u0e41\u0e2b\u0e48\u0e07
+\u0e41\u0e25\u0e49\u0e27
+\u0e41\u0e25\u0e30
+\u0e41\u0e23\u0e01
+\u0e41\u0e1a\u0e1a
+\u0e41\u0e15\u0e48
+\u0e40\u0e2d\u0e07
+\u0e40\u0e2b\u0e47\u0e19
+\u0e40\u0e25\u0e22
+\u0e40\u0e23\u0e34\u0e48\u0e21
+\u0e40\u0e23\u0e32
+\u0e40\u0e21\u0e37\u0e48\u0e2d
+\u0e40\u0e1e\u0e37\u0e48\u0e2d
+\u0e40\u0e1e\u0e23\u0e32\u0e30
+\u0e40\u0e1b\u0e47\u0e19\u0e01\u0e32\u0e23
+\u0e40\u0e1b\u0e47\u0e19
+\u0e40\u0e1b\u0e34\u0e14\u0e40\u0e1c\u0e22
+\u0e40\u0e1b\u0e34\u0e14
+\u0e40\u0e19\u0e37\u0e48\u0e2d\u0e07\u0e08\u0e32\u0e01
+\u0e40\u0e14\u0e35\u0e22\u0e27\u0e01\u0e31\u0e19
+\u0e40\u0e14\u0e35\u0e22\u0e27
+\u0e40\u0e0a\u0e48\u0e19
+\u0e40\u0e09\u0e1e\u0e32\u0e30
+\u0e40\u0e04\u0e22
+\u0e40\u0e02\u0e49\u0e32
+\u0e40\u0e02\u0e32
+\u0e2d\u0e35\u0e01
+\u0e2d\u0e32\u0e08
+\u0e2d\u0e30\u0e44\u0e23
+\u0e2d\u0e2d\u0e01
+\u0e2d\u0e22\u0e48\u0e32\u0e07
+\u0e2d\u0e22\u0e39\u0e48
+\u0e2d\u0e22\u0e32\u0e01
+\u0e2b\u0e32\u0e01
+\u0e2b\u0e25\u0e32\u0e22
+\u0e2b\u0e25\u0e31\u0e07\u0e08\u0e32\u0e01
+\u0e2b\u0e25\u0e31\u0e07
+\u0e2b\u0e23\u0e37\u0e2d
+\u0e2b\u0e19\u0e36\u0e48\u0e07
+\u0e2a\u0e48\u0e27\u0e19
+\u0e2a\u0e48\u0e07
+\u0e2a\u0e38\u0e14
+\u0e2a\u0e4d\u0e32\u0e2b\u0e23\u0e31\u0e1a
+\u0e27\u0e48\u0e32
+\u0e27\u0e31\u0e19
+\u0e25\u0e07
+\u0e23\u0e48\u0e27\u0e21
+\u0e23\u0e32\u0e22
+\u0e23\u0e31\u0e1a
+\u0e23\u0e30\u0e2b\u0e27\u0e48\u0e32\u0e07
+\u0e23\u0e27\u0e21
+\u0e22\u0e31\u0e07
+\u0e21\u0e35
+\u0e21\u0e32\u0e01
+\u0e21\u0e32
+\u0e1e\u0e23\u0e49\u0e2d\u0e21
+\u0e1e\u0e1a
+\u0e1c\u0e48\u0e32\u0e19
+\u0e1c\u0e25
+\u0e1a\u0e32\u0e07
+\u0e19\u0e48\u0e32
+\u0e19\u0e35\u0e49
+\u0e19\u0e4d\u0e32
+\u0e19\u0e31\u0e49\u0e19
+\u0e19\u0e31\u0e01
+\u0e19\u0e2d\u0e01\u0e08\u0e32\u0e01
+\u0e17\u0e38\u0e01
+\u0e17\u0e35\u0e48\u0e2a\u0e38\u0e14
+\u0e17\u0e35\u0e48
+\u0e17\u0e4d\u0e32\u0e43\u0e2b\u0e49
+\u0e17\u0e4d\u0e32
+\u0e17\u0e32\u0e07
+\u0e17\u0e31\u0e49\u0e07\u0e19\u0e35\u0e49
+\u0e17\u0e31\u0e49\u0e07
+\u0e16\u0e49\u0e32
+\u0e16\u0e39\u0e01
+\u0e16\u0e36\u0e07
+\u0e15\u0e49\u0e2d\u0e07
+\u0e15\u0e48\u0e32\u0e07\u0e46
+\u0e15\u0e48\u0e32\u0e07
+\u0e15\u0e48\u0e2d
+\u0e15\u0e32\u0e21
+\u0e15\u0e31\u0e49\u0e07\u0e41\u0e15\u0e48
+\u0e15\u0e31\u0e49\u0e07
+\u0e14\u0e49\u0e32\u0e19
+\u0e14\u0e49\u0e27\u0e22
+\u0e14\u0e31\u0e07
+\u0e0b\u0e36\u0e48\u0e07
+\u0e0a\u0e48\u0e27\u0e07
+\u0e08\u0e36\u0e07
+\u0e08\u0e32\u0e01
+\u0e08\u0e31\u0e14
+\u0e08\u0e30
+\u0e04\u0e37\u0e2d
+\u0e04\u0e27\u0e32\u0e21
+\u0e04\u0e23\u0e31\u0e49\u0e07
+\u0e04\u0e07
+\u0e02\u0e36\u0e49\u0e19
+\u0e02\u0e2d\u0e07
+\u0e02\u0e2d
+\u0e02\u0e13\u0e30
+\u0e01\u0e48\u0e2d\u0e19
+\u0e01\u0e47
+\u0e01\u0e32\u0e23
+\u0e01\u0e31\u0e1a
+\u0e01\u0e31\u0e19
+\u0e01\u0e27\u0e48\u0e32
+\u0e01\u0e25\u0e48\u0e32\u0e27

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1a53346c/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_tr.txt
----------------------------------------------------------------------
diff --git a/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_tr.txt b/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_tr.txt
new file mode 100644
index 0000000..84d9408
--- /dev/null
+++ b/solr/server/solr/configsets/basic_configs/conf/lang/stopwords_tr.txt
@@ -0,0 +1,212 @@
+# Turkish stopwords from LUCENE-559
+# merged with the list from "Information Retrieval on Turkish Texts"
+#   (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf)
+acaba
+altm\u0131\u015f
+alt\u0131
+ama
+ancak
+arada
+asl\u0131nda
+ayr\u0131ca
+bana
+baz\u0131
+belki
+ben
+benden
+beni
+benim
+beri
+be\u015f
+bile
+bin
+bir
+birçok
+biri
+birkaç
+birkez
+bir\u015fey
+bir\u015feyi
+biz
+bize
+bizden
+bizi
+bizim
+böyle
+böylece
+bu
+buna
+bunda
+bundan
+bunlar
+bunlar\u0131
+bunlar\u0131n
+bunu
+bunun
+burada
+çok
+çünkü
+da
+daha
+dahi
+de
+defa
+de\u011fil
+di\u011fer
+diye
+doksan
+dokuz
+dolay\u0131
+dolay\u0131s\u0131yla
+dört
+edecek
+eden
+ederek
+edilecek
+ediliyor
+edilmesi
+ediyor
+e\u011fer
+elli
+en
+etmesi
+etti
+etti\u011fi
+etti\u011fini
+gibi
+göre
+halen
+hangi
+hatta
+hem
+henüz
+hep
+hepsi
+her
+herhangi
+herkesin
+hiç
+hiçbir
+için
+iki
+ile
+ilgili
+ise
+i\u015fte
+itibaren
+itibariyle
+kadar
+kar\u015f\u0131n
+katrilyon
+kendi
+kendilerine
+kendini
+kendisi
+kendisine
+kendisini
+kez
+ki
+kim
+kimden
+kime
+kimi
+kimse
+k\u0131rk
+milyar
+milyon
+mu
+mü
+m\u0131
+nas\u0131l
+ne
+neden
+nedenle
+nerde
+nerede
+nereye
+niye
+niçin
+o
+olan
+olarak
+oldu
+oldu\u011fu
+oldu\u011funu
+olduklar\u0131n\u0131
+olmad\u0131
+olmad\u0131\u011f\u0131
+olmak
+olmas\u0131
+olmayan
+olmaz
+olsa
+olsun
+olup
+olur
+olursa
+oluyor
+on
+ona
+ondan
+onlar
+onlardan
+onlar\u0131
+onlar\u0131n
+onu
+onun
+otuz
+oysa
+öyle
+pek
+ra\u011fmen
+sadece
+sanki
+sekiz
+seksen
+sen
+senden
+seni
+senin
+siz
+sizden
+sizi
+sizin
+\u015fey
+\u015feyden
+\u015feyi
+\u015feyler
+\u015föyle
+\u015fu
+\u015funa
+\u015funda
+\u015fundan
+\u015funlar\u0131
+\u015funu
+taraf\u0131ndan
+trilyon
+tüm
+üç
+üzere
+var
+vard\u0131
+ve
+veya
+ya
+yani
+yapacak
+yap\u0131lan
+yap\u0131lmas\u0131
+yap\u0131yor
+yapmak
+yapt\u0131
+yapt\u0131\u011f\u0131
+yapt\u0131\u011f\u0131n\u0131
+yapt\u0131klar\u0131
+yedi
+yerine
+yetmi\u015f
+yine
+yirmi
+yoksa
+yüz
+zaten

http://git-wip-us.apache.org/repos/asf/lucene-solr/blob/1a53346c/solr/server/solr/configsets/basic_configs/conf/lang/userdict_ja.txt
----------------------------------------------------------------------
diff --git a/solr/server/solr/configsets/basic_configs/conf/lang/userdict_ja.txt b/solr/server/solr/configsets/basic_configs/conf/lang/userdict_ja.txt
new file mode 100644
index 0000000..6f0368e
--- /dev/null
+++ b/solr/server/solr/configsets/basic_configs/conf/lang/userdict_ja.txt
@@ -0,0 +1,29 @@
+#
+# This is a sample user dictionary for Kuromoji (JapaneseTokenizer)
+#
+# Add entries to this file in order to override the statistical model in terms
+# of segmentation, readings and part-of-speech tags.  Notice that entries do
+# not have weights since they are always used when found.  This is by-design
+# in order to maximize ease-of-use.
+#
+# Entries are defined using the following CSV format:
+#  <text>,<token 1> ... <token n>,<reading 1> ... <reading n>,<part-of-speech tag>
+#
+# Notice that a single half-width space separates tokens and readings, and
+# that the number of tokens and the number of readings must match exactly.
+#
+# Also notice that behavior for multiple entries with the same <text> is undefined.
+#
+# Whitespace only lines are ignored.  Comments are not allowed on entry lines.
+#
+
+# Custom segmentation for kanji compounds
+\u65e5\u672c\u7d4c\u6e08\u65b0\u805e,\u65e5\u672c \u7d4c\u6e08 \u65b0\u805e,\u30cb\u30db\u30f3 \u30b1\u30a4\u30b6\u30a4 \u30b7\u30f3\u30d6\u30f3,\u30ab\u30b9\u30bf\u30e0\u540d\u8a5e
+\u95a2\u897f\u56fd\u969b\u7a7a\u6e2f,\u95a2\u897f \u56fd\u969b \u7a7a\u6e2f,\u30ab\u30f3\u30b5\u30a4 \u30b3\u30af\u30b5\u30a4 \u30af\u30a6\u30b3\u30a6,\u30ab\u30b9\u30bf\u30e0\u540d\u8a5e
+
+# Custom segmentation for compound katakana
+\u30c8\u30fc\u30c8\u30d0\u30c3\u30b0,\u30c8\u30fc\u30c8 \u30d0\u30c3\u30b0,\u30c8\u30fc\u30c8 \u30d0\u30c3\u30b0,\u304b\u305a\u30ab\u30ca\u540d\u8a5e
+\u30b7\u30e7\u30eb\u30c0\u30fc\u30d0\u30c3\u30b0,\u30b7\u30e7\u30eb\u30c0\u30fc \u30d0\u30c3\u30b0,\u30b7\u30e7\u30eb\u30c0\u30fc \u30d0\u30c3\u30b0,\u304b\u305a\u30ab\u30ca\u540d\u8a5e
+
+# Custom reading for former sumo wrestler
+\u671d\u9752\u9f8d,\u671d\u9752\u9f8d,\u30a2\u30b5\u30b7\u30e7\u30a6\u30ea\u30e5\u30a6,\u30ab\u30b9\u30bf\u30e0\u4eba\u540d