You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by eh...@apache.org on 2015/05/22 03:23:29 UTC
svn commit: r1680973 [2/5] - in /lucene/dev/trunk/solr: ./ example/files/
example/files/conf/ example/files/conf/lang/ example/files/conf/velocity/
Added: lucene/dev/trunk/solr/example/files/conf/lang/stopwords_fr.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/files/conf/lang/stopwords_fr.txt?rev=1680973&view=auto
==============================================================================
--- lucene/dev/trunk/solr/example/files/conf/lang/stopwords_fr.txt (added)
+++ lucene/dev/trunk/solr/example/files/conf/lang/stopwords_fr.txt Fri May 22 01:23:27 2015
@@ -0,0 +1,186 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/french/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+ |
+ | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
+
+ | A French stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+au | a + le
+aux | a + les
+avec | with
+ce | this
+ces | these
+dans | in
+de | of
+des | de + les
+du | de + le
+elle | she
+en | `of them' etc
+et | and
+eux | them
+il | he
+je | I
+la | the
+le | the
+leur | their
+lui | him
+ma | my (fem)
+mais | but
+me | me
+même | same; as in moi-même (myself) etc
+mes | my (pl)
+moi | me
+mon | my (masc)
+ne | not
+nos | our (pl)
+notre | our
+nous | we
+on | one
+ou | or
+par | by
+pas | not
+pour | for
+qu | que before vowel
+que | that
+qui | who
+sa | his, her (fem)
+se | oneself
+ses | his (pl)
+son | his, her (masc)
+sur | on
+ta | thy (fem)
+te | thee
+tes | thy (pl)
+toi | thee
+ton | thy (masc)
+tu | thou
+un | a
+une | a
+vos | your (pl)
+votre | your
+vous | you
+
+ | single letter forms
+
+c | c'
+d | d'
+j | j'
+l | l'
+à | to, at
+m | m'
+n | n'
+s | s'
+t | t'
+y | there
+
+ | forms of être (not including the infinitive):
+été
+étée
+étées
+étés
+étant
+suis
+es
+est
+sommes
+êtes
+sont
+serai
+seras
+sera
+serons
+serez
+seront
+serais
+serait
+serions
+seriez
+seraient
+étais
+était
+étions
+étiez
+étaient
+fus
+fut
+fûmes
+fûtes
+furent
+sois
+soit
+soyons
+soyez
+soient
+fusse
+fusses
+fût
+fussions
+fussiez
+fussent
+
+ | forms of avoir (not including the infinitive):
+ayant
+eu
+eue
+eues
+eus
+ai
+as
+avons
+avez
+ont
+aurai
+auras
+aura
+aurons
+aurez
+auront
+aurais
+aurait
+aurions
+auriez
+auraient
+avais
+avait
+avions
+aviez
+avaient
+eut
+eûmes
+eûtes
+eurent
+aie
+aies
+ait
+ayons
+ayez
+aient
+eusse
+eusses
+eût
+eussions
+eussiez
+eussent
+
+ | Later additions (from Jean-Christophe Deschamps)
+ceci | this
+cela | that
+celà | that
+cet | this
+cette | this
+ici | here
+ils | they
+les | the (pl)
+leurs | their (pl)
+quel | which
+quels | which
+quelle | which
+quelles | which
+sans | without
+soi | oneself
+
Added: lucene/dev/trunk/solr/example/files/conf/lang/stopwords_ga.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/files/conf/lang/stopwords_ga.txt?rev=1680973&view=auto
==============================================================================
--- lucene/dev/trunk/solr/example/files/conf/lang/stopwords_ga.txt (added)
+++ lucene/dev/trunk/solr/example/files/conf/lang/stopwords_ga.txt Fri May 22 01:23:27 2015
@@ -0,0 +1,110 @@
+
+a
+ach
+ag
+agus
+an
+aon
+ar
+arna
+as
+b'
+ba
+beirt
+bhúr
+caoga
+ceathair
+ceathrar
+chomh
+chtó
+chuig
+chun
+cois
+céad
+cúig
+cúigear
+d'
+daichead
+dar
+de
+deich
+deichniúr
+den
+dhá
+do
+don
+dtí
+dá
+dár
+dó
+faoi
+faoin
+faoina
+faoinár
+fara
+fiche
+gach
+gan
+go
+gur
+haon
+hocht
+i
+iad
+idir
+in
+ina
+ins
+inár
+is
+le
+leis
+lena
+lenár
+m'
+mar
+mo
+mé
+na
+nach
+naoi
+naonúr
+ná
+ní
+níor
+nó
+nócha
+ocht
+ochtar
+os
+roimh
+sa
+seacht
+seachtar
+seachtó
+seasca
+seisear
+siad
+sibh
+sinn
+sna
+sé
+sí
+tar
+thar
+thú
+triúr
+trí
+trína
+trínár
+tríocha
+tú
+um
+ár
+é
+éis
+í
+ó
+ón
+óna
+ónár
Added: lucene/dev/trunk/solr/example/files/conf/lang/stopwords_gl.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/files/conf/lang/stopwords_gl.txt?rev=1680973&view=auto
==============================================================================
--- lucene/dev/trunk/solr/example/files/conf/lang/stopwords_gl.txt (added)
+++ lucene/dev/trunk/solr/example/files/conf/lang/stopwords_gl.txt Fri May 22 01:23:27 2015
@@ -0,0 +1,161 @@
+# Galician stopwords
+a
+aínda
+alí
+aquel
+aquela
+aquelas
+aqueles
+aquilo
+aquí
+ao
+aos
+as
+así
+á
+ben
+cando
+che
+co
+coa
+comigo
+con
+connosco
+contigo
+convosco
+coas
+cos
+cun
+cuns
+cunha
+cunhas
+da
+dalgunha
+dalgunhas
+dalgún
+dalgúns
+das
+de
+del
+dela
+delas
+deles
+desde
+deste
+do
+dos
+dun
+duns
+dunha
+dunhas
+e
+el
+ela
+elas
+eles
+en
+era
+eran
+esa
+esas
+ese
+eses
+esta
+estar
+estaba
+está
+están
+este
+estes
+estiven
+estou
+eu
+é
+facer
+foi
+foron
+fun
+había
+hai
+iso
+isto
+la
+las
+lle
+lles
+lo
+los
+mais
+me
+meu
+meus
+min
+miña
+miñas
+moi
+na
+nas
+neste
+nin
+no
+non
+nos
+nosa
+nosas
+noso
+nosos
+nós
+nun
+nunha
+nuns
+nunhas
+o
+os
+ou
+ó
+ós
+para
+pero
+pode
+pois
+pola
+polas
+polo
+polos
+por
+que
+se
+senón
+ser
+seu
+seus
+sexa
+sido
+sobre
+súa
+súas
+tamén
+tan
+te
+ten
+teñen
+teño
+ter
+teu
+teus
+ti
+tido
+tiña
+tiven
+túa
+túas
+un
+unha
+unhas
+uns
+vos
+vosa
+vosas
+voso
+vosos
+vós
Added: lucene/dev/trunk/solr/example/files/conf/lang/stopwords_hi.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/files/conf/lang/stopwords_hi.txt?rev=1680973&view=auto
==============================================================================
--- lucene/dev/trunk/solr/example/files/conf/lang/stopwords_hi.txt (added)
+++ lucene/dev/trunk/solr/example/files/conf/lang/stopwords_hi.txt Fri May 22 01:23:27 2015
@@ -0,0 +1,235 @@
+# Also see http://www.opensource.org/licenses/bsd-license.html
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# Note: by default this file also contains forms normalized by HindiNormalizer
+# for spelling variation (see section below), such that it can be used whether or
+# not you enable that feature. When adding additional entries to this list,
+# please add the normalized form as well.
+à¤
à¤à¤¦à¤°
+à¤
त
+à¤
पना
+à¤
पनà¥
+à¤
पनà¥
+à¤
à¤à¥
+à¤à¤¦à¤¿
+à¤à¤ª
+à¤à¤¤à¥à¤¯à¤¾à¤¦à¤¿
+à¤à¤¨
+à¤à¤¨à¤à¤¾
+à¤à¤¨à¥à¤¹à¥à¤
+à¤à¤¨à¥à¤¹à¥à¤
+à¤à¤¨à¥à¤¹à¥à¤
+à¤à¤¸
+à¤à¤¸à¤à¤¾
+à¤à¤¸à¤à¥
+à¤à¤¸à¤à¥
+à¤à¤¸à¤®à¥à¤
+à¤à¤¸à¥
+à¤à¤¸à¥
+à¤à¤¨
+à¤à¤¨à¤à¤¾
+à¤à¤¨à¤à¥
+à¤à¤¨à¤à¥
+à¤à¤¨à¤à¥
+à¤à¤¨à¥à¤¹à¥à¤
+à¤à¤¨à¥à¤¹à¥à¤
+à¤à¤¨à¥à¤¹à¥à¤
+à¤à¤¸
+à¤à¤¸à¤à¥
+à¤à¤¸à¥
+à¤à¤¸à¥
+à¤à¤
+à¤à¤µà¤
+à¤à¤¸
+à¤à¤¸à¥
+à¤à¤°
+à¤à¤
+à¤à¤°
+à¤à¤°à¤¤à¤¾
+à¤à¤°à¤¤à¥
+à¤à¤°à¤¨à¤¾
+à¤à¤°à¤¨à¥
+à¤à¤°à¥à¤
+à¤à¤¹à¤¤à¥
+à¤à¤¹à¤¾
+à¤à¤¾
+à¤à¤¾à¥à¥
+à¤à¤¿
+à¤à¤¿à¤¤à¤¨à¤¾
+à¤à¤¿à¤¨à¥à¤¹à¥à¤
+à¤à¤¿à¤¨à¥à¤¹à¥à¤
+à¤à¤¿à¤¯à¤¾
+à¤à¤¿à¤°
+à¤à¤¿à¤¸
+à¤à¤¿à¤¸à¥
+à¤à¤¿à¤¸à¥
+à¤à¥
+à¤à¥à¤
+à¤à¥à¤²
+à¤à¥
+à¤à¥
+à¤à¥à¤
+à¤à¥à¤¨
+à¤à¥à¤¨à¤¸à¤¾
+à¤à¤¯à¤¾
+à¤à¤°
+à¤à¤¬
+à¤à¤¹à¤¾à¤
+à¤à¤¾
+à¤à¤¿à¤¤à¤¨à¤¾
+à¤à¤¿à¤¨
+à¤à¤¿à¤¨à¥à¤¹à¥à¤
+à¤à¤¿à¤¨à¥à¤¹à¥à¤
+à¤à¤¿à¤¸
+à¤à¤¿à¤¸à¥
+à¤à¥à¤§à¤°
+à¤à¥à¤¸à¤¾
+à¤à¥à¤¸à¥
+à¤à¥
+तà¤
+तब
+तरह
+तिन
+तिनà¥à¤¹à¥à¤
+तिनà¥à¤¹à¥à¤
+तिस
+तिसà¥
+तà¥
+था
+थà¥
+थà¥
+दबारा
+दिया
+दà¥à¤¸à¤°à¤¾
+दà¥à¤¸à¤°à¥
+दà¥
+दà¥à¤µà¤¾à¤°à¤¾
+न
+नहà¥à¤
+ना
+निहायत
+नà¥à¤à¥
+नà¥
+पर
+पर
+पहलà¥
+पà¥à¤°à¤¾
+पà¥
+फिर
+बनà¥
+बहà¥
+बहà¥à¤¤
+बाद
+बाला
+बिलà¤à¥à¤²
+à¤à¥
+à¤à¥à¤¤à¤°
+मà¤à¤°
+मानà¥
+मà¥
+मà¥à¤
+यदि
+यह
+यहाà¤
+यहà¥
+या
+यिह
+यà¥
+रà¤à¥à¤
+रहा
+रहà¥
+ऱà¥à¤µà¤¾à¤¸à¤¾
+लिà¤
+लियà¥
+लà¥à¤à¤¿à¤¨
+व
+वरà¥à¤
+वह
+वह
+वहाà¤
+वहà¥à¤
+वालà¥
+वà¥à¤¹
+वà¥
+वà¥à¥à¤°à¤¹
+सà¤à¤
+सà¤à¤¤à¤¾
+सà¤à¤¤à¥
+सबसà¥
+सà¤à¥
+साथ
+साबà¥à¤¤
+साà¤
+सारा
+सà¥
+सà¥
+हà¥
+हà¥à¤
+हà¥à¤
+हà¥à¤
+हà¥
+हà¥à¤
+हà¥
+हà¥à¤¤à¤¾
+हà¥à¤¤à¥
+हà¥à¤¤à¥
+हà¥à¤¨à¤¾
+हà¥à¤¨à¥
+# additional normalized forms of the above
+à¤
पनि
+à¤à¥à¤¸à¥
+हà¥à¤¤à¤¿
+सà¤à¤¿
+तिà¤à¤¹à¥à¤
+à¤à¤à¤¹à¥à¤
+दवारा
+à¤à¤¸à¤¿
+à¤à¤¿à¤à¤¹à¥à¤
+थि
+à¤à¤à¤¹à¥à¤
+à¤à¤°
+à¤à¤¿à¤à¤¹à¥à¤
+वहिà¤
+à¤
à¤à¤¿
+बनि
+हि
+à¤à¤à¤¹à¤¿à¤
+à¤à¤à¤¹à¥à¤
+हà¥à¤
+वà¤à¥à¤°à¤¹
+à¤à¤¸à¥
+रवासा
+à¤à¥à¤¨
+निà¤à¥
+à¤à¤¾à¤«à¤¿
+à¤à¤¸à¤¿
+पà¥à¤°à¤¾
+à¤à¤¿à¤¤à¤°
+हà¥
+बहि
+वहाà¤
+à¤à¥à¤
+यहाà¤
+à¤à¤¿à¤à¤¹à¥à¤
+तिà¤à¤¹à¥à¤
+à¤à¤¿à¤¸à¤¿
+à¤à¤
+यहि
+à¤à¤à¤¹à¤¿à¤
+à¤à¤¿à¤§à¤°
+à¤à¤à¤¹à¥à¤
+à¤
दि
+à¤à¤¤à¤¯à¤¾à¤¦à¤¿
+हà¥à¤
+à¤à¥à¤¨à¤¸à¤¾
+à¤à¤¸à¤à¤¿
+दà¥à¤¸à¤°à¥
+à¤à¤¹à¤¾à¤
+à¤
प
+à¤à¤¿à¤à¤¹à¥à¤
+à¤à¤¨à¤à¤¿
+à¤à¤¿
+वरà¤
+हà¥à¤
+à¤à¥à¤¸à¤¾
+नहिà¤
Added: lucene/dev/trunk/solr/example/files/conf/lang/stopwords_hu.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/files/conf/lang/stopwords_hu.txt?rev=1680973&view=auto
==============================================================================
--- lucene/dev/trunk/solr/example/files/conf/lang/stopwords_hu.txt (added)
+++ lucene/dev/trunk/solr/example/files/conf/lang/stopwords_hu.txt Fri May 22 01:23:27 2015
@@ -0,0 +1,211 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/hungarian/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+ |
+ | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
+
+| Hungarian stop word list
+| prepared by Anna Tordai
+
+a
+ahogy
+ahol
+aki
+akik
+akkor
+alatt
+által
+általában
+amely
+amelyek
+amelyekben
+amelyeket
+amelyet
+amelynek
+ami
+amit
+amolyan
+amíg
+amikor
+át
+abban
+ahhoz
+annak
+arra
+arról
+az
+azok
+azon
+azt
+azzal
+azért
+aztán
+azután
+azonban
+bár
+be
+belül
+benne
+cikk
+cikkek
+cikkeket
+csak
+de
+e
+eddig
+egész
+egy
+egyes
+egyetlen
+egyéb
+egyik
+egyre
+ekkor
+el
+elég
+ellen
+elő
+először
+előtt
+első
+én
+éppen
+ebben
+ehhez
+emilyen
+ennek
+erre
+ez
+ezt
+ezek
+ezen
+ezzel
+ezért
+és
+fel
+felé
+hanem
+hiszen
+hogy
+hogyan
+igen
+így
+illetve
+ill.
+ill
+ilyen
+ilyenkor
+ison
+ismét
+itt
+jó
+jól
+jobban
+kell
+kellett
+keresztül
+keressünk
+ki
+kívül
+között
+közül
+legalább
+lehet
+lehetett
+legyen
+lenne
+lenni
+lesz
+lett
+maga
+magát
+majd
+majd
+már
+más
+másik
+meg
+még
+mellett
+mert
+mely
+melyek
+mi
+mit
+míg
+miért
+milyen
+mikor
+minden
+mindent
+mindenki
+mindig
+mint
+mintha
+mivel
+most
+nagy
+nagyobb
+nagyon
+ne
+néha
+nekem
+neki
+nem
+néhány
+nélkül
+nincs
+olyan
+ott
+össze
+ő
+ők
+őket
+pedig
+persze
+rá
+s
+saját
+sem
+semmi
+sok
+sokat
+sokkal
+számára
+szemben
+szerint
+szinte
+talán
+tehát
+teljes
+tovább
+továbbá
+több
+úgy
+ugyanis
+új
+újabb
+újra
+után
+utána
+utolsó
+vagy
+vagyis
+valaki
+valami
+valamint
+való
+vagyok
+van
+vannak
+volt
+voltam
+voltak
+voltunk
+vissza
+vele
+viszont
+volna
Added: lucene/dev/trunk/solr/example/files/conf/lang/stopwords_hy.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/files/conf/lang/stopwords_hy.txt?rev=1680973&view=auto
==============================================================================
--- lucene/dev/trunk/solr/example/files/conf/lang/stopwords_hy.txt (added)
+++ lucene/dev/trunk/solr/example/files/conf/lang/stopwords_hy.txt Fri May 22 01:23:27 2015
@@ -0,0 +1,46 @@
+# example set of Armenian stopwords.
+Õ¡ÕµÕ¤
+Õ¡ÕµÕ¬
+Õ¡ÕµÕ¶
+Õ¡ÕµÕ½
+Õ¤Õ¸Ö
+Õ¤Õ¸ÖÖ
+Õ¥Õ´
+Õ¥Õ¶
+Õ¥Õ¶Ö
+Õ¥Õ½
+Õ¥Ö
+Õ§
+Õ§Õ«
+Õ§Õ«Õ¶
+Õ§Õ«Õ¶Ö
+Õ§Õ«Ö
+Õ§Õ«Ö
+Õ§Ö
+Õ¨Õ½Õ¿
+Õ©
+Õ«
+Õ«Õ¶
+Õ«Õ½Õ¯
+Õ«Ö
+Õ¯Õ¡Õ´
+Õ°Õ¡Õ´Õ¡Ö
+Õ°Õ¥Õ¿
+Õ°Õ¥Õ¿Õ¸
+Õ´Õ¥Õ¶Ö
+Õ´Õ¥Õ»
+Õ´Õ«
+Õ¶
+Õ¶Õ¡
+Õ¶Õ¡Ö
+Õ¶ÖÕ¡
+Õ¶ÖÕ¡Õ¶Ö
+Õ¸Ö
+Õ¸ÖÕ¨
+Õ¸ÖÕ¸Õ¶Ö
+Õ¸ÖÕºÕ¥Õ½
+Õ¸Ö
+Õ¸ÖÕ´
+ÕºÕ«Õ¿Õ«
+Õ¾ÖÕ¡
+Ö
Added: lucene/dev/trunk/solr/example/files/conf/lang/stopwords_id.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/files/conf/lang/stopwords_id.txt?rev=1680973&view=auto
==============================================================================
--- lucene/dev/trunk/solr/example/files/conf/lang/stopwords_id.txt (added)
+++ lucene/dev/trunk/solr/example/files/conf/lang/stopwords_id.txt Fri May 22 01:23:27 2015
@@ -0,0 +1,359 @@
+# from appendix D of: A Study of Stemming Effects on Information
+# Retrieval in Bahasa Indonesia
+ada
+adanya
+adalah
+adapun
+agak
+agaknya
+agar
+akan
+akankah
+akhirnya
+aku
+akulah
+amat
+amatlah
+anda
+andalah
+antar
+diantaranya
+antara
+antaranya
+diantara
+apa
+apaan
+mengapa
+apabila
+apakah
+apalagi
+apatah
+atau
+ataukah
+ataupun
+bagai
+bagaikan
+sebagai
+sebagainya
+bagaimana
+bagaimanapun
+sebagaimana
+bagaimanakah
+bagi
+bahkan
+bahwa
+bahwasanya
+sebaliknya
+banyak
+sebanyak
+beberapa
+seberapa
+begini
+beginian
+beginikah
+beginilah
+sebegini
+begitu
+begitukah
+begitulah
+begitupun
+sebegitu
+belum
+belumlah
+sebelum
+sebelumnya
+sebenarnya
+berapa
+berapakah
+berapalah
+berapapun
+betulkah
+sebetulnya
+biasa
+biasanya
+bila
+bilakah
+bisa
+bisakah
+sebisanya
+boleh
+bolehkah
+bolehlah
+buat
+bukan
+bukankah
+bukanlah
+bukannya
+cuma
+percuma
+dahulu
+dalam
+dan
+dapat
+dari
+daripada
+dekat
+demi
+demikian
+demikianlah
+sedemikian
+dengan
+depan
+di
+dia
+dialah
+dini
+diri
+dirinya
+terdiri
+dong
+dulu
+enggak
+enggaknya
+entah
+entahlah
+terhadap
+terhadapnya
+hal
+hampir
+hanya
+hanyalah
+harus
+haruslah
+harusnya
+seharusnya
+hendak
+hendaklah
+hendaknya
+hingga
+sehingga
+ia
+ialah
+ibarat
+ingin
+inginkah
+inginkan
+ini
+inikah
+inilah
+itu
+itukah
+itulah
+jangan
+jangankan
+janganlah
+jika
+jikalau
+juga
+justru
+kala
+kalau
+kalaulah
+kalaupun
+kalian
+kami
+kamilah
+kamu
+kamulah
+kan
+kapan
+kapankah
+kapanpun
+dikarenakan
+karena
+karenanya
+ke
+kecil
+kemudian
+kenapa
+kepada
+kepadanya
+ketika
+seketika
+khususnya
+kini
+kinilah
+kiranya
+sekiranya
+kita
+kitalah
+kok
+lagi
+lagian
+selagi
+lah
+lain
+lainnya
+melainkan
+selaku
+lalu
+melalui
+terlalu
+lama
+lamanya
+selama
+selama
+selamanya
+lebih
+terlebih
+bermacam
+macam
+semacam
+maka
+makanya
+makin
+malah
+malahan
+mampu
+mampukah
+mana
+manakala
+manalagi
+masih
+masihkah
+semasih
+masing
+mau
+maupun
+semaunya
+memang
+mereka
+merekalah
+meski
+meskipun
+semula
+mungkin
+mungkinkah
+nah
+namun
+nanti
+nantinya
+nyaris
+oleh
+olehnya
+seorang
+seseorang
+pada
+padanya
+padahal
+paling
+sepanjang
+pantas
+sepantasnya
+sepantasnyalah
+para
+pasti
+pastilah
+per
+pernah
+pula
+pun
+merupakan
+rupanya
+serupa
+saat
+saatnya
+sesaat
+saja
+sajalah
+saling
+bersama
+sama
+sesama
+sambil
+sampai
+sana
+sangat
+sangatlah
+saya
+sayalah
+se
+sebab
+sebabnya
+sebuah
+tersebut
+tersebutlah
+sedang
+sedangkan
+sedikit
+sedikitnya
+segala
+segalanya
+segera
+sesegera
+sejak
+sejenak
+sekali
+sekalian
+sekalipun
+sesekali
+sekaligus
+sekarang
+sekarang
+sekitar
+sekitarnya
+sela
+selain
+selalu
+seluruh
+seluruhnya
+semakin
+sementara
+sempat
+semua
+semuanya
+sendiri
+sendirinya
+seolah
+seperti
+sepertinya
+sering
+seringnya
+serta
+siapa
+siapakah
+siapapun
+disini
+disinilah
+sini
+sinilah
+sesuatu
+sesuatunya
+suatu
+sesudah
+sesudahnya
+sudah
+sudahkah
+sudahlah
+supaya
+tadi
+tadinya
+tak
+tanpa
+setelah
+telah
+tentang
+tentu
+tentulah
+tentunya
+tertentu
+seterusnya
+tapi
+tetapi
+setiap
+tiap
+setidaknya
+tidak
+tidakkah
+tidaklah
+toh
+waduh
+wah
+wahai
+sewaktu
+walau
+walaupun
+wong
+yaitu
+yakni
+yang
Added: lucene/dev/trunk/solr/example/files/conf/lang/stopwords_it.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/files/conf/lang/stopwords_it.txt?rev=1680973&view=auto
==============================================================================
--- lucene/dev/trunk/solr/example/files/conf/lang/stopwords_it.txt (added)
+++ lucene/dev/trunk/solr/example/files/conf/lang/stopwords_it.txt Fri May 22 01:23:27 2015
@@ -0,0 +1,303 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/italian/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+ |
+ | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
+
+ | An Italian stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ad | a (to) before vowel
+al | a + il
+allo | a + lo
+ai | a + i
+agli | a + gli
+all | a + l'
+agl | a + gl'
+alla | a + la
+alle | a + le
+con | with
+col | con + il
+coi | con + i (forms collo, cogli etc are now very rare)
+da | from
+dal | da + il
+dallo | da + lo
+dai | da + i
+dagli | da + gli
+dall | da + l'
+dagl | da + gl'
+dalla | da + la
+dalle | da + le
+di | of
+del | di + il
+dello | di + lo
+dei | di + i
+degli | di + gli
+dell | di + l'
+degl | di + gl'
+della | di + la
+delle | di + le
+in | in
+nel | in + il
+nello | in + lo
+nei | in + i
+negli | in + gli
+nell | in + l'
+negl | in + gl'
+nella | in + la
+nelle | in + le
+su | on
+sul | su + il
+sullo | su + lo
+sui | su + i
+sugli | su + gli
+sull | su + l'
+sugl | su + gl'
+sulla | su + la
+sulle | su + le
+per | through, by
+tra | among
+contro | against
+io | I
+tu | thou
+lui | he
+lei | she
+noi | we
+voi | you
+loro | they
+mio | my
+mia |
+miei |
+mie |
+tuo |
+tua |
+tuoi | thy
+tue |
+suo |
+sua |
+suoi | his, her
+sue |
+nostro | our
+nostra |
+nostri |
+nostre |
+vostro | your
+vostra |
+vostri |
+vostre |
+mi | me
+ti | thee
+ci | us, there
+vi | you, there
+lo | him, the
+la | her, the
+li | them
+le | them, the
+gli | to him, the
+ne | from there etc
+il | the
+un | a
+uno | a
+una | a
+ma | but
+ed | and
+se | if
+perché | why, because
+anche | also
+come | how
+dov | where (as dov')
+dove | where
+che | who, that
+chi | who
+cui | whom
+non | not
+più | more
+quale | who, that
+quanto | how much
+quanti |
+quanta |
+quante |
+quello | that
+quelli |
+quella |
+quelle |
+questo | this
+questi |
+questa |
+queste |
+si | yes
+tutto | all
+tutti | all
+
+ | single letter forms:
+
+a | at
+c | as c' for ce or ci
+e | and
+i | the
+l | as l'
+o | or
+
+ | forms of avere, to have (not including the infinitive):
+
+ho
+hai
+ha
+abbiamo
+avete
+hanno
+abbia
+abbiate
+abbiano
+avrò
+avrai
+avrà
+avremo
+avrete
+avranno
+avrei
+avresti
+avrebbe
+avremmo
+avreste
+avrebbero
+avevo
+avevi
+aveva
+avevamo
+avevate
+avevano
+ebbi
+avesti
+ebbe
+avemmo
+aveste
+ebbero
+avessi
+avesse
+avessimo
+avessero
+avendo
+avuto
+avuta
+avuti
+avute
+
+ | forms of essere, to be (not including the infinitive):
+sono
+sei
+è
+siamo
+siete
+sia
+siate
+siano
+sarò
+sarai
+sarà
+saremo
+sarete
+saranno
+sarei
+saresti
+sarebbe
+saremmo
+sareste
+sarebbero
+ero
+eri
+era
+eravamo
+eravate
+erano
+fui
+fosti
+fu
+fummo
+foste
+furono
+fossi
+fosse
+fossimo
+fossero
+essendo
+
+ | forms of fare, to do (not including the infinitive, fa, fat-):
+faccio
+fai
+facciamo
+fanno
+faccia
+facciate
+facciano
+farò
+farai
+farà
+faremo
+farete
+faranno
+farei
+faresti
+farebbe
+faremmo
+fareste
+farebbero
+facevo
+facevi
+faceva
+facevamo
+facevate
+facevano
+feci
+facesti
+fece
+facemmo
+faceste
+fecero
+facessi
+facesse
+facessimo
+facessero
+facendo
+
+ | forms of stare, to be (not including the infinitive):
+sto
+stai
+sta
+stiamo
+stanno
+stia
+stiate
+stiano
+starò
+starai
+starà
+staremo
+starete
+staranno
+starei
+staresti
+starebbe
+staremmo
+stareste
+starebbero
+stavo
+stavi
+stava
+stavamo
+stavate
+stavano
+stetti
+stesti
+stette
+stemmo
+steste
+stettero
+stessi
+stesse
+stessimo
+stessero
+stando
Added: lucene/dev/trunk/solr/example/files/conf/lang/stopwords_ja.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/files/conf/lang/stopwords_ja.txt?rev=1680973&view=auto
==============================================================================
--- lucene/dev/trunk/solr/example/files/conf/lang/stopwords_ja.txt (added)
+++ lucene/dev/trunk/solr/example/files/conf/lang/stopwords_ja.txt Fri May 22 01:23:27 2015
@@ -0,0 +1,127 @@
+#
+# This file defines a stopword set for Japanese.
+#
+# This set is made up of hand-picked frequent terms from segmented Japanese Wikipedia.
+# Punctuation characters and frequent kanji have mostly been left out. See LUCENE-3745
+# for frequency lists, etc. that can be useful for making your own set (if desired)
+#
+# Note that there is an overlap between these stopwords and the terms stopped when used
+# in combination with the JapanesePartOfSpeechStopFilter. When editing this file, note
+# that comments are not allowed on the same line as stopwords.
+#
+# Also note that stopping is done in a case-insensitive manner. Change your StopFilter
+# configuration if you need case-sensitive stopping. Lastly, note that stopping is done
+# using the same character width as the entries in this file. Since this StopFilter is
+# normally done after a CJKWidthFilter in your chain, you would usually want your romaji
+# entries to be in half-width and your kana entries to be in full-width.
+#
+ã®
+ã«
+ã¯
+ã
+ã
+ã
+ã§
+ã¦
+ã¨
+ã
+ã
+ã
+ãã
+ãã
+ã
+ãã
+ãã
+ãª
+ãã¨
+ã¨ãã¦
+ã
+ã
+ãã
+ãªã©
+ãªã£
+ãªã
+ãã®
+ãã
+ãã®
+ãã£
+ãã
+ã¾ã
+ãã®
+ã¨ãã
+ãã
+ã¾ã§
+ãã
+ãªã
+ã¸
+ã
+ã
+ãã
+ã«ãã£ã¦
+ã«ãã
+ãã
+ãã
+ã«ãã
+ã
+ãªã
+ããã
+ã«ããã¦
+ã°
+ãªãã£
+ãªã
+ããã
+ã«ã¤ãã¦
+ã
+ã ã£
+ãã®å¾
+ã§ãã
+ãã
+ã
+ã®ã§
+ãªã
+ã®ã¿
+ã§ã
+ã
+ã¤
+ã«ããã
+ããã³
+ãã
+ããã«
+ã§ã
+ã
+ãã
+ãã®ä»
+ã«é¢ãã
+ãã¡
+ã¾ã
+ã
+ãªã
+ã«å¯¾ãã¦
+ç¹ã«
+ãã
+åã³
+ããã
+ã¨ã
+ã§ã¯
+ã«ã¦
+ã»ã
+ãªãã
+ãã¡
+ããã¦
+ã¨ã¨ãã«
+ãã ã
+ãã¤ã¦
+ãããã
+ã¾ãã¯
+ã
+ã»ã©
+ãã®ã®
+ã«å¯¾ãã
+ã»ã¨ãã©
+ã¨å
±ã«
+ã¨ãã£ã
+ã§ã
+ã¨ã
+ã¨ãã
+ãã
+##### End of file
Added: lucene/dev/trunk/solr/example/files/conf/lang/stopwords_lv.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/files/conf/lang/stopwords_lv.txt?rev=1680973&view=auto
==============================================================================
--- lucene/dev/trunk/solr/example/files/conf/lang/stopwords_lv.txt (added)
+++ lucene/dev/trunk/solr/example/files/conf/lang/stopwords_lv.txt Fri May 22 01:23:27 2015
@@ -0,0 +1,172 @@
+# Set of Latvian stopwords from A Stemming Algorithm for Latvian, Karlis Kreslins
+# the original list of over 800 forms was refined:
+# pronouns, adverbs, interjections were removed
+#
+# prepositions
+aiz
+ap
+ar
+apakš
+Ärpus
+augšpus
+bez
+caur
+dÄļ
+gar
+iekš
+iz
+kopš
+labad
+lejpus
+līdz
+no
+otrpus
+pa
+par
+pÄr
+pÄc
+pie
+pirms
+pret
+priekš
+starp
+šaipus
+uz
+viÅpus
+virs
+virspus
+zem
+apakšpus
+# Conjunctions
+un
+bet
+jo
+ja
+ka
+lai
+tomÄr
+tikko
+turpretī
+arī
+kaut
+gan
+tÄdÄļ
+tÄ
+ne
+tikvien
+vien
+kÄ
+ir
+te
+vai
+kamÄr
+# Particles
+ar
+diezin
+droši
+diemžÄl
+nebūt
+ik
+it
+taÄu
+nu
+pat
+tiklab
+iekšpus
+nedz
+tik
+nevis
+turpretim
+jeb
+iekam
+iekÄm
+iekÄms
+kolīdz
+līdzko
+tiklīdz
+jebšu
+tÄlab
+tÄpÄc
+nekÄ
+itin
+jÄ
+jau
+jel
+nÄ
+nezin
+tad
+tikai
+vis
+tak
+iekams
+vien
+# modal verbs
+būt
+biju
+biji
+bija
+bijÄm
+bijÄt
+esmu
+esi
+esam
+esat
+būšu
+būsi
+būs
+būsim
+būsiet
+tikt
+tiku
+tiki
+tika
+tikÄm
+tikÄt
+tieku
+tiec
+tiek
+tiekam
+tiekat
+tikšu
+tiks
+tiksim
+tiksiet
+tapt
+tapi
+tapÄt
+topat
+tapšu
+tapsi
+taps
+tapsim
+tapsiet
+kļūt
+kļuvu
+kļuvi
+kļuva
+kļuvÄm
+kļuvÄt
+kļūstu
+kļūsti
+kļūst
+kļūstam
+kļūstat
+kļūšu
+kļūsi
+kļūs
+kļūsim
+kļūsiet
+# verbs
+varēt
+varēju
+varējām
+varēšu
+varēsim
+var
+varēji
+varējāt
+varēsi
+varēsiet
+varat
+varēja
+varēs
Added: lucene/dev/trunk/solr/example/files/conf/lang/stopwords_nl.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/files/conf/lang/stopwords_nl.txt?rev=1680973&view=auto
==============================================================================
--- lucene/dev/trunk/solr/example/files/conf/lang/stopwords_nl.txt (added)
+++ lucene/dev/trunk/solr/example/files/conf/lang/stopwords_nl.txt Fri May 22 01:23:27 2015
@@ -0,0 +1,119 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/dutch/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+ |
+ | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
+
+ | A Dutch stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | This is a ranked list (commonest to rarest) of stopwords derived from
+ | a large sample of Dutch text.
+
+ | Dutch stop words frequently exhibit homonym clashes. These are indicated
+ | clearly below.
+
+de | the
+en | and
+van | of, from
+ik | I, the ego
+te | (1) chez, at etc, (2) to, (3) too
+dat | that, which
+die | that, those, who, which
+in | in, inside
+een | a, an, one
+hij | he
+het | the, it
+niet | not, nothing, naught
+zijn | (1) to be, being, (2) his, one's, its
+is | is
+was | (1) was, past tense of all persons sing. of 'zijn' (to be) (2) wax, (3) the washing, (4) rise of river
+op | on, upon, at, in, up, used up
+aan | on, upon, to (as dative)
+met | with, by
+als | like, such as, when
+voor | (1) before, in front of, (2) furrow
+had | had, past tense all persons sing. of 'hebben' (have)
+er | there
+maar | but, only
+om | round, about, for etc
+hem | him
+dan | then
+zou | should/would, past tense all persons sing. of 'zullen'
+of | or, whether, if
+wat | what, something, anything
+mijn | possessive and noun 'mine'
+men | people, 'one'
+dit | this
+zo | so, thus, in this way
+door | through, by
+over | over, across
+ze | she, her, they, them
+zich | oneself
+bij | (1) a bee, (2) by, near, at
+ook | also, too
+tot | till, until
+je | you
+mij | me
+uit | out of, from
+der | Old Dutch form of 'van der' still found in surnames
+daar | (1) there, (2) because
+haar | (1) her, their, them, (2) hair
+naar | (1) unpleasant, unwell etc, (2) towards, (3) as
+heb | present first person sing. of 'to have'
+hoe | how, why
+heeft | present third person sing. of 'to have'
+hebben | 'to have' and various parts thereof
+deze | this
+u | you
+want | (1) for, (2) mitten, (3) rigging
+nog | yet, still
+zal | 'shall', first and third person sing. of verb 'zullen' (will)
+me | me
+zij | she, they
+nu | now
+ge | 'thou', still used in Belgium and south Netherlands
+geen | none
+omdat | because
+iets | something, somewhat
+worden | to become, grow, get
+toch | yet, still
+al | all, every, each
+waren | (1) 'were', (2) to wander, (3) wares
+veel | much, many
+meer | (1) more, (2) lake
+doen | to do, to make
+toen | then, when
+moet | noun 'spot/mote' and present form of 'to must'
+ben | (1) am, (2) 'are' in interrogative second person singular of 'to be'
+zonder | without
+kan | noun 'can' and present form of 'to be able'
+hun | their, them
+dus | so, consequently
+alles | all, everything, anything
+onder | under, beneath
+ja | yes, of course
+eens | once, one day
+hier | here
+wie | who
+werd | imperfect third person sing. of 'become'
+altijd | always
+doch | yet, but etc
+wordt | present third person sing. of 'become'
+wezen | (1) to be, (2) 'been' as in 'been fishing', (3) orphans
+kunnen | to be able
+ons | us/our
+zelf | self
+tegen | against, towards, at
+na | after, near
+reeds | already
+wil | (1) present tense of 'want', (2) 'will', noun, (3) fender
+kon | could; past tense of 'to be able'
+niets | nothing
+uw | your
+iemand | somebody
+geweest | been; past participle of 'be'
+andere | other
Added: lucene/dev/trunk/solr/example/files/conf/lang/stopwords_no.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/files/conf/lang/stopwords_no.txt?rev=1680973&view=auto
==============================================================================
--- lucene/dev/trunk/solr/example/files/conf/lang/stopwords_no.txt (added)
+++ lucene/dev/trunk/solr/example/files/conf/lang/stopwords_no.txt Fri May 22 01:23:27 2015
@@ -0,0 +1,194 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/norwegian/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+ |
+ | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
+
+ | A Norwegian stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | This stop word list is for the dominant bokmål dialect. Words unique
+ | to nynorsk are marked *.
+
+ | Revised by Jan Bruusgaard <Ja...@ssb.no>, Jan 2005
+
+og | and
+i | in
+jeg | I
+det | it/this/that
+at | to (w. inf.)
+en | a/an
+et | a/an
+den | it/this/that
+til | to
+er | is/am/are
+som | who/that
+på | on
+de | they / you(formal)
+med | with
+han | he
+av | of
+ikke | not
+ikkje | not *
+der | there
+så | so
+var | was/were
+meg | me
+seg | oneself (reflexive)
+men | but
+ett | one
+har | have
+om | about
+vi | we
+min | my
+mitt | my
+ha | have
+hadde | had
+hun | she
+nå | now
+over | over
+da | when/as
+ved | by/know
+fra | from
+du | you
+ut | out
+sin | hers/his
+dem | them
+oss | us
+opp | up
+man | you/one
+kan | can
+hans | his
+hvor | where
+eller | or
+hva | what
+skal | shall/must
+selv | self (reflexive)
+sjøl | self (reflexive)
+her | here
+alle | all
+vil | will
+bli | become
+ble | became
+blei | became *
+blitt | have become
+kunne | could
+inn | in
+når | when
+være | be
+kom | come
+noen | some
+noe | some
+ville | would
+dere | you
+som | who/which/that
+deres | their/theirs
+kun | only/just
+ja | yes
+etter | after
+ned | down
+skulle | should
+denne | this
+for | for/because
+deg | you
+si | hers/his
+sine | hers/his
+sitt | hers/his
+mot | against
+å | to
+meget | much
+hvorfor | why
+dette | this
+disse | these/those
+uten | without
+hvordan | how
+ingen | none
+din | your
+ditt | your
+blir | become
+samme | same
+hvilken | which
+hvilke | which (plural)
+sånn | such a
+inni | inside/within
+mellom | between
+vår | our
+hver | each
+hvem | who
+vors | us/ours
+hvis | whose
+både | both
+bare | only/just
+enn | than
+fordi | as/because
+før | before
+mange | many
+også | also
+slik | just
+vært | been
+være | to be
+båe | both *
+begge | both
+siden | since
+dykk | your *
+dykkar | yours *
+dei | they *
+deira | them *
+deires | theirs *
+deim | them *
+di | your (fem.) *
+då | as/when *
+eg | I *
+ein | a/an *
+eit | a/an *
+eitt | a/an *
+elles | or *
+honom | he *
+hjå | at *
+ho | she *
+hoe | she *
+henne | her
+hennar | her/hers
+hennes | hers
+hoss | how *
+hossen | how *
+ikkje | not *
+ingi | noone *
+inkje | noone *
+korleis | how *
+korso | how *
+kva | what/which *
+kvar | where *
+kvarhelst | where *
+kven | who/whom *
+kvi | why *
+kvifor | why *
+me | we *
+medan | while *
+mi | my *
+mine | my *
+mykje | much *
+no | now *
+nokon | some (masc./neut.) *
+noka | some (fem.) *
+nokor | some *
+noko | some *
+nokre | some *
+si | his/hers *
+sia | since *
+sidan | since *
+so | so *
+somt | some *
+somme | some *
+um | about *
+upp | up *
+vere | be *
+vore | was *
+verte | become *
+vort | become *
+varte | became *
+vart | became *
+
Added: lucene/dev/trunk/solr/example/files/conf/lang/stopwords_pt.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/files/conf/lang/stopwords_pt.txt?rev=1680973&view=auto
==============================================================================
--- lucene/dev/trunk/solr/example/files/conf/lang/stopwords_pt.txt (added)
+++ lucene/dev/trunk/solr/example/files/conf/lang/stopwords_pt.txt Fri May 22 01:23:27 2015
@@ -0,0 +1,253 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/portuguese/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+ |
+ | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
+
+ | A Portuguese stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+
+ | The following is a ranked list (commonest to rarest) of stopwords
+ | deriving from a large sample of text.
+
+ | Extra words have been added at the end.
+
+de | of, from
+a | the; to, at; her
+o | the; him
+que | who, that
+e | and
+do | de + o
+da | de + a
+em | in
+um | a
+para | for
+ | é from SER
+com | with
+não | not, no
+uma | a
+os | the; them
+no | em + o
+se | himself etc
+na | em + a
+por | for
+mais | more
+as | the; them
+dos | de + os
+como | as, like
+mas | but
+ | foi from SER
+ao | a + o
+ele | he
+das | de + as
+ | tem from TER
+à | a + a
+seu | his
+sua | her
+ou | or
+ | ser from SER
+quando | when
+muito | much
+ | há from HAV
+nos | em + os; us
+já | already, now
+ | está from EST
+eu | I
+também | also
+só | only, just
+pelo | per + o
+pela | per + a
+até | up to
+isso | that
+ela | she
+entre | between
+ | era from SER
+depois | after
+sem | without
+mesmo | same
+aos | a + os
+ | ter from TER
+seus | his
+quem | whom
+nas | em + as
+me | me
+esse | that
+eles | they
+ | estão from EST
+você | you
+ | tinha from TER
+ | foram from SER
+essa | that
+num | em + um
+nem | nor
+suas | her
+meu | my
+às | a + as
+minha | my
+ | têm from TER
+numa | em + uma
+pelos | per + os
+elas | they
+ | havia from HAV
+ | seja from SER
+qual | which
+ | será from SER
+nós | we
+ | tenho from TER
+lhe | to him, her
+deles | of them
+essas | those
+esses | those
+pelas | per + as
+este | this
+ | fosse from SER
+dele | of him
+
+ | other words. There are many contractions such as naquele = em+aquele,
+ | mo = me+o, but they are rare.
+ | Indefinite article plural forms are also rare.
+
+tu | thou
+te | thee
+vocês | you (plural)
+vos | you
+lhes | to them
+meus | my
+minhas
+teu | thy
+tua
+teus
+tuas
+nosso | our
+nossa
+nossos
+nossas
+
+dela | of her
+delas | of them
+
+esta | this
+estes | these
+estas | these
+aquele | that
+aquela | that
+aqueles | those
+aquelas | those
+isto | this
+aquilo | that
+
+ | forms of estar, to be (not including the infinitive):
+estou
+está
+estamos
+estão
+estive
+esteve
+estivemos
+estiveram
+estava
+estávamos
+estavam
+estivera
+estivéramos
+esteja
+estejamos
+estejam
+estivesse
+estivéssemos
+estivessem
+estiver
+estivermos
+estiverem
+
+ | forms of haver, to have (not including the infinitive):
+hei
+há
+havemos
+hão
+houve
+houvemos
+houveram
+houvera
+houvéramos
+haja
+hajamos
+hajam
+houvesse
+houvéssemos
+houvessem
+houver
+houvermos
+houverem
+houverei
+houverá
+houveremos
+houverão
+houveria
+houveríamos
+houveriam
+
+ | forms of ser, to be (not including the infinitive):
+sou
+somos
+são
+era
+éramos
+eram
+fui
+foi
+fomos
+foram
+fora
+fôramos
+seja
+sejamos
+sejam
+fosse
+fôssemos
+fossem
+for
+formos
+forem
+serei
+será
+seremos
+serão
+seria
+seríamos
+seriam
+
+ | forms of ter, to have (not including the infinitive):
+tenho
+tem
+temos
+tém
+tinha
+tínhamos
+tinham
+tive
+teve
+tivemos
+tiveram
+tivera
+tivéramos
+tenha
+tenhamos
+tenham
+tivesse
+tivéssemos
+tivessem
+tiver
+tivermos
+tiverem
+terei
+terá
+teremos
+terão
+teria
+teríamos
+teriam
Added: lucene/dev/trunk/solr/example/files/conf/lang/stopwords_ro.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/files/conf/lang/stopwords_ro.txt?rev=1680973&view=auto
==============================================================================
--- lucene/dev/trunk/solr/example/files/conf/lang/stopwords_ro.txt (added)
+++ lucene/dev/trunk/solr/example/files/conf/lang/stopwords_ro.txt Fri May 22 01:23:27 2015
@@ -0,0 +1,233 @@
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# Also see http://www.opensource.org/licenses/bsd-license.html
+acea
+aceasta
+această
+aceea
+acei
+aceia
+acel
+acela
+acele
+acelea
+acest
+acesta
+aceste
+acestea
+aceşti
+aceştia
+acolo
+acum
+ai
+aia
+aibă
+aici
+al
+ăla
+ale
+alea
+ălea
+altceva
+altcineva
+am
+ar
+are
+aş
+aşadar
+asemenea
+asta
+ăsta
+astăzi
+astea
+ăstea
+ăştia
+asupra
+aţi
+au
+avea
+avem
+aveţi
+azi
+bine
+bucur
+bună
+ca
+că
+căci
+când
+care
+cărei
+căror
+cărui
+cât
+câte
+câţi
+către
+câtva
+ce
+cel
+ceva
+chiar
+cînd
+cine
+cineva
+cît
+cîte
+cîţi
+cîtva
+contra
+cu
+cum
+cumva
+curând
+curînd
+da
+dă
+dacă
+dar
+datorită
+de
+deci
+deja
+deoarece
+departe
+deşi
+din
+dinaintea
+dintr
+dintre
+drept
+după
+ea
+ei
+el
+ele
+eram
+este
+eşti
+eu
+face
+fără
+fi
+fie
+fiecare
+fii
+fim
+fiţi
+iar
+ieri
+îi
+îl
+îmi
+împotriva
+în
+înainte
+înaintea
+încât
+încît
+încotro
+între
+întrucât
+întrucît
+îţi
+la
+lângă
+le
+li
+lîngă
+lor
+lui
+mă
+mâine
+mea
+mei
+mele
+mereu
+meu
+mi
+mine
+mult
+multă
+mulţi
+ne
+nicăieri
+nici
+nimeni
+nişte
+noastră
+noastre
+noi
+noştri
+nostru
+nu
+ori
+oricând
+oricare
+oricât
+orice
+oricînd
+oricine
+oricît
+oricum
+oriunde
+până
+pe
+pentru
+peste
+pînă
+poate
+pot
+prea
+prima
+primul
+prin
+printr
+sa
+să
+săi
+sale
+sau
+său
+se
+şi
+sînt
+sîntem
+sînteţi
+spre
+sub
+sunt
+suntem
+sunteţi
+ta
+tăi
+tale
+tău
+te
+ţi
+ţie
+tine
+toată
+toate
+tot
+toţi
+totuşi
+tu
+un
+una
+unde
+undeva
+unei
+unele
+uneori
+unor
+vă
+vi
+voastră
+voastre
+voi
+voştri
+vostru
+vouă
+vreo
+vreun
Added: lucene/dev/trunk/solr/example/files/conf/lang/stopwords_ru.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/files/conf/lang/stopwords_ru.txt?rev=1680973&view=auto
==============================================================================
--- lucene/dev/trunk/solr/example/files/conf/lang/stopwords_ru.txt (added)
+++ lucene/dev/trunk/solr/example/files/conf/lang/stopwords_ru.txt Fri May 22 01:23:27 2015
@@ -0,0 +1,243 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/russian/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+ |
+ | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
+
+ | a russian stop word list. comments begin with vertical bar. each stop
+ | word is at the start of a line.
+
+ | this is a ranked list (commonest to rarest) of stopwords derived from
+ | a large text sample.
+
+ | letter `ё' is translated to `е'.
+
+и | and
+в | in/into
+во | alternative form
+не | not
+ÑÑо | what/that
+он | he
+на | on/onto
+Ñ | i
+Ñ | from
+Ñо | alternative form
+как | how
+а | milder form of `no' (but)
+Ñо | conjunction and form of `that'
+вÑе | all
+она | she
+Ñак | so, thus
+его | him
+но | but
+да | yes/and
+ÑÑ | thou
+к | towards, by
+Ñ | around, chez
+же | intensifier particle
+Ð²Ñ | you
+за | beyond, behind
+Ð±Ñ | conditional/subj. particle
+по | up to, along
+ÑолÑко | only
+ее | her
+мне | to me
+бÑло | it was
+Ð²Ð¾Ñ | here is/are, particle
+Ð¾Ñ | away from
+Ð¼ÐµÐ½Ñ | me
+еÑе | still, yet, more
+Ð½ÐµÑ | no, there isnt/arent
+о | about
+из | out of
+ÐµÐ¼Ñ | to him
+ÑепеÑÑ | now
+когда | when
+даже | even
+Ð½Ñ | so, well
+вдÑÑг | suddenly
+ли | interrogative particle
+еÑли | if
+Ñже | already, but homonym of `narrower'
+или | or
+ни | neither
+бÑÑÑ | to be
+бÑл | he was
+него | prepositional form of его
+до | up to
+Ð²Ð°Ñ | you accusative
+нибÑÐ´Ñ | indef. suffix preceded by hyphen
+опÑÑÑ | again
+Ñж | already, but homonym of `adder'
+вам | to you
+Ñказал | he said
+Ð²ÐµÐ´Ñ | particle `after all'
+Ñам | there
+поÑом | then
+ÑÐµÐ±Ñ | oneself
+ниÑего | nothing
+ей | to her
+Ð¼Ð¾Ð¶ÐµÑ | usually with `бÑÑÑ' as `maybe'
+они | they
+ÑÑÑ | here
+где | where
+еÑÑÑ | there is/are
+надо | got to, must
+ней | prepositional form of ей
+Ð´Ð»Ñ | for
+Ð¼Ñ | we
+ÑÐµÐ±Ñ | thee
+их | them, their
+Ñем | than
+бÑла | she was
+Ñам | self
+ÑÑоб | in order to
+без | without
+бÑдÑо | as if
+Ñеловек | man, person, one
+Ñего | genitive form of `what'
+Ñаз | once
+Ñоже | also
+Ñебе | to oneself
+под | beneath
+Ð¶Ð¸Ð·Ð½Ñ | life
+бÑÐ´ÐµÑ | will be
+ж | short form of intensifer particle `же'
+Ñогда | then
+кÑо | who
+ÑÑÐ¾Ñ | this
+говоÑил | was saying
+Ñого | genitive form of `that'
+поÑÐ¾Ð¼Ñ | for that reason
+ÑÑого | genitive form of `this'
+какой | which
+ÑовÑем | altogether
+ним | prepositional form of `его', `они'
+здеÑÑ | here
+ÑÑом | prepositional form of `ÑÑоÑ'
+один | one
+поÑÑи | almost
+мой | my
+Ñем | instrumental/dative plural of `ÑоÑ', `Ñо'
+ÑÑÐ¾Ð±Ñ | full form of `in order that'
+нее | her (acc.)
+кажеÑÑÑ | it seems
+ÑейÑÐ°Ñ | now
+бÑли | they were
+кÑда | where to
+заÑем | why
+ÑказаÑÑ | to say
+всех | all (acc., gen. preposn. plural)
+никогда | never
+ÑÐµÐ³Ð¾Ð´Ð½Ñ | today
+можно | possible, one can
+пÑи | by
+Ð½Ð°ÐºÐ¾Ð½ÐµÑ | finally
+два | two
+об | alternative form of `о', about
+дÑÑгой | another
+хотя | even
+поÑле | after
+над | above
+болÑÑе | more
+ÑÐ¾Ñ | that one (masc.)
+ÑеÑез | across, in
+ÑÑи | these
+Ð½Ð°Ñ | us
+пÑо | about
+вÑего | in all, only, of all
+них | prepositional form of `они' (they)
+ÐºÐ°ÐºÐ°Ñ | which, feminine
+много | lots
+Ñазве | interrogative particle
+Ñказала | she said
+ÑÑи | three
+ÑÑÑ | this, acc. fem. sing.
+Ð¼Ð¾Ñ | my, feminine
+впÑоÑем | moreover, besides
+хорошо | good
+ÑÐ²Ð¾Ñ | ones own, acc. fem. sing.
+ÑÑой | oblique form of `ÑÑа', fem. `this'
+пеÑед | in front of
+иногда | sometimes
+лÑÑÑе | better
+ÑÑÑÑ | a little
+Ñом | preposn. form of `that one'
+нелÑÐ·Ñ | one must not
+Ñакой | such a one
+им | to them
+более | more
+вÑегда | always
+конеÑно | of course
+вÑÑ | acc. fem. sing of `all'
+Ð¼ÐµÐ¶Ð´Ñ | between
+
+
+ | b: some paradigms
+ |
+ | personal pronouns
+ |
+ | Ñ Ð¼ÐµÐ½Ñ Ð¼Ð½Ðµ мной [мноÑ]
+ | ÑÑ ÑÐµÐ±Ñ Ñебе Ñобой [ÑобоÑ]
+ | он его ÐµÐ¼Ñ Ð¸Ð¼ [него, немÑ, ним]
+ | она ее Ñи ÐµÑ [нее, нÑи, неÑ]
+ | оно его ÐµÐ¼Ñ Ð¸Ð¼ [него, немÑ, ним]
+ |
+ | Ð¼Ñ Ð½Ð°Ñ Ð½Ð°Ð¼ нами
+ | Ð²Ñ Ð²Ð°Ñ Ð²Ð°Ð¼ вами
+ | они их им ими [них, ним, ними]
+ |
+ | ÑÐµÐ±Ñ Ñебе Ñобой [ÑобоÑ]
+ |
+ | demonstrative pronouns: ÑÑÐ¾Ñ (this), ÑÐ¾Ñ (that)
+ |
+ | ÑÑÐ¾Ñ ÑÑа ÑÑо ÑÑи
+ | ÑÑого ÑÑÑ ÑÑо ÑÑи
+ | ÑÑого ÑÑой ÑÑого ÑÑиÑ
+ | ÑÑÐ¾Ð¼Ñ ÑÑой ÑÑÐ¾Ð¼Ñ ÑÑим
+ | ÑÑим ÑÑой ÑÑим [ÑÑоÑ] ÑÑими
+ | ÑÑом ÑÑой ÑÑом ÑÑиÑ
+ |
+ | ÑÐ¾Ñ Ñа Ñо Ñе
+ | Ñого ÑÑ Ñо Ñе
+ | Ñого Ñой Ñого ÑеÑ
+ | ÑÐ¾Ð¼Ñ Ñой ÑÐ¾Ð¼Ñ Ñем
+ | Ñем Ñой Ñем [ÑоÑ] Ñеми
+ | Ñом Ñой Ñом ÑеÑ
+ |
+ | determinative pronouns
+ |
+ | (a) веÑÑ (all)
+ |
+ | веÑÑ Ð²ÑÑ Ð²Ñе вÑе
+ | вÑего вÑÑ Ð²Ñе вÑе
+ | вÑего вÑей вÑего вÑеÑ
+ | вÑÐµÐ¼Ñ Ð²Ñей вÑÐµÐ¼Ñ Ð²Ñем
+ | вÑем вÑей вÑем [вÑеÑ] вÑеми
+ | вÑем вÑей вÑем вÑеÑ
+ |
+ | (b) Ñам (himself etc)
+ |
+ | Ñам Ñама Ñамо Ñами
+ | Ñамого ÑÐ°Ð¼Ñ Ñамо ÑамиÑ
+ | Ñамого Ñамой Ñамого ÑамиÑ
+ | ÑÐ°Ð¼Ð¾Ð¼Ñ Ñамой ÑÐ°Ð¼Ð¾Ð¼Ñ Ñамим
+ | Ñамим Ñамой Ñамим [ÑамоÑ] Ñамими
+ | Ñамом Ñамой Ñамом ÑамиÑ
+ |
+ | stems of verbs `to be', `to have', `to do' and modal
+ |
+ | бÑÑÑ Ð±Ñ Ð±Ñд бÑв еÑÑÑ ÑÑÑÑ
+ | име
+ | дел
+ | мог мож моÑÑ
+ | Ñме
+ | хоч хот
+ | долж
+ | можн
+ | нÑжн
+ | нелÑзÑ
+
Added: lucene/dev/trunk/solr/example/files/conf/lang/stopwords_sv.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/files/conf/lang/stopwords_sv.txt?rev=1680973&view=auto
==============================================================================
--- lucene/dev/trunk/solr/example/files/conf/lang/stopwords_sv.txt (added)
+++ lucene/dev/trunk/solr/example/files/conf/lang/stopwords_sv.txt Fri May 22 01:23:27 2015
@@ -0,0 +1,133 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/swedish/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+ |
+ | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
+
+ | A Swedish stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | This is a ranked list (commonest to rarest) of stopwords derived from
+ | a large text sample.
+
+ | Swedish stop words occasionally exhibit homonym clashes. For example
+ | så = so, but also seed. These are indicated clearly below.
+
+och | and
+det | it, this/that
+att | to (with infinitive)
+i | in, at
+en | a
+jag | I
+hon | she
+som | who, that
+han | he
+på | on
+den | it, this/that
+med | with
+var | where, each
+sig | him(self) etc
+för | for
+så | so (also: seed)
+till | to
+är | is
+men | but
+ett | a
+om | if; around, about
+hade | had
+de | they, these/those
+av | of
+icke | not, no
+mig | me
+du | you
+henne | her
+då | then, when
+sin | his
+nu | now
+har | have
+inte | inte någon = no one
+hans | his
+honom | him
+skulle | 'sake'
+hennes | her
+där | there
+min | my
+man | one (pronoun)
+ej | nor
+vid | at, by, on (also: vast)
+kunde | could
+något | some etc
+från | from, off
+ut | out
+när | when
+efter | after, behind
+upp | up
+vi | we
+dem | them
+vara | be
+vad | what
+över | over
+än | than
+dig | you
+kan | can
+sina | his
+här | here
+ha | have
+mot | towards
+alla | all
+under | under (also: wonder)
+någon | some etc
+eller | or (else)
+allt | all
+mycket | much
+sedan | since
+ju | why
+denna | this/that
+själv | myself, yourself etc
+detta | this/that
+åt | to
+utan | without
+varit | was
+hur | how
+ingen | no
+mitt | my
+ni | you
+bli | to be, become
+blev | from bli
+oss | us
+din | thy
+dessa | these/those
+några | some etc
+deras | their
+blir | from bli
+mina | my
+samma | (the) same
+vilken | who, that
+er | you, your
+sådan | such a
+vår | our
+blivit | from bli
+dess | its
+inom | within
+mellan | between
+sådant | such a
+varför | why
+varje | each
+vilka | who, that
+ditt | thy
+vem | who
+vilket | who, that
+sitta | his
+sådana | such a
+vart | each
+dina | thy
+vars | whose
+vårt | our
+våra | our
+ert | your
+era | your
+vilkas | whose
+
Added: lucene/dev/trunk/solr/example/files/conf/lang/stopwords_th.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/files/conf/lang/stopwords_th.txt?rev=1680973&view=auto
==============================================================================
--- lucene/dev/trunk/solr/example/files/conf/lang/stopwords_th.txt (added)
+++ lucene/dev/trunk/solr/example/files/conf/lang/stopwords_th.txt Fri May 22 01:23:27 2015
@@ -0,0 +1,119 @@
+# Thai stopwords from:
+# "Opinion Detection in Thai Political News Columns
+# Based on Subjectivity Analysis"
+# Khampol Sukhum, Supot Nitsuwat, and Choochart Haruechaiyasak
+à¹à¸§à¹
+à¹à¸¡à¹
+à¹à¸
+à¹à¸à¹
+à¹à¸«à¹
+à¹à¸
+à¹à¸à¸¢
+à¹à¸«à¹à¸
+à¹à¸¥à¹à¸§
+à¹à¸¥à¸°
+à¹à¸£à¸
+à¹à¸à¸
+à¹à¸à¹
+à¹à¸à¸
+à¹à¸«à¹à¸
+à¹à¸¥à¸¢
+à¹à¸£à¸´à¹à¸¡
+à¹à¸£à¸²
+à¹à¸¡à¸·à¹à¸
+à¹à¸à¸·à¹à¸
+à¹à¸à¸£à¸²à¸°
+à¹à¸à¹à¸à¸à¸²à¸£
+à¹à¸à¹à¸
+à¹à¸à¸´à¸à¹à¸à¸¢
+à¹à¸à¸´à¸
+à¹à¸à¸·à¹à¸à¸à¸à¸²à¸
+à¹à¸à¸µà¸¢à¸§à¸à¸±à¸
+à¹à¸à¸µà¸¢à¸§
+à¹à¸à¹à¸
+à¹à¸à¸à¸²à¸°
+à¹à¸à¸¢
+à¹à¸à¹à¸²
+à¹à¸à¸²
+à¸à¸µà¸
+à¸à¸²à¸
+à¸à¸°à¹à¸£
+à¸à¸à¸
+à¸à¸¢à¹à¸²à¸
+à¸à¸¢à¸¹à¹
+à¸à¸¢à¸²à¸
+หาà¸
+หลาย
+หลัà¸à¸à¸²à¸
+หลัà¸
+หรืà¸
+หà¸à¸¶à¹à¸
+สà¹à¸§à¸
+สà¹à¸
+สุà¸
+สà¹à¸²à¸«à¸£à¸±à¸
+วà¹à¸²
+วัà¸
+ลà¸
+รà¹à¸§à¸¡
+ราย
+รัà¸
+ระหวà¹à¸²à¸
+รวม
+ยัà¸
+มี
+มาà¸
+มา
+à¸à¸£à¹à¸à¸¡
+à¸à¸
+à¸à¹à¸²à¸
+à¸à¸¥
+à¸à¸²à¸
+à¸à¹à¸²
+à¸à¸µà¹
+à¸à¹à¸²
+à¸à¸±à¹à¸
+à¸à¸±à¸
+à¸à¸à¸à¸à¸²à¸
+à¸à¸¸à¸
+à¸à¸µà¹à¸ªà¸¸à¸
+à¸à¸µà¹
+à¸à¹à¸²à¹à¸«à¹
+à¸à¹à¸²
+à¸à¸²à¸
+à¸à¸±à¹à¸à¸à¸µà¹
+à¸à¸±à¹à¸
+à¸à¹à¸²
+à¸à¸¹à¸
+à¸à¸¶à¸
+à¸à¹à¸à¸
+à¸à¹à¸²à¸à¹
+à¸à¹à¸²à¸
+à¸à¹à¸
+à¸à¸²à¸¡
+à¸à¸±à¹à¸à¹à¸à¹
+à¸à¸±à¹à¸
+à¸à¹à¸²à¸
+à¸à¹à¸§à¸¢
+à¸à¸±à¸
+à¸à¸¶à¹à¸
+à¸à¹à¸§à¸
+à¸à¸¶à¸
+à¸à¸²à¸
+à¸à¸±à¸
+à¸à¸°
+à¸à¸·à¸
+à¸à¸§à¸²à¸¡
+à¸à¸£à¸±à¹à¸
+à¸à¸
+à¸à¸¶à¹à¸
+à¸à¸à¸
+à¸à¸
+à¸à¸à¸°
+à¸à¹à¸à¸
+à¸à¹
+à¸à¸²à¸£
+à¸à¸±à¸
+à¸à¸±à¸
+à¸à¸§à¹à¸²
+à¸à¸¥à¹à¸²à¸§
Added: lucene/dev/trunk/solr/example/files/conf/lang/stopwords_tr.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/files/conf/lang/stopwords_tr.txt?rev=1680973&view=auto
==============================================================================
--- lucene/dev/trunk/solr/example/files/conf/lang/stopwords_tr.txt (added)
+++ lucene/dev/trunk/solr/example/files/conf/lang/stopwords_tr.txt Fri May 22 01:23:27 2015
@@ -0,0 +1,212 @@
+# Turkish stopwords from LUCENE-559
+# merged with the list from "Information Retrieval on Turkish Texts"
+# (http://www.users.muohio.edu/canf/papers/JASIST2008offPrint.pdf)
+acaba
+altmış
+altı
+ama
+ancak
+arada
+aslında
+ayrıca
+bana
+bazı
+belki
+ben
+benden
+beni
+benim
+beri
+beş
+bile
+bin
+bir
+birçok
+biri
+birkaç
+birkez
+birşey
+birşeyi
+biz
+bize
+bizden
+bizi
+bizim
+böyle
+böylece
+bu
+buna
+bunda
+bundan
+bunlar
+bunları
+bunların
+bunu
+bunun
+burada
+çok
+çünkü
+da
+daha
+dahi
+de
+defa
+değil
+diğer
+diye
+doksan
+dokuz
+dolayı
+dolayısıyla
+dört
+edecek
+eden
+ederek
+edilecek
+ediliyor
+edilmesi
+ediyor
+eğer
+elli
+en
+etmesi
+etti
+ettiği
+ettiğini
+gibi
+göre
+halen
+hangi
+hatta
+hem
+henüz
+hep
+hepsi
+her
+herhangi
+herkesin
+hiç
+hiçbir
+için
+iki
+ile
+ilgili
+ise
+işte
+itibaren
+itibariyle
+kadar
+karşın
+katrilyon
+kendi
+kendilerine
+kendini
+kendisi
+kendisine
+kendisini
+kez
+ki
+kim
+kimden
+kime
+kimi
+kimse
+kırk
+milyar
+milyon
+mu
+mü
+mı
+nasıl
+ne
+neden
+nedenle
+nerde
+nerede
+nereye
+niye
+niçin
+o
+olan
+olarak
+oldu
+olduğu
+olduğunu
+olduklarını
+olmadı
+olmadığı
+olmak
+olması
+olmayan
+olmaz
+olsa
+olsun
+olup
+olur
+olursa
+oluyor
+on
+ona
+ondan
+onlar
+onlardan
+onları
+onların
+onu
+onun
+otuz
+oysa
+öyle
+pek
+rağmen
+sadece
+sanki
+sekiz
+seksen
+sen
+senden
+seni
+senin
+siz
+sizden
+sizi
+sizin
+şey
+şeyden
+şeyi
+şeyler
+şöyle
+şu
+şuna
+şunda
+şundan
+şunları
+şunu
+tarafından
+trilyon
+tüm
+üç
+üzere
+var
+vardı
+ve
+veya
+ya
+yani
+yapacak
+yapılan
+yapılması
+yapıyor
+yapmak
+yaptı
+yaptığı
+yaptığını
+yaptıkları
+yedi
+yerine
+yetmiş
+yine
+yirmi
+yoksa
+yüz
+zaten
Added: lucene/dev/trunk/solr/example/files/conf/lang/userdict_ja.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/files/conf/lang/userdict_ja.txt?rev=1680973&view=auto
==============================================================================
--- lucene/dev/trunk/solr/example/files/conf/lang/userdict_ja.txt (added)
+++ lucene/dev/trunk/solr/example/files/conf/lang/userdict_ja.txt Fri May 22 01:23:27 2015
@@ -0,0 +1,29 @@
+#
+# This is a sample user dictionary for Kuromoji (JapaneseTokenizer)
+#
+# Add entries to this file in order to override the statistical model in terms
+# of segmentation, readings and part-of-speech tags. Notice that entries do
+# not have weights since they are always used when found. This is by-design
+# in order to maximize ease-of-use.
+#
+# Entries are defined using the following CSV format:
+# <text>,<token 1> ... <token n>,<reading 1> ... <reading n>,<part-of-speech tag>
+#
+# Notice that a single half-width space separates tokens and readings, and
+# that the number tokens and readings must match exactly.
+#
+# Also notice that multiple entries with the same <text> is undefined.
+#
+# Whitespace only lines are ignored. Comments are not allowed on entry lines.
+#
+
+# Custom segmentation for kanji compounds
+日本経済新聞,日本 経済 新聞,ニホン ケイザイ シンブン,カスタム名詞
+関西国際空港,関西 国際 空港,カンサイ コクサイ クウコウ,カスタム名詞
+
+# Custom segmentation for compound katakana
+トートバッグ,トート バッグ,トート バッグ,かずカナ名詞
+ショルダーバッグ,ショルダー バッグ,ショルダー バッグ,かずカナ名詞
+
+# Custom reading for former sumo wrestler
+朝青龍,朝青龍,アサショウリュウ,カスタム人名