You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@chukwa.apache.org by ey...@apache.org on 2014/07/31 06:05:02 UTC
svn commit: r1614808 [1/7] - in /chukwa/trunk: ./ conf/ contrib/solr/
contrib/solr/logs/ contrib/solr/logs/conf/ contrib/solr/logs/conf/clustering/
contrib/solr/logs/conf/clustering/carrot2/ contrib/solr/logs/conf/lang/
contrib/solr/logs/conf/velocity/...
Author: eyang
Date: Thu Jul 31 04:04:59 2014
New Revision: 1614808
URL: http://svn.apache.org/r1614808
Log:
CHUKWA-722. Added SolrWriter to stream data to SolrCloud. (Eric Yang)
Added:
chukwa/trunk/contrib/solr/
chukwa/trunk/contrib/solr/logs/
chukwa/trunk/contrib/solr/logs/README.txt
chukwa/trunk/contrib/solr/logs/conf/
chukwa/trunk/contrib/solr/logs/conf/_schema_analysis_stopwords_english.json
chukwa/trunk/contrib/solr/logs/conf/_schema_analysis_synonyms_english.json
chukwa/trunk/contrib/solr/logs/conf/admin-extra.html
chukwa/trunk/contrib/solr/logs/conf/admin-extra.menu-bottom.html
chukwa/trunk/contrib/solr/logs/conf/admin-extra.menu-top.html
chukwa/trunk/contrib/solr/logs/conf/clustering/
chukwa/trunk/contrib/solr/logs/conf/clustering/carrot2/
chukwa/trunk/contrib/solr/logs/conf/clustering/carrot2/kmeans-attributes.xml
chukwa/trunk/contrib/solr/logs/conf/clustering/carrot2/lingo-attributes.xml
chukwa/trunk/contrib/solr/logs/conf/clustering/carrot2/stc-attributes.xml
chukwa/trunk/contrib/solr/logs/conf/currency.xml
chukwa/trunk/contrib/solr/logs/conf/elevate.xml
chukwa/trunk/contrib/solr/logs/conf/lang/
chukwa/trunk/contrib/solr/logs/conf/lang/contractions_ca.txt
chukwa/trunk/contrib/solr/logs/conf/lang/contractions_fr.txt
chukwa/trunk/contrib/solr/logs/conf/lang/contractions_ga.txt
chukwa/trunk/contrib/solr/logs/conf/lang/contractions_it.txt
chukwa/trunk/contrib/solr/logs/conf/lang/hyphenations_ga.txt
chukwa/trunk/contrib/solr/logs/conf/lang/stemdict_nl.txt
chukwa/trunk/contrib/solr/logs/conf/lang/stoptags_ja.txt
chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_ar.txt
chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_bg.txt
chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_ca.txt
chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_ckb.txt
chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_cz.txt
chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_da.txt
chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_de.txt
chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_el.txt
chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_en.txt
chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_es.txt
chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_eu.txt
chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_fa.txt
chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_fi.txt
chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_fr.txt
chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_ga.txt
chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_gl.txt
chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_hi.txt
chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_hu.txt
chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_hy.txt
chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_id.txt
chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_it.txt
chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_ja.txt
chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_lv.txt
chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_nl.txt
chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_no.txt
chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_pt.txt
chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_ro.txt
chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_ru.txt
chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_sv.txt
chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_th.txt
chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_tr.txt
chukwa/trunk/contrib/solr/logs/conf/lang/userdict_ja.txt
chukwa/trunk/contrib/solr/logs/conf/mapping-FoldToASCII.txt
chukwa/trunk/contrib/solr/logs/conf/mapping-ISOLatin1Accent.txt
chukwa/trunk/contrib/solr/logs/conf/protwords.txt
chukwa/trunk/contrib/solr/logs/conf/schema.xml
chukwa/trunk/contrib/solr/logs/conf/scripts.conf
chukwa/trunk/contrib/solr/logs/conf/solrconfig.xml
chukwa/trunk/contrib/solr/logs/conf/spellings.txt
chukwa/trunk/contrib/solr/logs/conf/stopwords.txt
chukwa/trunk/contrib/solr/logs/conf/synonyms.txt
chukwa/trunk/contrib/solr/logs/conf/update-script.js
chukwa/trunk/contrib/solr/logs/conf/velocity/
chukwa/trunk/contrib/solr/logs/conf/velocity/README.txt
chukwa/trunk/contrib/solr/logs/conf/velocity/VM_global_library.vm
chukwa/trunk/contrib/solr/logs/conf/velocity/browse.vm
chukwa/trunk/contrib/solr/logs/conf/velocity/cluster.vm
chukwa/trunk/contrib/solr/logs/conf/velocity/cluster_results.vm
chukwa/trunk/contrib/solr/logs/conf/velocity/debug.vm
chukwa/trunk/contrib/solr/logs/conf/velocity/did_you_mean.vm
chukwa/trunk/contrib/solr/logs/conf/velocity/error.vm
chukwa/trunk/contrib/solr/logs/conf/velocity/facet_fields.vm
chukwa/trunk/contrib/solr/logs/conf/velocity/facet_pivot.vm
chukwa/trunk/contrib/solr/logs/conf/velocity/facet_queries.vm
chukwa/trunk/contrib/solr/logs/conf/velocity/facet_ranges.vm
chukwa/trunk/contrib/solr/logs/conf/velocity/facets.vm
chukwa/trunk/contrib/solr/logs/conf/velocity/footer.vm
chukwa/trunk/contrib/solr/logs/conf/velocity/head.vm
chukwa/trunk/contrib/solr/logs/conf/velocity/header.vm
chukwa/trunk/contrib/solr/logs/conf/velocity/hit.vm
chukwa/trunk/contrib/solr/logs/conf/velocity/hit_grouped.vm
chukwa/trunk/contrib/solr/logs/conf/velocity/hit_plain.vm
chukwa/trunk/contrib/solr/logs/conf/velocity/join_doc.vm
chukwa/trunk/contrib/solr/logs/conf/velocity/jquery.autocomplete.css
chukwa/trunk/contrib/solr/logs/conf/velocity/jquery.autocomplete.js
chukwa/trunk/contrib/solr/logs/conf/velocity/layout.vm
chukwa/trunk/contrib/solr/logs/conf/velocity/main.css
chukwa/trunk/contrib/solr/logs/conf/velocity/mime_type_lists.vm
chukwa/trunk/contrib/solr/logs/conf/velocity/pagination_bottom.vm
chukwa/trunk/contrib/solr/logs/conf/velocity/pagination_top.vm
chukwa/trunk/contrib/solr/logs/conf/velocity/product_doc.vm
chukwa/trunk/contrib/solr/logs/conf/velocity/query.vm
chukwa/trunk/contrib/solr/logs/conf/velocity/query_form.vm
chukwa/trunk/contrib/solr/logs/conf/velocity/query_group.vm
chukwa/trunk/contrib/solr/logs/conf/velocity/query_spatial.vm
chukwa/trunk/contrib/solr/logs/conf/velocity/results_list.vm
chukwa/trunk/contrib/solr/logs/conf/velocity/richtext_doc.vm
chukwa/trunk/contrib/solr/logs/conf/velocity/suggest.vm
chukwa/trunk/contrib/solr/logs/conf/velocity/tabs.vm
chukwa/trunk/contrib/solr/logs/conf/xslt/
chukwa/trunk/contrib/solr/logs/conf/xslt/example.xsl
chukwa/trunk/contrib/solr/logs/conf/xslt/example_atom.xsl
chukwa/trunk/contrib/solr/logs/conf/xslt/example_rss.xsl
chukwa/trunk/contrib/solr/logs/conf/xslt/luke.xsl
chukwa/trunk/contrib/solr/logs/conf/xslt/updateXml.xsl
chukwa/trunk/contrib/solr/logs/core.properties
chukwa/trunk/src/main/java/org/apache/hadoop/chukwa/datacollection/writer/solr/
chukwa/trunk/src/main/java/org/apache/hadoop/chukwa/datacollection/writer/solr/SolrWriter.java
chukwa/trunk/src/test/java/org/apache/hadoop/chukwa/datacollection/writer/solr/
chukwa/trunk/src/test/java/org/apache/hadoop/chukwa/datacollection/writer/solr/TestSolrWriter.java
Modified:
chukwa/trunk/CHANGES.txt
chukwa/trunk/conf/chukwa-agent-conf.xml
chukwa/trunk/pom.xml
chukwa/trunk/src/packages/tarball/all.xml
Modified: chukwa/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/chukwa/trunk/CHANGES.txt?rev=1614808&r1=1614807&r2=1614808&view=diff
==============================================================================
--- chukwa/trunk/CHANGES.txt (original)
+++ chukwa/trunk/CHANGES.txt Thu Jul 31 04:04:59 2014
@@ -12,6 +12,8 @@ Release 0.6 - Unreleased
NEW FEATURES
+ CHUKWA-722. Added SolrWriter to stream data to SolrCloud. (Eric Yang)
+
CHUKWA-719. Added Kerberos support for HBaseWriter. (Sreepathi Prasanna via Eric Yang)
CHUKWA-715. Added Oozie Adaptor for collecting Oozie metrics. (Sreepathi Prasanna via Eric Yang)
Modified: chukwa/trunk/conf/chukwa-agent-conf.xml
URL: http://svn.apache.org/viewvc/chukwa/trunk/conf/chukwa-agent-conf.xml?rev=1614808&r1=1614807&r2=1614808&view=diff
==============================================================================
--- chukwa/trunk/conf/chukwa-agent-conf.xml (original)
+++ chukwa/trunk/conf/chukwa-agent-conf.xml Thu Jul 31 04:04:59 2014
@@ -97,4 +97,13 @@
<value>HADOOP</value>
</property>
+ <property>
+ <name>solr.cloud.address</name>
+ <value>localhost:2181</value>
+ </property>
+
+ <property>
+ <name>solr.collection</name>
+ <value>logs</value>
+ </property>
</configuration>
Added: chukwa/trunk/contrib/solr/logs/README.txt
URL: http://svn.apache.org/viewvc/chukwa/trunk/contrib/solr/logs/README.txt?rev=1614808&view=auto
==============================================================================
--- chukwa/trunk/contrib/solr/logs/README.txt (added)
+++ chukwa/trunk/contrib/solr/logs/README.txt Thu Jul 31 04:04:59 2014
@@ -0,0 +1,82 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+Chukwa SolrCore Instance Directory
+=============================
+
+This directory is provided as an example of what an "Instance Directory"
+should look like for a Chukwa SolrCore.
+
+Basic Directory Structure
+-------------------------
+
+A SolrCore instance directory typically contains the following sub-directories...
+
+ conf/
+ This directory is mandatory and must contain your solrconfig.xml
+ and schema.xml. Any other optional configuration files would also
+ be kept here.
+
+ data/
+ This directory is the default location where Solr will keep your
+ index, and is used by the replication scripts for dealing with
+ snapshots. You can override this location in the
+ conf/solrconfig.xml. Solr will create this directory if it does not
+ already exist.
+
+ lib/
+ This directory is optional. If it exists, Solr will load any Jars
+ found in this directory and use them to resolve any "plugins"
+ specified in your solrconfig.xml or schema.xml (ie: Analyzers,
+ Request Handlers, etc...). Alternatively you can use the <lib>
+ syntax in conf/solrconfig.xml to direct Solr to your plugins. See
+ the example conf/solrconfig.xml file for details.
+
+Usage
+-----
+
+- Symlink this directory to solr-4.9.0/example/solr/logs.
+- Start solr cloud with:
+
+ java -Dbootstrap_confdir=chukwa-0.6.0/etc/solr/logs/conf \
+ -Dcollection.configName=myconf -Djetty.port=7574 \
+ -DzkHost=localhost:2181 -jar start.jar
+
+- Configure chukwa-agent-conf.xml with pipeline that includes SolrWriter.
+
+ <property>
+ <name>chukwa.pipeline</name>
+ <value>org.apache.hadoop.chukwa.datacollection.writer.solr.SolrWriter</value>
+ <description>Configure agent to write to solr</description>
+ </property>
+
+ <property>
+ <name>solr.cloud.address</name>
+ <value>localhost:2181</value>
+ <description>Solr cloud zookeeper address</description>
+ </property>
+
+ <property>
+ <name>solr.collection</name>
+ <value>logs</value>
+ <description>SolrCore Instance name</description>
+ </property>
+
+- Restart Chukwa Agent and point browser to:
+
+ http://localhost:7574/solr/logs/select?q=*:*&wt=json&indent=true
+
+This REST API will display all collected log entries.
Added: chukwa/trunk/contrib/solr/logs/conf/_schema_analysis_stopwords_english.json
URL: http://svn.apache.org/viewvc/chukwa/trunk/contrib/solr/logs/conf/_schema_analysis_stopwords_english.json?rev=1614808&view=auto
==============================================================================
--- chukwa/trunk/contrib/solr/logs/conf/_schema_analysis_stopwords_english.json (added)
+++ chukwa/trunk/contrib/solr/logs/conf/_schema_analysis_stopwords_english.json Thu Jul 31 04:04:59 2014
@@ -0,0 +1,38 @@
+{
+ "initArgs":{"ignoreCase":true},
+ "managedList":[
+ "a",
+ "an",
+ "and",
+ "are",
+ "as",
+ "at",
+ "be",
+ "but",
+ "by",
+ "for",
+ "if",
+ "in",
+ "into",
+ "is",
+ "it",
+ "no",
+ "not",
+ "of",
+ "on",
+ "or",
+ "stopworda",
+ "stopwordb",
+ "such",
+ "that",
+ "the",
+ "their",
+ "then",
+ "there",
+ "these",
+ "they",
+ "this",
+ "to",
+ "was",
+ "will",
+ "with"]}
Added: chukwa/trunk/contrib/solr/logs/conf/_schema_analysis_synonyms_english.json
URL: http://svn.apache.org/viewvc/chukwa/trunk/contrib/solr/logs/conf/_schema_analysis_synonyms_english.json?rev=1614808&view=auto
==============================================================================
--- chukwa/trunk/contrib/solr/logs/conf/_schema_analysis_synonyms_english.json (added)
+++ chukwa/trunk/contrib/solr/logs/conf/_schema_analysis_synonyms_english.json Thu Jul 31 04:04:59 2014
@@ -0,0 +1,11 @@
+{
+ "initArgs":{
+ "ignoreCase":true,
+ "format":"solr"
+ },
+ "managedMap":{
+ "GB":["GiB","Gigabyte"],
+ "happy":["glad","joyful"],
+ "TV":["Television"]
+ }
+}
Added: chukwa/trunk/contrib/solr/logs/conf/admin-extra.html
URL: http://svn.apache.org/viewvc/chukwa/trunk/contrib/solr/logs/conf/admin-extra.html?rev=1614808&view=auto
==============================================================================
--- chukwa/trunk/contrib/solr/logs/conf/admin-extra.html (added)
+++ chukwa/trunk/contrib/solr/logs/conf/admin-extra.html Thu Jul 31 04:04:59 2014
@@ -0,0 +1,24 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- The content of this page will be statically included into the top-
+right box of the cores overview page. Uncomment this as an example to
+see where the content will show up.
+
+<img src="img/ico/construction.png"> This line will appear at the top-
+right box on collection1's Overview
+-->
Added: chukwa/trunk/contrib/solr/logs/conf/admin-extra.menu-bottom.html
URL: http://svn.apache.org/viewvc/chukwa/trunk/contrib/solr/logs/conf/admin-extra.menu-bottom.html?rev=1614808&view=auto
==============================================================================
--- chukwa/trunk/contrib/solr/logs/conf/admin-extra.menu-bottom.html (added)
+++ chukwa/trunk/contrib/solr/logs/conf/admin-extra.menu-bottom.html Thu Jul 31 04:04:59 2014
@@ -0,0 +1,25 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- admin-extra.menu-bottom.html -->
+<!--
+<li>
+ <a href="#" style="background-image: url(img/ico/construction.png);">
+ LAST ITEM
+ </a>
+</li>
+-->
Added: chukwa/trunk/contrib/solr/logs/conf/admin-extra.menu-top.html
URL: http://svn.apache.org/viewvc/chukwa/trunk/contrib/solr/logs/conf/admin-extra.menu-top.html?rev=1614808&view=auto
==============================================================================
--- chukwa/trunk/contrib/solr/logs/conf/admin-extra.menu-top.html (added)
+++ chukwa/trunk/contrib/solr/logs/conf/admin-extra.menu-top.html Thu Jul 31 04:04:59 2014
@@ -0,0 +1,25 @@
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- admin-extra.menu-top.html -->
+<!--
+<li>
+ <a href="#" style="background-image: url(img/ico/construction.png);">
+ FIRST ITEM
+ </a>
+</li>
+-->
Added: chukwa/trunk/contrib/solr/logs/conf/clustering/carrot2/kmeans-attributes.xml
URL: http://svn.apache.org/viewvc/chukwa/trunk/contrib/solr/logs/conf/clustering/carrot2/kmeans-attributes.xml?rev=1614808&view=auto
==============================================================================
--- chukwa/trunk/contrib/solr/logs/conf/clustering/carrot2/kmeans-attributes.xml (added)
+++ chukwa/trunk/contrib/solr/logs/conf/clustering/carrot2/kmeans-attributes.xml Thu Jul 31 04:04:59 2014
@@ -0,0 +1,19 @@
+<!--
+ Default configuration for the bisecting k-means clustering algorithm.
+
+ This file can be loaded (and saved) by Carrot2 Workbench.
+ http://project.carrot2.org/download.html
+-->
+<attribute-sets default="attributes">
+ <attribute-set id="attributes">
+ <value-set>
+ <label>attributes</label>
+ <attribute key="MultilingualClustering.defaultLanguage">
+ <value type="org.carrot2.core.LanguageCode" value="ENGLISH"/>
+ </attribute>
+ <attribute key="MultilingualClustering.languageAggregationStrategy">
+ <value type="org.carrot2.text.clustering.MultilingualClustering$LanguageAggregationStrategy" value="FLATTEN_MAJOR_LANGUAGE"/>
+ </attribute>
+ </value-set>
+ </attribute-set>
+</attribute-sets>
Added: chukwa/trunk/contrib/solr/logs/conf/clustering/carrot2/lingo-attributes.xml
URL: http://svn.apache.org/viewvc/chukwa/trunk/contrib/solr/logs/conf/clustering/carrot2/lingo-attributes.xml?rev=1614808&view=auto
==============================================================================
--- chukwa/trunk/contrib/solr/logs/conf/clustering/carrot2/lingo-attributes.xml (added)
+++ chukwa/trunk/contrib/solr/logs/conf/clustering/carrot2/lingo-attributes.xml Thu Jul 31 04:04:59 2014
@@ -0,0 +1,24 @@
+<!--
+ Default configuration for the Lingo clustering algorithm.
+
+ This file can be loaded (and saved) by Carrot2 Workbench.
+ http://project.carrot2.org/download.html
+-->
+<attribute-sets default="attributes">
+ <attribute-set id="attributes">
+ <value-set>
+ <label>attributes</label>
+ <!--
+ The language to assume for clustered documents.
+ For a list of allowed values, see:
+ http://download.carrot2.org/stable/manual/#section.attribute.lingo.MultilingualClustering.defaultLanguage
+ -->
+ <attribute key="MultilingualClustering.defaultLanguage">
+ <value type="org.carrot2.core.LanguageCode" value="ENGLISH"/>
+ </attribute>
+ <attribute key="LingoClusteringAlgorithm.desiredClusterCountBase">
+ <value type="java.lang.Integer" value="20"/>
+ </attribute>
+ </value-set>
+ </attribute-set>
+</attribute-sets>
\ No newline at end of file
Added: chukwa/trunk/contrib/solr/logs/conf/clustering/carrot2/stc-attributes.xml
URL: http://svn.apache.org/viewvc/chukwa/trunk/contrib/solr/logs/conf/clustering/carrot2/stc-attributes.xml?rev=1614808&view=auto
==============================================================================
--- chukwa/trunk/contrib/solr/logs/conf/clustering/carrot2/stc-attributes.xml (added)
+++ chukwa/trunk/contrib/solr/logs/conf/clustering/carrot2/stc-attributes.xml Thu Jul 31 04:04:59 2014
@@ -0,0 +1,19 @@
+<!--
+ Default configuration for the STC clustering algorithm.
+
+ This file can be loaded (and saved) by Carrot2 Workbench.
+ http://project.carrot2.org/download.html
+-->
+<attribute-sets default="attributes">
+ <attribute-set id="attributes">
+ <value-set>
+ <label>attributes</label>
+ <attribute key="MultilingualClustering.defaultLanguage">
+ <value type="org.carrot2.core.LanguageCode" value="ENGLISH"/>
+ </attribute>
+ <attribute key="MultilingualClustering.languageAggregationStrategy">
+ <value type="org.carrot2.text.clustering.MultilingualClustering$LanguageAggregationStrategy" value="FLATTEN_MAJOR_LANGUAGE"/>
+ </attribute>
+ </value-set>
+ </attribute-set>
+</attribute-sets>
Added: chukwa/trunk/contrib/solr/logs/conf/currency.xml
URL: http://svn.apache.org/viewvc/chukwa/trunk/contrib/solr/logs/conf/currency.xml?rev=1614808&view=auto
==============================================================================
--- chukwa/trunk/contrib/solr/logs/conf/currency.xml (added)
+++ chukwa/trunk/contrib/solr/logs/conf/currency.xml Thu Jul 31 04:04:59 2014
@@ -0,0 +1,67 @@
+<?xml version="1.0" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- Example exchange rates file for CurrencyField type named "currency" in example schema -->
+
+<currencyConfig version="1.0">
+ <rates>
+ <!-- Updated from http://www.exchangerate.com/ at 2011-09-27 -->
+ <rate from="USD" to="ARS" rate="4.333871" comment="ARGENTINA Peso" />
+ <rate from="USD" to="AUD" rate="1.025768" comment="AUSTRALIA Dollar" />
+ <rate from="USD" to="EUR" rate="0.743676" comment="European Euro" />
+ <rate from="USD" to="BRL" rate="1.881093" comment="BRAZIL Real" />
+ <rate from="USD" to="CAD" rate="1.030815" comment="CANADA Dollar" />
+ <rate from="USD" to="CLP" rate="519.0996" comment="CHILE Peso" />
+ <rate from="USD" to="CNY" rate="6.387310" comment="CHINA Yuan" />
+ <rate from="USD" to="CZK" rate="18.47134" comment="CZECH REP. Koruna" />
+ <rate from="USD" to="DKK" rate="5.515436" comment="DENMARK Krone" />
+ <rate from="USD" to="HKD" rate="7.801922" comment="HONG KONG Dollar" />
+ <rate from="USD" to="HUF" rate="215.6169" comment="HUNGARY Forint" />
+ <rate from="USD" to="ISK" rate="118.1280" comment="ICELAND Krona" />
+ <rate from="USD" to="INR" rate="49.49088" comment="INDIA Rupee" />
+ <rate from="USD" to="XDR" rate="0.641358" comment="INTNL MON. FUND SDR" />
+ <rate from="USD" to="ILS" rate="3.709739" comment="ISRAEL Sheqel" />
+ <rate from="USD" to="JPY" rate="76.32419" comment="JAPAN Yen" />
+ <rate from="USD" to="KRW" rate="1169.173" comment="KOREA (SOUTH) Won" />
+ <rate from="USD" to="KWD" rate="0.275142" comment="KUWAIT Dinar" />
+ <rate from="USD" to="MXN" rate="13.85895" comment="MEXICO Peso" />
+ <rate from="USD" to="NZD" rate="1.285159" comment="NEW ZEALAND Dollar" />
+ <rate from="USD" to="NOK" rate="5.859035" comment="NORWAY Krone" />
+ <rate from="USD" to="PKR" rate="87.57007" comment="PAKISTAN Rupee" />
+ <rate from="USD" to="PEN" rate="2.730683" comment="PERU Sol" />
+ <rate from="USD" to="PHP" rate="43.62039" comment="PHILIPPINES Peso" />
+ <rate from="USD" to="PLN" rate="3.310139" comment="POLAND Zloty" />
+ <rate from="USD" to="RON" rate="3.100932" comment="ROMANIA Leu" />
+ <rate from="USD" to="RUB" rate="32.14663" comment="RUSSIA Ruble" />
+ <rate from="USD" to="SAR" rate="3.750465" comment="SAUDI ARABIA Riyal" />
+ <rate from="USD" to="SGD" rate="1.299352" comment="SINGAPORE Dollar" />
+ <rate from="USD" to="ZAR" rate="8.329761" comment="SOUTH AFRICA Rand" />
+ <rate from="USD" to="SEK" rate="6.883442" comment="SWEDEN Krona" />
+ <rate from="USD" to="CHF" rate="0.906035" comment="SWITZERLAND Franc" />
+ <rate from="USD" to="TWD" rate="30.40283" comment="TAIWAN Dollar" />
+ <rate from="USD" to="THB" rate="30.89487" comment="THAILAND Baht" />
+ <rate from="USD" to="AED" rate="3.672955" comment="U.A.E. Dirham" />
+ <rate from="USD" to="UAH" rate="7.988582" comment="UKRAINE Hryvnia" />
+ <rate from="USD" to="GBP" rate="0.647910" comment="UNITED KINGDOM Pound" />
+
+ <!-- Cross-rates for some common currencies -->
+ <rate from="EUR" to="GBP" rate="0.869914" />
+ <rate from="EUR" to="NOK" rate="7.800095" />
+ <rate from="GBP" to="NOK" rate="8.966508" />
+ </rates>
+</currencyConfig>
Added: chukwa/trunk/contrib/solr/logs/conf/elevate.xml
URL: http://svn.apache.org/viewvc/chukwa/trunk/contrib/solr/logs/conf/elevate.xml?rev=1614808&view=auto
==============================================================================
--- chukwa/trunk/contrib/solr/logs/conf/elevate.xml (added)
+++ chukwa/trunk/contrib/solr/logs/conf/elevate.xml Thu Jul 31 04:04:59 2014
@@ -0,0 +1,38 @@
+<?xml version="1.0" encoding="UTF-8" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- If this file is found in the config directory, it will only be
+ loaded once at startup. If it is found in Solr's data
+ directory, it will be re-loaded every commit.
+
+ See http://wiki.apache.org/solr/QueryElevationComponent for more info
+
+-->
+<elevate>
+ <query text="foo bar">
+ <doc id="1" />
+ <doc id="2" />
+ <doc id="3" />
+ </query>
+
+ <query text="ipod">
+ <doc id="MA147LL/A" /> <!-- put the actual ipod at the top -->
+ <doc id="IW-02" exclude="true" /> <!-- exclude this cable -->
+ </query>
+
+</elevate>
Added: chukwa/trunk/contrib/solr/logs/conf/lang/contractions_ca.txt
URL: http://svn.apache.org/viewvc/chukwa/trunk/contrib/solr/logs/conf/lang/contractions_ca.txt?rev=1614808&view=auto
==============================================================================
--- chukwa/trunk/contrib/solr/logs/conf/lang/contractions_ca.txt (added)
+++ chukwa/trunk/contrib/solr/logs/conf/lang/contractions_ca.txt Thu Jul 31 04:04:59 2014
@@ -0,0 +1,8 @@
+# Set of Catalan contractions for ElisionFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+d
+l
+m
+n
+s
+t
Added: chukwa/trunk/contrib/solr/logs/conf/lang/contractions_fr.txt
URL: http://svn.apache.org/viewvc/chukwa/trunk/contrib/solr/logs/conf/lang/contractions_fr.txt?rev=1614808&view=auto
==============================================================================
--- chukwa/trunk/contrib/solr/logs/conf/lang/contractions_fr.txt (added)
+++ chukwa/trunk/contrib/solr/logs/conf/lang/contractions_fr.txt Thu Jul 31 04:04:59 2014
@@ -0,0 +1,15 @@
+# Set of French contractions for ElisionFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+l
+m
+t
+qu
+n
+s
+j
+d
+c
+jusqu
+quoiqu
+lorsqu
+puisqu
Added: chukwa/trunk/contrib/solr/logs/conf/lang/contractions_ga.txt
URL: http://svn.apache.org/viewvc/chukwa/trunk/contrib/solr/logs/conf/lang/contractions_ga.txt?rev=1614808&view=auto
==============================================================================
--- chukwa/trunk/contrib/solr/logs/conf/lang/contractions_ga.txt (added)
+++ chukwa/trunk/contrib/solr/logs/conf/lang/contractions_ga.txt Thu Jul 31 04:04:59 2014
@@ -0,0 +1,5 @@
+# Set of Irish contractions for ElisionFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+d
+m
+b
Added: chukwa/trunk/contrib/solr/logs/conf/lang/contractions_it.txt
URL: http://svn.apache.org/viewvc/chukwa/trunk/contrib/solr/logs/conf/lang/contractions_it.txt?rev=1614808&view=auto
==============================================================================
--- chukwa/trunk/contrib/solr/logs/conf/lang/contractions_it.txt (added)
+++ chukwa/trunk/contrib/solr/logs/conf/lang/contractions_it.txt Thu Jul 31 04:04:59 2014
@@ -0,0 +1,23 @@
+# Set of Italian contractions for ElisionFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+c
+l
+all
+dall
+dell
+nell
+sull
+coll
+pell
+gl
+agl
+dagl
+degl
+negl
+sugl
+un
+m
+t
+s
+v
+d
Added: chukwa/trunk/contrib/solr/logs/conf/lang/hyphenations_ga.txt
URL: http://svn.apache.org/viewvc/chukwa/trunk/contrib/solr/logs/conf/lang/hyphenations_ga.txt?rev=1614808&view=auto
==============================================================================
--- chukwa/trunk/contrib/solr/logs/conf/lang/hyphenations_ga.txt (added)
+++ chukwa/trunk/contrib/solr/logs/conf/lang/hyphenations_ga.txt Thu Jul 31 04:04:59 2014
@@ -0,0 +1,5 @@
+# Set of Irish hyphenations for StopFilter
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+h
+n
+t
Added: chukwa/trunk/contrib/solr/logs/conf/lang/stemdict_nl.txt
URL: http://svn.apache.org/viewvc/chukwa/trunk/contrib/solr/logs/conf/lang/stemdict_nl.txt?rev=1614808&view=auto
==============================================================================
--- chukwa/trunk/contrib/solr/logs/conf/lang/stemdict_nl.txt (added)
+++ chukwa/trunk/contrib/solr/logs/conf/lang/stemdict_nl.txt Thu Jul 31 04:04:59 2014
@@ -0,0 +1,6 @@
+# Set of overrides for the Dutch stemmer
+# TODO: load this as a resource from the analyzer and sync it in build.xml
+fiets fiets
+bromfiets bromfiets
+ei eier
+kind kinder
Added: chukwa/trunk/contrib/solr/logs/conf/lang/stoptags_ja.txt
URL: http://svn.apache.org/viewvc/chukwa/trunk/contrib/solr/logs/conf/lang/stoptags_ja.txt?rev=1614808&view=auto
==============================================================================
--- chukwa/trunk/contrib/solr/logs/conf/lang/stoptags_ja.txt (added)
+++ chukwa/trunk/contrib/solr/logs/conf/lang/stoptags_ja.txt Thu Jul 31 04:04:59 2014
@@ -0,0 +1,420 @@
+#
+# This file defines a Japanese stoptag set for JapanesePartOfSpeechStopFilter.
+#
+# Any token with a part-of-speech tag that exactly matches one defined in this
+# file is removed from the token stream.
+#
+# Set your own stoptags by uncommenting the lines below. Note that comments are
+# not allowed on the same line as a stoptag. See LUCENE-3745 for frequency lists,
+# etc. that can be useful for building your own stoptag set.
+#
+# The entire possible tagset is provided below for convenience.
+#
+#####
+# noun: unclassified nouns
+#åè©
+#
+# noun-common: Common nouns or nouns where the sub-classification is undefined
+#åè©-ä¸è¬
+#
+# noun-proper: Proper nouns where the sub-classification is undefined
+#åè©-åºæåè©
+#
+# noun-proper-misc: miscellaneous proper nouns
+#åè©-åºæåè©-ä¸è¬
+#
+# noun-proper-person: Personal names where the sub-classification is undefined
+#åè©-åºæåè©-人å
+#
+# noun-proper-person-misc: names that cannot be divided into surname and
+# given name; foreign names; names where the surname or given name is unknown.
+# e.g. ãå¸ã®æ¹
+#åè©-åºæåè©-人å-ä¸è¬
+#
+# noun-proper-person-surname: Mainly Japanese surnames.
+# e.g. å±±ç°
+#åè©-åºæåè©-人å-å§
+#
+# noun-proper-person-given_name: Mainly Japanese given names.
+# e.g. 太é
+#åè©-åºæåè©-人å-å
+#
+# noun-proper-organization: Names representing organizations.
+# e.g. éç£ç, NHK
+#åè©-åºæåè©-çµç¹
+#
+# noun-proper-place: Place names where the sub-classification is undefined
+#åè©-åºæåè©-å°å
+#
+# noun-proper-place-misc: Place names excluding countries.
+# e.g. ã¢ã¸ã¢, ãã«ã»ãã, 京é½
+#åè©-åºæåè©-å°å-ä¸è¬
+#
+# noun-proper-place-country: Country names.
+# e.g. æ¥æ¬, ãªã¼ã¹ãã©ãªã¢
+#åè©-åºæåè©-å°å-å½
+#
+# noun-pronoun: Pronouns where the sub-classification is undefined
+#åè©-代åè©
+#
+# noun-pronoun-misc: miscellaneous pronouns:
+# e.g. ãã, ãã, ããã¤, ããªã, ãã¡ãã¡, ããã¤, ã©ãã, ãªã«, ã¿ãªãã, ã¿ããª, ãããã, ãããã
+#åè©-代åè©-ä¸è¬
+#
+# noun-pronoun-contraction: Spoken language contraction made by combining a
+# pronoun and the particle 'wa'.
+# e.g. ããã, ããã, ãããã, ããã, ãããã
+#åè©-代åè©-縮ç´
+#
+# noun-adverbial: Temporal nouns such as names of days or months that behave
+# like adverbs. Nouns that represent amount or ratios and can be used adverbially,
+# e.g. éæ, ä¸æ, åå¾, å°é
+#åè©-å¯è©å¯è½
+#
+# noun-verbal: Nouns that take arguments with case and can appear followed by
+# 'suru' and related verbs (ãã, ã§ãã, ãªãã, ãã ãã)
+# e.g. ã¤ã³ããã, æç, æªå, æªæ¦è¦é, ä¸å®å¿, ä¸åã
+#åè©-ãµå¤æ¥ç¶
+#
+# noun-adjective-base: The base form of adjectives, words that appear before 㪠("na")
+# e.g. å¥åº·, å®æ, é§ç®, ã ã
+#åè©-形容åè©èªå¹¹
+#
+# noun-numeric: Arabic numbers, Chinese numerals, and counters like ä½ (å), æ°.
+# e.g. 0, 1, 2, ä½, æ°, å¹¾
+#åè©-æ°
+#
+# noun-affix: noun affixes where the sub-classification is undefined
+#åè©-éèªç«
+#
+# noun-affix-misc: Of adnominalizers, the case-marker ã® ("no"), and words that
+# attach to the base form of inflectional words, words that cannot be classified
+# into any of the other categories below. This category includes indefinite nouns.
+# e.g. ããã¤ã, æ, ãã, ç²æ, æ°, ããã, å«ã, ãã, ç, ãã¨, äº, ãã¨, æ¯, ãã ã, 次第,
+# é , ãã, æçº, ã¤ãã§, åºã§, ã¤ãã, ç©ãã, ç¹, ã©ãã, ã®, ã¯ã, ç, ã¯ãã¿, å¼¾ã¿,
+# æå, ãµã, ãµã, æ¯ã, ã»ã, æ¹, æ¨, ãã®, ç©, è
, ãã, æ
, ããã, æ以, ãã, 訳,
+# ãã, å²ã, å², ã-å£èª/, ãã-å£èª/
+#åè©-éèªç«-ä¸è¬
+#
+# noun-affix-adverbial: noun affixes that can behave as adverbs.
+# e.g. ããã , é, ããã, æãå¥, ãã¨, å¾, ä½ã, 以å¤, 以é, 以å¾, 以ä¸, 以å, ä¸æ¹, ãã,
+# ä¸, ãã¡, å
, ãã, æã, ããã, éã, ãã, ã£ãã, çµæ, ãã, é , ãã, é, æä¸, ããªã,
+# æä¸, ããã, èªä½, ãã³, 度, ãã, çº, ã¤ã©, é½åº¦, ã¨ãã, éã, ã¨ã, æ, ã¨ãã, æ,
+# ã¨ãã, é端, ãªã, ä¸, ã®ã¡, å¾, ã°ãã, å ´å, æ¥, ã¶ã, å, ã»ã, ä», ã¾ã, å, ã¾ã¾,
+# å, ä¾, ã¿ãã, ç¢å
+#åè©-éèªç«-å¯è©å¯è½
+#
+# noun-affix-aux: noun affixes treated as å©åè© ("auxiliary verb") in school grammars
+# with the stem ãã(ã ) ("you(da)").
+# e.g. ãã, ãã, æ§ (ãã)
+#åè©-éèªç«-å©åè©èªå¹¹
+#
+# noun-affix-adjective-base: noun affixes that can connect to the indeclinable
+# connection form 㪠(aux "da").
+# e.g. ã¿ãã, ãµã
+#åè©-éèªç«-形容åè©èªå¹¹
+#
+# noun-special: special nouns where the sub-classification is undefined.
+#åè©-ç¹æ®
+#
+# noun-special-aux: The ããã ("souda") stem form that is used for reporting news, is
+# treated as å©åè© ("auxiliary verb") in school grammars, and attach to the base
+# form of inflectional words.
+# e.g. ãã
+#åè©-ç¹æ®-å©åè©èªå¹¹
+#
+# noun-suffix: noun suffixes where the sub-classification is undefined.
+#åè©-æ¥å°¾
+#
+# noun-suffix-misc: Of the nouns or stem forms of other parts of speech that connect
+# to ã¬ã« or ã¿ã¤ and can combine into compound nouns, words that cannot be classified into
+# any of the other categories below. In general, this category is more inclusive than
+# æ¥å°¾èª ("suffix") and is usually the last element in a compound noun.
+# e.g. ãã, ãã, æ¹, ç²æ (ãã), ããã, ãã¿, æ°å³, ããã¿, (ï½ãã) ã, 次第, æ¸ (ã) ã¿,
+# ãã, (ã§ã)ã£ã, æ, 観, æ§, å¦, é¡, é¢, ç¨
+#åè©-æ¥å°¾-ä¸è¬
+#
+# noun-suffix-person: Suffixes that form nouns and attach to person names more often
+# than other nouns.
+# e.g. å, æ§, è
+#åè©-æ¥å°¾-人å
+#
+# noun-suffix-place: Suffixes that form nouns and attach to place names more often
+# than other nouns.
+# e.g. çº, å¸, ç
+#åè©-æ¥å°¾-å°å
+#
+# noun-suffix-verbal: Of the suffixes that attach to nouns and form nouns, those that
+# can appear before ã¹ã« ("suru").
+# e.g. å, è¦, åã, å
¥ã, è½ã¡, è²·ã
+#åè©-æ¥å°¾-ãµå¤æ¥ç¶
+#
+# noun-suffix-aux: The stem form of ããã (æ§æ
) that is used to indicate conditions,
+# is treated as å©åè© ("auxiliary verb") in school grammars, and attach to the
+# conjunctive form of inflectional words.
+# e.g. ãã
+#åè©-æ¥å°¾-å©åè©èªå¹¹
+#
+# noun-suffix-adjective-base: Suffixes that attach to other nouns or the conjunctive
+# form of inflectional words and appear before the copula ã ("da").
+# e.g. ç, ã, ãã¡
+#åè©-æ¥å°¾-形容åè©èªå¹¹
+#
+# noun-suffix-adverbial: Suffixes that attach to other nouns and can behave as adverbs.
+# e.g. å¾ (ã), 以å¾, 以é, 以å, åå¾, ä¸, æ«, ä¸, æ (ã)
+#åè©-æ¥å°¾-å¯è©å¯è½
+#
+# noun-suffix-classifier: Suffixes that attach to numbers and form nouns. This category
+# is more inclusive than å©æ°è© ("classifier") and includes common nouns that attach
+# to numbers.
+# e.g. å, ã¤, æ¬, å, ãã¼ã»ã³ã, cm, kg, ã«æ, ãå½, åºç», æé, æå
+#åè©-æ¥å°¾-å©æ°è©
+#
+# noun-suffix-special: Special suffixes that mainly attach to inflecting words.
+# e.g. (楽ã) ã, (èã) æ¹
+#åè©-æ¥å°¾-ç¹æ®
+#
+# noun-suffix-conjunctive: Nouns that behave like conjunctions and join two words
+# together.
+# e.g. (æ¥æ¬) 対 (ã¢ã¡ãªã«), 対 (ã¢ã¡ãªã«), (3) 対 (5), (女åª) å
¼ (主婦)
+#åè©-æ¥ç¶è©ç
+#
+# noun-verbal_aux: Nouns that attach to the conjunctive particle 㦠("te") and are
+# semantically verb-like.
+# e.g. ããã, ã覧, 御覧, é æ´
+#åè©-åè©éèªç«ç
+#
+# noun-quotation: text that cannot be segmented into words, proverbs, Chinese poetry,
+# dialects, English, etc. Currently, the only entry for åè© å¼ç¨æåå ("noun quotation")
+# is ããã ("iwaku").
+#åè©-å¼ç¨æåå
+#
+# noun-nai_adjective: Words that appear before the auxiliary verb ãªã ("nai") and
+# behave like an adjective.
+# e.g. ç³ã訳, ä»æ¹, ã¨ãã§ã, éã
+#åè©-ãã¤å½¢å®¹è©èªå¹¹
+#
+#####
+# prefix: unclassified prefixes
+#æ¥é è©
+#
+# prefix-nominal: Prefixes that attach to nouns (including adjective stem forms)
+# excluding numerical expressions.
+# e.g. ã (æ°´), æ (æ°), å (社), æ
(ï½æ°), é« (å質), ã (è¦äº), ã (ç«æ´¾)
+#æ¥é è©-åè©æ¥ç¶
+#
+# prefix-verbal: Prefixes that attach to the imperative form of a verb or a verb
+# in conjunctive form followed by ãªã/ãªãã/ãã ãã.
+# e.g. ã (èªã¿ãªãã), ã (座ã)
+#æ¥é è©-åè©æ¥ç¶
+#
+# prefix-adjectival: Prefixes that attach to adjectives.
+# e.g. ã (å¯ãã§ããã), ãã« (ã§ãã)
+#æ¥é è©-形容è©æ¥ç¶
+#
+# prefix-numerical: Prefixes that attach to numerical expressions.
+# e.g. ç´, ããã, æ¯æ
+#æ¥é è©-æ°æ¥ç¶
+#
+#####
+# verb: unclassified verbs
+#åè©
+#
+# verb-main:
+#åè©-èªç«
+#
+# verb-auxiliary:
+#åè©-éèªç«
+#
+# verb-suffix:
+#åè©-æ¥å°¾
+#
+#####
+# adjective: unclassified adjectives
+#形容è©
+#
+# adjective-main:
+#形容è©-èªç«
+#
+# adjective-auxiliary:
+#形容è©-éèªç«
+#
+# adjective-suffix:
+#形容è©-æ¥å°¾
+#
+#####
+# adverb: unclassified adverbs
+#å¯è©
+#
+# adverb-misc: Words that can be segmented into one unit and where adnominal
+# modification is not possible.
+# e.g. ãããããã, å¤å
+#å¯è©-ä¸è¬
+#
+# adverb-particle_conjunction: Adverbs that can be followed by ã®, ã¯, ã«,
+# ãª, ãã, ã , etc.
+# e.g. ãããªã«, ãããªã«, ãããªã«, ãªã«ã, ãªãã§ã
+#å¯è©-å©è©é¡æ¥ç¶
+#
+#####
+# adnominal: Words that only have noun-modifying forms.
+# e.g. ãã®, ãã®, ãã®, ã©ã®, ãããã, ãªãããã®, ä½ããã®, ããããª, ãããã, ãããã, ãããã,
+# ã©ããã, ãããª, ãããª, ãããª, ã©ããª, 大ããª, å°ããª, ããããª, ã»ãã®, ãããã,
+# ã(, ã) ãã (ãã¨ãªãã)ã, å¾®ã
ãã, å ã
ãã, åãªã, ãããªã, æãããåã, 亡ã
+#é£ä½è©
+#
+#####
+# conjunction: Conjunctions that can occur independently.
+# e.g. ã, ããã©ã, ããã¦, ããã, ããã©ããã
+æ¥ç¶è©
+#
+#####
+# particle: unclassified particles.
+å©è©
+#
+# particle-case: case particles where the subclassification is undefined.
+å©è©-æ ¼å©è©
+#
+# particle-case-misc: Case particles.
+# e.g. ãã, ã, ã§, ã¨, ã«, ã¸, ãã, ã, ã®, ã«ã¦
+å©è©-æ ¼å©è©-ä¸è¬
+#
+# particle-case-quote: the "to" that appears after nouns, a person's speech,
+# quotation marks, expressions of decisions from a meeting, reasons, judgements,
+# conjectures, etc.
+# e.g. ( ã ) 㨠(è¿°ã¹ã.), ( ã§ãã) 㨠(ãã¦å·è¡ç¶äº...)
+å©è©-æ ¼å©è©-å¼ç¨
+#
+# particle-case-compound: Compounds of particles and verbs that mainly behave
+# like case particles.
+# e.g. ã¨ãã, ã¨ãã£ã, ã¨ããã, ã¨ãã¦, ã¨ã¨ãã«, ã¨å
±ã«, ã§ãã£ã¦, ã«ããã£ã¦, ã«å½ãã£ã¦, ã«å½ã£ã¦,
+# ã«ããã, ã«å½ãã, ã«å½ã, ã«å½ãã, ã«ããã, ã«ããã¦, ã«æ¼ãã¦,ã«æ¼ã¦, ã«ããã, ã«æ¼ãã,
+# ã«ãã, ã«ããã¦, ã«ããã, ã«é¢ã, ã«ãããã¦, ã«é¢ãã¦, ã«ãããã, ã«é¢ãã, ã«éã,
+# ã«éãã¦, ã«ãããã, ã«å¾ã, ã«å¾ã, ã«ãããã£ã¦, ã«å¾ã£ã¦, ã«ããã, ã«å¯¾ã, ã«ãããã¦,
+# ã«å¯¾ãã¦, ã«ãããã, ã«å¯¾ãã, ã«ã¤ãã¦, ã«ã¤ã, ã«ã¤ã, ã«ã¤ãã¦, ã«ã¤ã, ã«ã¤ãã¦, ã«ã¨ã£ã¦,
+# ã«ã¨ã, ã«ã¾ã¤ãã, ã«ãã£ã¦, ã«ä¾ã£ã¦, ã«å ã£ã¦, ã«ãã, ã«ä¾ã, ã«å ã, ã«ãã, ã«ä¾ã, ã«å ã,
+# ã«ããã£ã¦, ã«ããã, ããã£ã¦, ã以ã£ã¦, ãéã, ãéãã¦, ãéãã¦, ãããã£ã¦, ãããã, ãããã,
+# ã£ã¦-å£èª/, ã¡ã
ã-é¢è¥¿å¼ãã¨ããã/, (ä½) ã¦ãã (人)-å£èª/, ã£ã¦ãã-å£èª/, ã¨ããµ, ã¨ãããµ
+å©è©-æ ¼å©è©-é£èª
+#
+# particle-conjunctive:
+# e.g. ãã, ããã«ã¯, ã, ããã©, ããã©ã, ãã©, ã, ã¤ã¤, ã¦, ã§, ã¨, ã¨ããã, ã©ããã, ã¨ã, ã©ã,
+# ãªãã, ãªã, ã®ã§, ã®ã«, ã°, ãã®ã®, ã ( ãã), ãããªã, (ããã) ãã(ãããªã)-å£èª/,
+# (è¡ã£) ã¡ã(ãããªã)-å£èª/, (è¨ã£) ãã£ã¦ (ãããããªã)-å£èª/, (ããããªã)ã£ãã£ã¦ (å¹³æ°)-å£èª/
+å©è©-æ¥ç¶å©è©
+#
+# particle-dependency:
+# e.g. ãã, ãã, ãã, ãã, ã¯, ã, ã
+å©è©-ä¿å©è©
+#
+# particle-adverbial:
+# e.g. ãã¦ã, ãã, ããã, ä½, ããã, ãã, (å¦æ ¡) ãã(ãããæµè¡ã£ã¦ãã)-å£èª/,
+# (ãã)ããã (ãããªã)-å£èª/, ãã¤, (ç§) ãªã, ãªã©, (ç§) ãªã (ã«), (å
ç) ãªãã (大å«ã)-å£èª/,
+# (ç§) ãªãã, (å
ç) ãªã㦠(大å«ã)-å£èª/, ã®ã¿, ã ã, (ç§) ã ã£ã¦-å£èª/, ã ã«,
+# (å½¼)ã£ãã-å£èª/, (ãè¶) ã§ã (ããã), ç (ã¨ã), (ä»å¾) ã¨ã, ã°ãã, ã°ã£ã-å£èª/, ã°ã£ãã-å£èª/,
+# ã»ã©, ç¨, ã¾ã§, è¿, (誰) ã (ã)([å©è©-æ ¼å©è©] ããã³ [å©è©-ä¿å©è©] ã®åã«ä½ç½®ããããã)
+å©è©-å¯å©è©
+#
+# particle-interjective: particles with interjective grammatical roles.
+# e.g. (æ¾å³¶) ã
+å©è©-éæå©è©
+#
+# particle-coordinate:
+# e.g. ã¨, ãã, ã ã®, ã ã, ã¨ã, ãªã, ã, ãã
+å©è©-並ç«å©è©
+#
+# particle-final:
+# e.g. ãã, ããã, ã, ã, (ã )ã£ã-å£èª/, (ã¨ã¾ã£ã¦ã) ã§-æ¹è¨/, ãª, ã, ãªã-å£èª/, ã, ã, ã,
+# ãã-å£èª/, ãã-å£èª/, ãã-æ¹è¨/, ã®, ã®ã-å£èª/, ã, ã, ã¨, ãã-å£èª/, ã, ãã-å£èª/
+å©è©-çµå©è©
+#
+# particle-adverbial/conjunctive/final: The particle "ka" when unknown whether it is
+# adverbial, conjunctive, or sentence final. For example:
+# (a) ãA ã B ãã. Ex:ã(å½å
ã§éç¨ãã) ã,(æµ·å¤ã§éç¨ãã) ã (.)ã
+# (b) Inside an adverb phrase. Ex:ã(幸ãã¨ãã) ã (, æ»è
ã¯ããªãã£ã.)ã
+# ã(ç¥ããå±ãããã) ã (, 試é¨ã«åæ ¼ãã.)ã
+# (c) ããã®ããã«ã. Ex:ã(ä½ããªãã£ã) ã (ã®ããã«æ¯ãèã£ã.)ã
+# e.g. ã
+å©è©-å¯å©è©ï¼ä¸¦ç«å©è©ï¼çµå©è©
+#
+# particle-adnominalizer: The "no" that attaches to nouns and modifies
+# non-inflectional words.
+å©è©-é£ä½å
+#
+# particle-adverbializer: The "ni" and "to" that appear following nouns and adverbs
+# that are giongo, giseigo, or gitaigo.
+# e.g. ã«, ã¨
+å©è©-å¯è©å
+#
+# particle-special: A particle that does not fit into one of the above classifications.
+# This includes particles that are used in Tanka, Haiku, and other poetry.
+# e.g. ããª, ãã, ( ããã ãã) ã«, (ããã) ã«ã(ãããã), (俺) ã (家)
+å©è©-ç¹æ®
+#
+#####
+# auxiliary-verb:
+å©åè©
+#
+#####
+# interjection: Greetings and other exclamations.
+# e.g. ãã¯ãã, ãã¯ãããããã¾ã, ããã«ã¡ã¯, ããã°ãã¯, ãããã¨ã, ã©ãããããã¨ã, ãããã¨ããããã¾ã,
+# ããã ãã¾ã, ãã¡ãããã¾, ãããªã, ããããªã, ã¯ã, ããã, ããã, ããããªãã
+#æåè©
+#
+#####
+# symbol: unclassified Symbols.
+è¨å·
+#
+# symbol-misc: A general symbol not in one of the categories below.
+# e.g. [ââ@$ãâ+]
+è¨å·-ä¸è¬
+#
+# symbol-comma: Commas
+# e.g. [,ã]
+è¨å·-èªç¹
+#
+# symbol-period: Periods and full stops.
+# e.g. [.ï¼ã]
+è¨å·-å¥ç¹
+#
+# symbol-space: Full-width whitespace.
+è¨å·-空ç½
+#
+# symbol-open_bracket:
+# e.g. [({ââãã]
+è¨å·-æ¬å¼§é
+#
+# symbol-close_bracket:
+# e.g. [)}ââããã]
+è¨å·-æ¬å¼§é
+#
+# symbol-alphabetic:
+#è¨å·-ã¢ã«ãã¡ããã
+#
+#####
+# other: unclassified other
+#ãã®ä»
+#
+# other-interjection: Words that are hard to classify as noun-suffixes or
+# sentence-final particles.
+# e.g. (ã )ã¡
+ãã®ä»-éæ
+#
+#####
+# filler: Aizuchi that occurs during a conversation or sounds inserted as filler.
+# e.g. ãã®, ããã¨, ãã¨
+ãã£ã©ã¼
+#
+#####
+# non-verbal: non-verbal sound.
+éè¨èªé³
+#
+#####
+# fragment:
+#èªæç
+#
+#####
+# unknown: unknown part of speech.
+#æªç¥èª
+#
+##### End of file
Added: chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_ar.txt
URL: http://svn.apache.org/viewvc/chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_ar.txt?rev=1614808&view=auto
==============================================================================
--- chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_ar.txt (added)
+++ chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_ar.txt Thu Jul 31 04:04:59 2014
@@ -0,0 +1,125 @@
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# Also see http://www.opensource.org/licenses/bsd-license.html
+# Cleaned on October 11, 2009 (not normalized, so use before normalization)
+# This means that when modifying this list, you might need to add some
+# redundant entries, for example containing forms with both أ and ا
+Ù
Ù
+ÙÙ
Ù
+Ù
ÙÙا
+Ù
ÙÙ
+ÙÙ
+ÙÙÙ
+ÙÙÙا
+ÙÙÙ
+Ù
+Ù
+Ø«Ù
+اÙ
+Ø£Ù
+ب
+بÙا
+بÙ
+ا
+Ø£
+اÙ
+اÙ
+Ø£Ù
+Ø£Ù
+Ùا
+ÙÙا
+اÙا
+Ø£Ùا
+Ø¥Ùا
+ÙÙÙ
+Ù
ا
+ÙÙ
ا
+ÙÙ
ا
+ÙÙ
ا
+عÙ
+Ù
ع
+اذا
+إذا
+اÙ
+Ø£Ù
+Ø¥Ù
+اÙÙا
+Ø£ÙÙا
+Ø¥ÙÙا
+اÙÙ
+Ø£ÙÙ
+Ø¥ÙÙ
+باÙ
+بأÙ
+ÙاÙ
+ÙØ£Ù
+ÙاÙ
+ÙØ£Ù
+ÙØ¥Ù
+اÙتÙ
+اÙتÙ
+اÙØ°Ù
+اÙØ°Ù
+اÙØ°ÙÙ
+اÙÙ
+اÙÙ
+Ø¥ÙÙ
+Ø¥ÙÙ
+عÙÙ
+عÙÙÙا
+عÙÙÙ
+اÙ
ا
+Ø£Ù
ا
+Ø¥Ù
ا
+اÙضا
+Ø£Ùضا
+ÙÙ
+ÙÙÙ
+ÙÙ
+ÙÙÙ
+ÙÙ
+ÙÙÙ
+ÙÙ
+ÙÙ
+ÙÙ
+ÙÙÙ
+ÙÙÙ
+ÙÙÙ
+ÙÙÙ
+ÙÙÙ
+ÙÙÙ
+اÙت
+Ø£Ùت
+ÙÙ
+ÙÙا
+ÙÙ
+ÙØ°Ù
+Ùذا
+تÙÙ
+Ø°ÙÙ
+ÙÙاÙ
+ÙاÙت
+ÙاÙ
+ÙÙÙÙ
+تÙÙÙ
+ÙÙاÙت
+ÙÙاÙ
+غÙر
+بعض
+Ùد
+ÙØÙ
+بÙÙ
+بÙÙÙ
ا
+Ù
ÙØ°
+ضÙ
Ù
+ØÙØ«
+اÙاÙ
+اÙØ¢Ù
+Ø®ÙاÙ
+بعد
+ÙبÙ
+ØتÙ
+عÙد
+عÙدÙ
ا
+ÙدÙ
+جÙ
Ùع
Added: chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_bg.txt
URL: http://svn.apache.org/viewvc/chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_bg.txt?rev=1614808&view=auto
==============================================================================
--- chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_bg.txt (added)
+++ chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_bg.txt Thu Jul 31 04:04:59 2014
@@ -0,0 +1,193 @@
+# This file was created by Jacques Savoy and is distributed under the BSD license.
+# See http://members.unine.ch/jacques.savoy/clef/index.html.
+# Also see http://www.opensource.org/licenses/bsd-license.html
+а
+аз
+ако
+ала
+бе
+без
+беÑе
+би
+бил
+била
+били
+било
+близо
+бÑдаÑ
+бÑде
+бÑÑ
а
+в
+ваÑ
+ваÑ
+ваÑа
+веÑоÑÑно
+веÑе
+взема
+ви
+вие
+винаги
+вÑе
+вÑеки
+вÑиÑки
+вÑиÑко
+вÑÑка
+вÑв
+вÑпÑеки
+вÑÑÑ
Ñ
+г
+ги
+главно
+го
+д
+да
+дали
+до
+докаÑо
+докога
+доÑи
+доÑега
+доÑÑа
+е
+едва
+един
+еÑо
+за
+зад
+заедно
+заÑади
+заÑега
+заÑова
+заÑо
+заÑоÑо
+и
+из
+или
+им
+има
+имаÑ
+иÑка
+й
+каза
+как
+каква
+какво
+какÑо
+какÑв
+каÑо
+кога
+когаÑо
+коеÑо
+коиÑо
+кой
+койÑо
+колко
+коÑÑо
+кÑде
+кÑдеÑо
+кÑм
+ли
+м
+ме
+междÑ
+мен
+ми
+мнозина
+мога
+могаÑ
+може
+молÑ
+моменÑа
+мÑ
+н
+на
+над
+назад
+най
+напÑави
+напÑед
+напÑимеÑ
+наÑ
+не
+него
+неÑ
+ни
+ние
+никой
+ниÑо
+но
+нÑкои
+нÑкой
+нÑма
+обаÑе
+около
+оÑвен
+оÑобено
+оÑ
+оÑгоÑе
+оÑново
+оÑе
+пак
+по
+повеÑе
+повеÑеÑо
+под
+поне
+поÑади
+поÑле
+поÑÑи
+пÑави
+пÑед
+пÑеди
+пÑез
+пÑи
+пÑк
+пÑÑво
+Ñ
+Ñа
+Ñамо
+Ñе
+Ñега
+Ñи
+ÑкоÑо
+Ñлед
+Ñме
+ÑпоÑед
+ÑÑед
+ÑÑеÑÑ
+ÑÑе
+ÑÑм
+ÑÑÑ
+ÑÑÑо
+Ñ
+Ñази
+Ñака
+Ñакива
+ÑакÑв
+Ñам
+Ñвой
+Ñе
+Ñези
+Ñи
+Ñн
+Ñо
+Ñова
+Ñогава
+Ñози
+Ñой
+Ñолкова
+ÑоÑно
+ÑÑÑбва
+ÑÑк
+ÑÑй
+ÑÑ
+ÑÑÑ
+Ñ
+Ñ
аÑеÑва
+Ñ
+Ñе
+ÑеÑÑо
+ÑÑез
+Ñе
+Ñом
+Ñ
Added: chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_ca.txt
URL: http://svn.apache.org/viewvc/chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_ca.txt?rev=1614808&view=auto
==============================================================================
--- chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_ca.txt (added)
+++ chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_ca.txt Thu Jul 31 04:04:59 2014
@@ -0,0 +1,220 @@
+# Catalan stopwords from http://github.com/vcl/cue.language (Apache 2 Licensed)
+a
+abans
+acÃ
+ah
+aixÃ
+això
+al
+als
+aleshores
+algun
+alguna
+algunes
+alguns
+alhora
+allÃ
+allÃ
+allò
+altra
+altre
+altres
+amb
+ambdós
+ambdues
+apa
+aquell
+aquella
+aquelles
+aquells
+aquest
+aquesta
+aquestes
+aquests
+aquÃ
+baix
+cada
+cadascú
+cadascuna
+cadascunes
+cadascuns
+com
+contra
+d'un
+d'una
+d'unes
+d'uns
+dalt
+de
+del
+dels
+des
+després
+dins
+dintre
+donat
+doncs
+durant
+e
+eh
+el
+els
+em
+en
+encara
+ens
+entre
+érem
+eren
+éreu
+es
+és
+esta
+estÃ
+està vem
+estaven
+està veu
+esteu
+et
+etc
+ets
+fins
+fora
+gairebé
+ha
+han
+has
+havia
+he
+hem
+heu
+hi
+ho
+i
+igual
+iguals
+ja
+l'hi
+la
+les
+li
+li'n
+llavors
+m'he
+ma
+mal
+malgrat
+mateix
+mateixa
+mateixes
+mateixos
+me
+mentre
+més
+meu
+meus
+meva
+meves
+molt
+molta
+moltes
+molts
+mon
+mons
+n'he
+n'hi
+ne
+ni
+no
+nogensmenys
+només
+nosaltres
+nostra
+nostre
+nostres
+o
+oh
+oi
+on
+pas
+pel
+pels
+per
+però
+perquè
+poc
+poca
+pocs
+poques
+potser
+propi
+qual
+quals
+quan
+quant
+que
+què
+quelcom
+qui
+quin
+quina
+quines
+quins
+s'ha
+s'han
+sa
+semblant
+semblants
+ses
+seu
+seus
+seva
+seva
+seves
+si
+sobre
+sobretot
+sóc
+solament
+sols
+son
+són
+sons
+sota
+sou
+t'ha
+t'han
+t'he
+ta
+tal
+també
+tampoc
+tan
+tant
+tanta
+tantes
+teu
+teus
+teva
+teves
+ton
+tons
+tot
+tota
+totes
+tots
+un
+una
+unes
+uns
+us
+va
+vaig
+vam
+van
+vas
+veu
+vosaltres
+vostra
+vostre
+vostres
Added: chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_ckb.txt
URL: http://svn.apache.org/viewvc/chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_ckb.txt?rev=1614808&view=auto
==============================================================================
--- chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_ckb.txt (added)
+++ chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_ckb.txt Thu Jul 31 04:04:59 2014
@@ -0,0 +1,136 @@
+# set of kurdish stopwords
+# note these have been normalized with our scheme (e represented with U+06D5, etc)
+# constructed from:
+# * Fig 5 of "Building A Test Collection For Sorani Kurdish" (Esmaili et al)
+# * "Sorani Kurdish: A Reference Grammar with selected readings" (Thackston)
+# * Corpus-based analysis of 77M word Sorani collection: wikipedia, news, blogs, etc
+
+# and
+Ù
+# which
+Ú©Û
+# of
+Û
+# made/did
+کرد
+# that/which
+ئÛÙÛÛ
+# on/head
+سÛر
+# two
+دÙÙ
+# also
+ÙÛرÙÛÙا
+# from/that
+ÙÛÙ
+# makes/does
+دÛکات
+# some
+ÚÛÙد
+# every
+ÙÛر
+
+# demonstratives
+# that
+ئÛÙ
+# this
+ئÛÙ
+
+# personal pronouns
+# I
+Ù
Ù
+# we
+ئÛÙ
Û
+# you
+تÛ
+# you
+ئÛÙÛ
+# he/she/it
+ئÛÙ
+# they
+ئÛÙاÙ
+
+# prepositions
+# to/with/by
+بÛ
+Ù¾Û
+# without
+بÛبÛ
+# along with/while/during
+بÛدÛÙ
+# in the opinion of
+بÛÙاÛ
+# according to
+بÛÙ¾ÛÛ
+# before
+بÛرÙÛ
+# in the direction of
+بÛرÛÙÛ
+# in front of/toward
+بÛرÛÙÛ
+# before/in the face of
+بÛردÛÙ
+# without
+بÛ
+# except for
+بÛجگÛ
+# for
+بÛ
+# on/in
+دÛ
+تÛ
+# with
+دÛÚ¯ÛÚµ
+# after
+دÙاÛ
+# except for/aside from
+جگÛ
+# in/from
+ÙÛ
+ÙÛ
+# in front of/before/because of
+ÙÛبÛر
+# between/among
+ÙÛبÛÛÙÛ
+# concerning/about
+ÙÛبابÛت
+# concerning
+ÙÛبارÛÛ
+# instead of
+ÙÛباتÛ
+# beside
+ÙÛبÙ
+# instead of
+ÙÛبرÛتÛ
+# behind
+ÙÛدÛÙ
+# with/together with
+ÙÛÚ¯ÛÚµ
+# by
+ÙÛÙاÛÛÙ
+# within
+ÙÛÙاÙ
+# between/among
+ÙÛÙÛÙ
+# for the sake of
+ÙÛÙ¾ÛÙاÙÛ
+# with respect to
+ÙÛرÛÙÛ
+# by means of/for
+ÙÛرÛ
+# for the sake of
+ÙÛرÛگا
+# on/on top of/according to
+ÙÛسÛر
+# under
+ÙÛÚÛر
+# between/among
+ÙاÙ
+# between/among
+ÙÛÙاÙ
+# after
+پاش
+# before
+Ù¾ÛØ´
+# like
+ÙÛÚ©
Added: chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_cz.txt
URL: http://svn.apache.org/viewvc/chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_cz.txt?rev=1614808&view=auto
==============================================================================
--- chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_cz.txt (added)
+++ chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_cz.txt Thu Jul 31 04:04:59 2014
@@ -0,0 +1,172 @@
+a
+s
+k
+o
+i
+u
+v
+z
+dnes
+cz
+tÃmto
+budeš
+budem
+byli
+jseš
+můj
+svým
+ta
+tomto
+tohle
+tuto
+tyto
+jej
+zda
+proÄ
+máte
+tato
+kam
+tohoto
+kdo
+kteÅÃ
+mi
+nám
+tom
+tomuto
+mÃt
+nic
+proto
+kterou
+byla
+toho
+protože
+asi
+ho
+naši
+napište
+re
+což
+tÃm
+takže
+svých
+jejÃ
+svými
+jste
+aj
+tu
+tedy
+teto
+bylo
+kde
+ke
+pravé
+ji
+nad
+nejsou
+Äi
+pod
+téma
+mezi
+pÅes
+ty
+pak
+vám
+ani
+když
+však
+neg
+jsem
+tento
+Älánku
+Älánky
+aby
+jsme
+pÅed
+pta
+jejich
+byl
+jeÅ¡tÄ
+až
+bez
+také
+pouze
+prvnÃ
+vaše
+která
+nás
+nový
+tipy
+pokud
+může
+strana
+jeho
+své
+jiné
+zprávy
+nové
+nenÃ
+vás
+jen
+podle
+zde
+už
+být
+vÃce
+bude
+již
+než
+který
+by
+které
+co
+nebo
+ten
+tak
+má
+pÅi
+od
+po
+jsou
+jak
+dalÅ¡Ã
+ale
+si
+se
+ve
+to
+jako
+za
+zpÄt
+ze
+do
+pro
+je
+na
+atd
+atp
+jakmile
+pÅiÄemž
+já
+on
+ona
+ono
+oni
+ony
+my
+vy
+jÃ
+ji
+mÄ
+mne
+jemu
+tomu
+tÄm
+tÄmu
+nÄmu
+nÄmuž
+jehož
+jÞ
+jelikož
+jež
+jakož
+naÄež
Added: chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_da.txt
URL: http://svn.apache.org/viewvc/chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_da.txt?rev=1614808&view=auto
==============================================================================
--- chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_da.txt (added)
+++ chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_da.txt Thu Jul 31 04:04:59 2014
@@ -0,0 +1,110 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/danish/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+ |
+ | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
+
+ | A Danish stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | This is a ranked list (commonest to rarest) of stopwords derived from
+ | a large text sample.
+
+
+og | and
+i | in
+jeg | I
+det | that (dem. pronoun)/it (pers. pronoun)
+at | that (in front of a sentence)/to (with infinitive)
+en | a/an
+den | it (pers. pronoun)/that (dem. pronoun)
+til | to/at/for/until/against/by/of/into, more
+er | present tense of "to be"
+som | who, as
+på | on/upon/in/on/at/to/after/of/with/for, on
+de | they
+med | with/by/in, along
+han | he
+af | of/by/from/off/for/in/with/on, off
+for | at/for/to/from/by/of/ago, in front/before, because
+ikke | not
+der | who/which, there/those
+var | past tense of "to be"
+mig | me/myself
+sig | oneself/himself/herself/itself/themselves
+men | but
+et | a/an/one, one (number), someone/somebody/one
+har | present tense of "to have"
+om | round/about/for/in/a, about/around/down, if
+vi | we
+min | my
+havde | past tense of "to have"
+ham | him
+hun | she
+nu | now
+over | over/above/across/by/beyond/past/on/about, over/past
+da | then, when/as/since
+fra | from/off/since, off, since
+du | you
+ud | out
+sin | his/her/its/one's
+dem | them
+os | us/ourselves
+op | up
+man | you/one
+hans | his
+hvor | where
+eller | or
+hvad | what
+skal | must/shall etc.
+selv | myself/yourself/herself/ourselves etc., even
+her | here
+alle | all/everyone/everybody etc.
+vil | will (verb)
+blev | past tense of "to stay/to remain/to get/to become"
+kunne | could
+ind | in
+når | when
+være | present tense of "to be"
+dog | however/yet/after all
+noget | something
+ville | would
+jo | you know/you see (adv), yes
+deres | their/theirs
+efter | after/behind/according to/for/by/from, later/afterwards
+ned | down
+skulle | should
+denne | this
+end | than
+dette | this
+mit | my/mine
+også | also
+under | under/beneath/below/during, below/underneath
+have | have
+dig | you
+anden | other
+hende | her
+mine | my
+alt | everything
+meget | much/very, plenty of
+sit | his, her, its, one's
+sine | his, her, its, one's
+vor | our
+mod | against
+disse | these
+hvis | if
+din | your/yours
+nogle | some
+hos | by/at
+blive | be/become
+mange | many
+ad | by/through
+bliver | present tense of "to be/to become"
+hendes | her/hers
+været | be
+thi | for (conj)
+jer | you
+sådan | such, like this/like that
Added: chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_de.txt
URL: http://svn.apache.org/viewvc/chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_de.txt?rev=1614808&view=auto
==============================================================================
--- chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_de.txt (added)
+++ chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_de.txt Thu Jul 31 04:04:59 2014
@@ -0,0 +1,294 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/german/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+ |
+ | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
+
+ | A German stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+ | The number of forms in this list is reduced significantly by passing it
+ | through the German stemmer.
+
+
+aber | but
+
+alle | all
+allem
+allen
+aller
+alles
+
+als | than, as
+also | so
+am | an + dem
+an | at
+
+ander | other
+andere
+anderem
+anderen
+anderer
+anderes
+anderm
+andern
+anderr
+anders
+
+auch | also
+auf | on
+aus | out of
+bei | by
+bin | am
+bis | until
+bist | art
+da | there
+damit | with it
+dann | then
+
+der | the
+den
+des
+dem
+die
+das
+
+daß | that
+
+derselbe | the same
+derselben
+denselben
+desselben
+demselben
+dieselbe
+dieselben
+dasselbe
+
+dazu | to that
+
+dein | thy
+deine
+deinem
+deinen
+deiner
+deines
+
+denn | because
+
+derer | of those
+dessen | of him
+
+dich | thee
+dir | to thee
+du | thou
+
+dies | this
+diese
+diesem
+diesen
+dieser
+dieses
+
+
+doch | (several meanings)
+dort | (over) there
+
+
+durch | through
+
+ein | a
+eine
+einem
+einen
+einer
+eines
+
+einig | some
+einige
+einigem
+einigen
+einiger
+einiges
+
+einmal | once
+
+er | he
+ihn | him
+ihm | to him
+
+es | it
+etwas | something
+
+euer | your
+eure
+eurem
+euren
+eurer
+eures
+
+für | for
+gegen | towards
+gewesen | p.p. of sein
+hab | have
+habe | have
+haben | have
+hat | has
+hatte | had
+hatten | had
+hier | here
+hin | there
+hinter | behind
+
+ich | I
+mich | me
+mir | to me
+
+
+ihr | you, to her
+ihre
+ihrem
+ihren
+ihrer
+ihres
+euch | to you
+
+im | in + dem
+in | in
+indem | while
+ins | in + das
+ist | is
+
+jede | each, every
+jedem
+jeden
+jeder
+jedes
+
+jene | that
+jenem
+jenen
+jener
+jenes
+
+jetzt | now
+kann | can
+
+kein | no
+keine
+keinem
+keinen
+keiner
+keines
+
+können | can
+könnte | could
+machen | do
+man | one
+
+manche | some, many a
+manchem
+manchen
+mancher
+manches
+
+mein | my
+meine
+meinem
+meinen
+meiner
+meines
+
+mit | with
+muss | must
+musste | had to
+nach | to(wards)
+nicht | not
+nichts | nothing
+noch | still, yet
+nun | now
+nur | only
+ob | whether
+oder | or
+ohne | without
+sehr | very
+
+sein | his
+seine
+seinem
+seinen
+seiner
+seines
+
+selbst | self
+sich | herself
+
+sie | they, she
+ihnen | to them
+
+sind | are
+so | so
+
+solche | such
+solchem
+solchen
+solcher
+solches
+
+soll | shall
+sollte | should
+sondern | but
+sonst | else
+über | over
+um | about, around
+und | and
+
+uns | us
+unse
+unsem
+unsen
+unser
+unses
+
+unter | under
+viel | much
+vom | von + dem
+von | from
+vor | before
+während | while
+war | was
+waren | were
+warst | wast
+was | what
+weg | away, off
+weil | because
+weiter | further
+
+welche | which
+welchem
+welchen
+welcher
+welches
+
+wenn | when
+werde | will
+werden | will
+wie | how
+wieder | again
+will | want
+wir | we
+wird | will
+wirst | willst
+wo | where
+wollen | want
+wollte | wanted
+würde | would
+würden | would
+zu | to
+zum | zu + dem
+zur | zu + der
+zwar | indeed
+zwischen | between
+
Added: chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_el.txt
URL: http://svn.apache.org/viewvc/chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_el.txt?rev=1614808&view=auto
==============================================================================
--- chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_el.txt (added)
+++ chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_el.txt Thu Jul 31 04:04:59 2014
@@ -0,0 +1,78 @@
+# Lucene Greek Stopwords list
+# Note: by default this file is used after GreekLowerCaseFilter,
+# so when modifying this file use 'σ' instead of 'ς'
+ο
+η
+Ïο
+οι
+Ïα
+ÏοÏ
+ÏηÏ
+ÏÏν
+Ïον
+Ïην
+και
+κι
+κ
+ειμαι
+ειÏαι
+ειναι
+ειμαÏÏε
+ειÏÏε
+ÏÏο
+ÏÏον
+ÏÏη
+ÏÏην
+μα
+αλλα
+αÏο
+για
+ÏÏοÏ
+με
+Ïε
+ÏÏ
+ÏαÏα
+ανÏι
+καÏα
+μεÏα
+θα
+να
+δε
+δεν
+μη
+μην
+εÏι
+ενÏ
+εαν
+αν
+ÏοÏε
+ÏοÏ
+ÏÏÏ
+ÏοιοÏ
+Ïοια
+Ïοιο
+Ïοιοι
+ÏοιεÏ
+ÏοιÏν
+ÏοιοÏ
Ï
+αÏ
ÏοÏ
+αÏ
Ïη
+αÏ
Ïο
+αÏ
Ïοι
+αÏ
ÏÏν
+αÏ
ÏοÏ
Ï
+αÏ
ÏεÏ
+αÏ
Ïα
+εκεινοÏ
+εκεινη
+εκεινο
+εκεινοι
+εκεινεÏ
+εκεινα
+εκεινÏν
+εκεινοÏ
Ï
+οÏÏÏ
+ομÏÏ
+ιÏÏÏ
+οÏο
+οÏι
Added: chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_en.txt
URL: http://svn.apache.org/viewvc/chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_en.txt?rev=1614808&view=auto
==============================================================================
--- chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_en.txt (added)
+++ chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_en.txt Thu Jul 31 04:04:59 2014
@@ -0,0 +1,54 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# a couple of test stopwords to test that the words are really being
+# configured from this file:
+stopworda
+stopwordb
+
+# Standard english stop words taken from Lucene's StopAnalyzer
+a
+an
+and
+are
+as
+at
+be
+but
+by
+for
+if
+in
+into
+is
+it
+no
+not
+of
+on
+or
+such
+that
+the
+their
+then
+there
+these
+they
+this
+to
+was
+will
+with
Added: chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_es.txt
URL: http://svn.apache.org/viewvc/chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_es.txt?rev=1614808&view=auto
==============================================================================
--- chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_es.txt (added)
+++ chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_es.txt Thu Jul 31 04:04:59 2014
@@ -0,0 +1,356 @@
+ | From svn.tartarus.org/snowball/trunk/website/algorithms/spanish/stop.txt
+ | This file is distributed under the BSD License.
+ | See http://snowball.tartarus.org/license.php
+ | Also see http://www.opensource.org/licenses/bsd-license.html
+ | - Encoding was converted to UTF-8.
+ | - This notice was added.
+ |
+ | NOTE: To use this file with StopFilterFactory, you must specify format="snowball"
+
+ | A Spanish stop word list. Comments begin with vertical bar. Each stop
+ | word is at the start of a line.
+
+
+ | The following is a ranked list (commonest to rarest) of stopwords
+ | deriving from a large sample of text.
+
+ | Extra words have been added at the end.
+
+de | from, of
+la | the, her
+que | who, that
+el | the
+en | in
+y | and
+a | to
+los | the, them
+del | de + el
+se | himself, from him etc
+las | the, them
+por | for, by, etc
+un | a
+para | for
+con | with
+no | no
+una | a
+su | his, her
+al | a + el
+ | es from SER
+lo | him
+como | how
+más | more
+pero | pero
+sus | su plural
+le | to him, her
+ya | already
+o | or
+ | fue from SER
+este | this
+ | ha from HABER
+sí | himself etc
+porque | because
+esta | this
+ | son from SER
+entre | between
+ | está from ESTAR
+cuando | when
+muy | very
+sin | without
+sobre | on
+ | ser from SER
+ | tiene from TENER
+también | also
+me | me
+hasta | until
+hay | there is/are
+donde | where
+ | han from HABER
+quien | whom, that
+ | están from ESTAR
+ | estado from ESTAR
+desde | from
+todo | all
+nos | us
+durante | during
+ | estados from ESTAR
+todos | all
+uno | a
+les | to them
+ni | nor
+contra | against
+otros | other
+ | fueron from SER
+ese | that
+eso | that
+ | había from HABER
+ante | before
+ellos | they
+e | and (variant of y)
+esto | this
+mí | me
+antes | before
+algunos | some
+qué | what?
+unos | a
+yo | I
+otro | other
+otras | other
+otra | other
+él | he
+tanto | so much, many
+esa | that
+estos | these
+mucho | much, many
+quienes | who
+nada | nothing
+muchos | many
+cual | who
+ | sea from SER
+poco | few
+ella | she
+estar | to be
+ | haber from HABER
+estas | these
+ | estaba from ESTAR
+ | estamos from ESTAR
+algunas | some
+algo | something
+nosotros | we
+
+ | other forms
+
+mi | me
+mis | mi plural
+tú | thou
+te | thee
+ti | thee
+tu | thy
+tus | tu plural
+ellas | they
+nosotras | we
+vosotros | you
+vosotras | you
+os | you
+mío | mine
+mía |
+míos |
+mías |
+tuyo | thine
+tuya |
+tuyos |
+tuyas |
+suyo | his, hers, theirs
+suya |
+suyos |
+suyas |
+nuestro | ours
+nuestra |
+nuestros |
+nuestras |
+vuestro | yours
+vuestra |
+vuestros |
+vuestras |
+esos | those
+esas | those
+
+ | forms of estar, to be (not including the infinitive):
+estoy
+estás
+está
+estamos
+estáis
+están
+esté
+estés
+estemos
+estéis
+estén
+estaré
+estarás
+estará
+estaremos
+estaréis
+estarán
+estaría
+estarías
+estaríamos
+estaríais
+estarían
+estaba
+estabas
+estábamos
+estabais
+estaban
+estuve
+estuviste
+estuvo
+estuvimos
+estuvisteis
+estuvieron
+estuviera
+estuvieras
+estuviéramos
+estuvierais
+estuvieran
+estuviese
+estuvieses
+estuviésemos
+estuvieseis
+estuviesen
+estando
+estado
+estada
+estados
+estadas
+estad
+
+ | forms of haber, to have (not including the infinitive):
+he
+has
+ha
+hemos
+habéis
+han
+haya
+hayas
+hayamos
+hayáis
+hayan
+habré
+habrás
+habrá
+habremos
+habréis
+habrán
+habría
+habrías
+habríamos
+habríais
+habrían
+había
+habías
+habíamos
+habíais
+habían
+hube
+hubiste
+hubo
+hubimos
+hubisteis
+hubieron
+hubiera
+hubieras
+hubiéramos
+hubierais
+hubieran
+hubiese
+hubieses
+hubiésemos
+hubieseis
+hubiesen
+habiendo
+habido
+habida
+habidos
+habidas
+
+ | forms of ser, to be (not including the infinitive):
+soy
+eres
+es
+somos
+sois
+son
+sea
+seas
+seamos
+seáis
+sean
+seré
+serás
+será
+seremos
+seréis
+serán
+sería
+serías
+seríamos
+seríais
+serían
+era
+eras
+éramos
+erais
+eran
+fui
+fuiste
+fue
+fuimos
+fuisteis
+fueron
+fuera
+fueras
+fuéramos
+fuerais
+fueran
+fuese
+fueses
+fuésemos
+fueseis
+fuesen
+siendo
+sido
+ | sed also means 'thirst'
+
+ | forms of tener, to have (not including the infinitive):
+tengo
+tienes
+tiene
+tenemos
+tenéis
+tienen
+tenga
+tengas
+tengamos
+tengáis
+tengan
+tendré
+tendrás
+tendrá
+tendremos
+tendréis
+tendrán
+tendría
+tendrías
+tendríamos
+tendríais
+tendrían
+tenía
+tenías
+teníamos
+teníais
+tenían
+tuve
+tuviste
+tuvo
+tuvimos
+tuvisteis
+tuvieron
+tuviera
+tuvieras
+tuviéramos
+tuvierais
+tuvieran
+tuviese
+tuvieses
+tuviésemos
+tuvieseis
+tuviesen
+teniendo
+tenido
+tenida
+tenidos
+tenidas
+tened
+
Added: chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_eu.txt
URL: http://svn.apache.org/viewvc/chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_eu.txt?rev=1614808&view=auto
==============================================================================
--- chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_eu.txt (added)
+++ chukwa/trunk/contrib/solr/logs/conf/lang/stopwords_eu.txt Thu Jul 31 04:04:59 2014
@@ -0,0 +1,99 @@
+# example set of basque stopwords
+al
+anitz
+arabera
+asko
+baina
+bat
+batean
+batek
+bati
+batzuei
+batzuek
+batzuetan
+batzuk
+bera
+beraiek
+berau
+berauek
+bere
+berori
+beroriek
+beste
+bezala
+da
+dago
+dira
+ditu
+du
+dute
+edo
+egin
+ere
+eta
+eurak
+ez
+gainera
+gu
+gutxi
+guzti
+haiei
+haiek
+haietan
+hainbeste
+hala
+han
+handik
+hango
+hara
+hari
+hark
+hartan
+hau
+hauei
+hauek
+hauetan
+hemen
+hemendik
+hemengo
+hi
+hona
+honek
+honela
+honetan
+honi
+hor
+hori
+horiei
+horiek
+horietan
+horko
+horra
+horrek
+horrela
+horretan
+horri
+hortik
+hura
+izan
+ni
+noiz
+nola
+non
+nondik
+nongo
+nor
+nora
+ze
+zein
+zen
+zenbait
+zenbat
+zer
+zergatik
+ziren
+zituen
+zu
+zuek
+zuen
+zuten