You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sa...@apache.org on 2014/04/09 22:05:27 UTC
svn commit: r1586120 - in /lucene/dev/trunk/solr: ./
example/example-DIH/solr/db/conf/
example/example-DIH/solr/db/conf/clustering/
example/example-DIH/solr/db/conf/lang/
example/example-DIH/solr/db/conf/velocity/
example/example-DIH/solr/db/conf/xslt/...
Author: sarowe
Date: Wed Apr 9 20:05:25 2014
New Revision: 1586120
URL: http://svn.apache.org/r1586120
Log:
SOLR-5937: Modernize the DIH example config sets
Added:
lucene/dev/trunk/solr/example/example-DIH/solr/db/conf/admin-extra.html
- copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/admin-extra.html
lucene/dev/trunk/solr/example/example-DIH/solr/db/conf/admin-extra.menu-bottom.html
- copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/admin-extra.menu-bottom.html
lucene/dev/trunk/solr/example/example-DIH/solr/db/conf/admin-extra.menu-top.html
- copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/admin-extra.menu-top.html
lucene/dev/trunk/solr/example/example-DIH/solr/db/conf/clustering/
- copied from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/clustering/
lucene/dev/trunk/solr/example/example-DIH/solr/db/conf/currency.xml
- copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/currency.xml
lucene/dev/trunk/solr/example/example-DIH/solr/db/conf/elevate.xml
- copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/elevate.xml
lucene/dev/trunk/solr/example/example-DIH/solr/db/conf/lang/
- copied from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/lang/
lucene/dev/trunk/solr/example/example-DIH/solr/db/conf/mapping-FoldToASCII.txt
- copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/mapping-FoldToASCII.txt
lucene/dev/trunk/solr/example/example-DIH/solr/db/conf/mapping-ISOLatin1Accent.txt
- copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/mapping-ISOLatin1Accent.txt
lucene/dev/trunk/solr/example/example-DIH/solr/db/conf/schema.xml
- copied, changed from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/schema.xml
lucene/dev/trunk/solr/example/example-DIH/solr/db/conf/solrconfig.xml
- copied, changed from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/solrconfig.xml
lucene/dev/trunk/solr/example/example-DIH/solr/db/conf/spellings.txt
- copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/spellings.txt
lucene/dev/trunk/solr/example/example-DIH/solr/db/conf/stopwords.txt
- copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/stopwords.txt
lucene/dev/trunk/solr/example/example-DIH/solr/db/conf/synonyms.txt
- copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/synonyms.txt
lucene/dev/trunk/solr/example/example-DIH/solr/db/conf/update-script.js
- copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/update-script.js
lucene/dev/trunk/solr/example/example-DIH/solr/db/conf/velocity/
- copied from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/velocity/
lucene/dev/trunk/solr/example/example-DIH/solr/db/conf/xslt/
- copied from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/xslt/
lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/admin-extra.html
- copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/admin-extra.html
lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/admin-extra.menu-bottom.html
- copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/admin-extra.menu-bottom.html
lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/admin-extra.menu-top.html
- copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/admin-extra.menu-top.html
lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/clustering/
- copied from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/clustering/
lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/currency.xml
- copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/currency.xml
lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/elevate.xml
- copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/elevate.xml
lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/lang/
- copied from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/lang/
lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/mapping-FoldToASCII.txt
- copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/mapping-FoldToASCII.txt
lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/mapping-ISOLatin1Accent.txt
- copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/mapping-ISOLatin1Accent.txt
lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/schema.xml
- copied, changed from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/schema.xml
lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/scripts.conf
- copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/scripts.conf
lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/solrconfig.xml
- copied, changed from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/solrconfig.xml
lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/spellings.txt
- copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/spellings.txt
lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/stopwords.txt
- copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/stopwords.txt
lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/synonyms.txt
- copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/synonyms.txt
lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/update-script.js
- copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/update-script.js
lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/velocity/
- copied from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/velocity/
lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/xslt/
- copied from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/xslt/
lucene/dev/trunk/solr/example/example-DIH/solr/rss/conf/admin-extra.html
- copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/admin-extra.html
lucene/dev/trunk/solr/example/example-DIH/solr/rss/conf/admin-extra.menu-bottom.html
- copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/admin-extra.menu-bottom.html
lucene/dev/trunk/solr/example/example-DIH/solr/rss/conf/admin-extra.menu-top.html
- copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/admin-extra.menu-top.html
lucene/dev/trunk/solr/example/example-DIH/solr/rss/conf/clustering/
- copied from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/clustering/
lucene/dev/trunk/solr/example/example-DIH/solr/rss/conf/currency.xml
- copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/currency.xml
lucene/dev/trunk/solr/example/example-DIH/solr/rss/conf/elevate.xml
- copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/elevate.xml
lucene/dev/trunk/solr/example/example-DIH/solr/rss/conf/lang/
- copied from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/lang/
lucene/dev/trunk/solr/example/example-DIH/solr/rss/conf/mapping-FoldToASCII.txt
- copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/mapping-FoldToASCII.txt
lucene/dev/trunk/solr/example/example-DIH/solr/rss/conf/mapping-ISOLatin1Accent.txt
- copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/mapping-ISOLatin1Accent.txt
lucene/dev/trunk/solr/example/example-DIH/solr/rss/conf/schema.xml
- copied, changed from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/schema.xml
lucene/dev/trunk/solr/example/example-DIH/solr/rss/conf/solrconfig.xml
- copied, changed from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/solrconfig.xml
lucene/dev/trunk/solr/example/example-DIH/solr/rss/conf/spellings.txt
- copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/spellings.txt
lucene/dev/trunk/solr/example/example-DIH/solr/rss/conf/stopwords.txt
- copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/stopwords.txt
lucene/dev/trunk/solr/example/example-DIH/solr/rss/conf/synonyms.txt
- copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/synonyms.txt
lucene/dev/trunk/solr/example/example-DIH/solr/rss/conf/update-script.js
- copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/update-script.js
lucene/dev/trunk/solr/example/example-DIH/solr/rss/conf/velocity/
- copied from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/velocity/
lucene/dev/trunk/solr/example/example-DIH/solr/rss/conf/xslt/
- copied from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/xslt/
lucene/dev/trunk/solr/example/example-DIH/solr/solr/conf/admin-extra.html
- copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/admin-extra.html
lucene/dev/trunk/solr/example/example-DIH/solr/solr/conf/admin-extra.menu-bottom.html
- copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/admin-extra.menu-bottom.html
lucene/dev/trunk/solr/example/example-DIH/solr/solr/conf/admin-extra.menu-top.html
- copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/admin-extra.menu-top.html
lucene/dev/trunk/solr/example/example-DIH/solr/solr/conf/clustering/
- copied from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/clustering/
lucene/dev/trunk/solr/example/example-DIH/solr/solr/conf/currency.xml
- copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/currency.xml
lucene/dev/trunk/solr/example/example-DIH/solr/solr/conf/elevate.xml
- copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/elevate.xml
lucene/dev/trunk/solr/example/example-DIH/solr/solr/conf/lang/
- copied from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/lang/
lucene/dev/trunk/solr/example/example-DIH/solr/solr/conf/mapping-FoldToASCII.txt
- copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/mapping-FoldToASCII.txt
lucene/dev/trunk/solr/example/example-DIH/solr/solr/conf/mapping-ISOLatin1Accent.txt
- copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/mapping-ISOLatin1Accent.txt
lucene/dev/trunk/solr/example/example-DIH/solr/solr/conf/schema.xml
- copied, changed from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/schema.xml
lucene/dev/trunk/solr/example/example-DIH/solr/solr/conf/solrconfig.xml
- copied, changed from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/solrconfig.xml
lucene/dev/trunk/solr/example/example-DIH/solr/solr/conf/spellings.txt
- copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/spellings.txt
lucene/dev/trunk/solr/example/example-DIH/solr/solr/conf/stopwords.txt
- copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/stopwords.txt
lucene/dev/trunk/solr/example/example-DIH/solr/solr/conf/synonyms.txt
- copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/synonyms.txt
lucene/dev/trunk/solr/example/example-DIH/solr/solr/conf/update-script.js
- copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/update-script.js
lucene/dev/trunk/solr/example/example-DIH/solr/solr/conf/velocity/
- copied from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/velocity/
lucene/dev/trunk/solr/example/example-DIH/solr/solr/conf/xslt/
- copied from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/xslt/
lucene/dev/trunk/solr/example/example-DIH/solr/tika/conf/admin-extra.html
- copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/admin-extra.html
lucene/dev/trunk/solr/example/example-DIH/solr/tika/conf/admin-extra.menu-bottom.html
- copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/admin-extra.menu-bottom.html
lucene/dev/trunk/solr/example/example-DIH/solr/tika/conf/admin-extra.menu-top.html
- copied unchanged from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/admin-extra.menu-top.html
lucene/dev/trunk/solr/example/example-DIH/solr/tika/conf/schema.xml
- copied, changed from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/schema.xml
lucene/dev/trunk/solr/example/example-DIH/solr/tika/conf/solrconfig.xml
- copied, changed from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/solrconfig.xml
Removed:
lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/data-config.xml
Modified:
lucene/dev/trunk/solr/CHANGES.txt
Modified: lucene/dev/trunk/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/CHANGES.txt?rev=1586120&r1=1586119&r2=1586120&view=diff
==============================================================================
--- lucene/dev/trunk/solr/CHANGES.txt (original)
+++ lucene/dev/trunk/solr/CHANGES.txt Wed Apr 9 20:05:25 2014
@@ -295,6 +295,8 @@ Other Changes
* SOLR-5773: CollapsingQParserPlugin should make elevated documents the
group head. (David Boychuck, Joel Bernstein)
+* SOLR-5937: Modernize the DIH example config sets. (Steve Rowe)
+
================== 4.7.1 ==================
Versions of Major Components
Copied: lucene/dev/trunk/solr/example/example-DIH/solr/db/conf/schema.xml (from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/schema.xml)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/example-DIH/solr/db/conf/schema.xml?p2=lucene/dev/trunk/solr/example/example-DIH/solr/db/conf/schema.xml&p1=lucene/dev/trunk/solr/example/solr/collection1/conf/schema.xml&r1=1586108&r2=1586120&rev=1586120&view=diff
==============================================================================
--- lucene/dev/trunk/solr/example/solr/collection1/conf/schema.xml (original)
+++ lucene/dev/trunk/solr/example/example-DIH/solr/db/conf/schema.xml Wed Apr 9 20:05:25 2014
@@ -45,7 +45,7 @@
that avoids logging every request
-->
-<schema name="example" version="1.5">
+<schema name="example-DIH-db" version="1.5">
<!-- attribute "name" is the name of this schema and is only used for display purposes.
version="x.y" is Solr's version number for the schema syntax and
semantics. It should not normally be changed by applications.
@@ -290,17 +290,17 @@
<!-- Create a string version of author for faceting -->
<copyField source="author" dest="author_s"/>
-
- <!-- Above, multiple source fields are copied to the [text] field.
- Another way to map multiple source fields to the same
- destination field is to use the dynamic field syntax.
+
+ <!-- Above, multiple source fields are copied to the [text] field.
+ Another way to map multiple source fields to the same
+ destination field is to use the dynamic field syntax.
copyField also supports a maxChars to copy setting. -->
-
+
<!-- <copyField source="*_t" dest="text" maxChars="3000"/> -->
<!-- copy name to alphaNameSort, a field designed for sorting by name -->
<!-- <copyField source="name" dest="alphaNameSort"/> -->
-
+
<!-- field type definitions. The "name" attribute is
just a label to be used by field definitions. The "class"
Copied: lucene/dev/trunk/solr/example/example-DIH/solr/db/conf/solrconfig.xml (from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/solrconfig.xml)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/example-DIH/solr/db/conf/solrconfig.xml?p2=lucene/dev/trunk/solr/example/example-DIH/solr/db/conf/solrconfig.xml&p1=lucene/dev/trunk/solr/example/solr/collection1/conf/solrconfig.xml&r1=1586108&r2=1586120&rev=1586120&view=diff
==============================================================================
--- lucene/dev/trunk/solr/example/solr/collection1/conf/solrconfig.xml (original)
+++ lucene/dev/trunk/solr/example/example-DIH/solr/db/conf/solrconfig.xml Wed Apr 9 20:05:25 2014
@@ -72,17 +72,19 @@
The examples below can be used to load some solr-contribs along
with their external dependencies.
-->
- <lib dir="../../../contrib/extraction/lib" regex=".*\.jar" />
- <lib dir="../../../dist/" regex="solr-cell-\d.*\.jar" />
+ <lib dir="../../../../dist/" regex="solr-dataimporthandler-.*\.jar" />
- <lib dir="../../../contrib/clustering/lib/" regex=".*\.jar" />
- <lib dir="../../../dist/" regex="solr-clustering-\d.*\.jar" />
+ <lib dir="../../../../contrib/extraction/lib" regex=".*\.jar" />
+ <lib dir="../../../../dist/" regex="solr-cell-\d.*\.jar" />
- <lib dir="../../../contrib/langid/lib/" regex=".*\.jar" />
- <lib dir="../../../dist/" regex="solr-langid-\d.*\.jar" />
+ <lib dir="../../../../contrib/clustering/lib/" regex=".*\.jar" />
+ <lib dir="../../../../dist/" regex="solr-clustering-\d.*\.jar" />
- <lib dir="../../../contrib/velocity/lib" regex=".*\.jar" />
- <lib dir="../../../dist/" regex="solr-velocity-\d.*\.jar" />
+ <lib dir="../../../../contrib/langid/lib/" regex=".*\.jar" />
+ <lib dir="../../../../dist/" regex="solr-langid-\d.*\.jar" />
+
+ <lib dir="../../../../contrib/velocity/lib" regex=".*\.jar" />
+ <lib dir="../../../../dist/" regex="solr-velocity-\d.*\.jar" />
<!-- an exact 'path' can be used instead of a 'dir' to specify a
specific jar file. This will cause a serious error to be logged
@@ -803,6 +805,13 @@
not be initialized until the first request that uses it.
-->
+
+ <requestHandler name="/dataimport" class="solr.DataImportHandler">
+ <lst name="defaults">
+ <str name="config">db-data-config.xml</str>
+ </lst>
+ </requestHandler>
+
<!-- SearchHandler
http://wiki.apache.org/solr/SearchHandler
Copied: lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/schema.xml (from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/schema.xml)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/schema.xml?p2=lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/schema.xml&p1=lucene/dev/trunk/solr/example/solr/collection1/conf/schema.xml&r1=1586108&r2=1586120&rev=1586120&view=diff
==============================================================================
--- lucene/dev/trunk/solr/example/solr/collection1/conf/schema.xml (original)
+++ lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/schema.xml Wed Apr 9 20:05:25 2014
@@ -45,7 +45,7 @@
that avoids logging every request
-->
-<schema name="example" version="1.5">
+<schema name="example-DIH-mail" version="1.5">
<!-- attribute "name" is the name of this schema and is only used for display purposes.
version="x.y" is Solr's version number for the schema syntax and
semantics. It should not normally be changed by applications.
@@ -111,89 +111,25 @@
-->
<field name="_version_" type="long" indexed="true" stored="true"/>
- <!-- points to the root document of a block of nested documents. Required for nested
- document support, may be removed otherwise
- -->
- <field name="_root_" type="string" indexed="true" stored="false"/>
-
- <!-- Only remove the "id" field if you have a very good reason to. While not strictly
- required, it is highly recommended. A <uniqueKey> is present in almost all Solr
- installations. See the <uniqueKey> declaration below where <uniqueKey> is set to "id".
- -->
- <field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" />
-
- <field name="sku" type="text_en_splitting_tight" indexed="true" stored="true" omitNorms="true"/>
- <field name="name" type="text_general" indexed="true" stored="true"/>
- <field name="manu" type="text_general" indexed="true" stored="true" omitNorms="true"/>
- <field name="cat" type="string" indexed="true" stored="true" multiValued="true"/>
- <field name="features" type="text_general" indexed="true" stored="true" multiValued="true"/>
- <field name="includes" type="text_general" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true" />
-
- <field name="weight" type="float" indexed="true" stored="true"/>
- <field name="price" type="float" indexed="true" stored="true"/>
- <field name="popularity" type="int" indexed="true" stored="true" />
- <field name="inStock" type="boolean" indexed="true" stored="true" />
-
- <field name="store" type="location" indexed="true" stored="true"/>
-
- <!-- Common metadata fields, named specifically to match up with
- SolrCell metadata when parsing rich documents such as Word, PDF.
- Some fields are multiValued only because Tika currently may return
- multiple values for them. Some metadata is parsed from the documents,
- but there are some which come from the client context:
- "content_type": From the HTTP headers of incoming stream
- "resourcename": From SolrCell request param resource.name
- -->
- <field name="title" type="text_general" indexed="true" stored="true" multiValued="true"/>
- <field name="subject" type="text_general" indexed="true" stored="true"/>
- <field name="description" type="text_general" indexed="true" stored="true"/>
- <field name="comments" type="text_general" indexed="true" stored="true"/>
- <field name="author" type="text_general" indexed="true" stored="true"/>
- <field name="keywords" type="text_general" indexed="true" stored="true"/>
- <field name="category" type="text_general" indexed="true" stored="true"/>
- <field name="resourcename" type="text_general" indexed="true" stored="true"/>
- <field name="url" type="text_general" indexed="true" stored="true"/>
- <field name="content_type" type="string" indexed="true" stored="true" multiValued="true"/>
- <field name="last_modified" type="date" indexed="true" stored="true"/>
- <field name="links" type="string" indexed="true" stored="true" multiValued="true"/>
-
- <!-- Main body of document extracted by SolrCell.
- NOTE: This field is not indexed by default, since it is also copied to "text"
- using copyField below. This is to save space. Use this field for returning and
- highlighting document content. Use the "text" field to search the content. -->
- <field name="content" type="text_general" indexed="false" stored="true" multiValued="true"/>
-
+ <field name="content" type="text_general" indexed="true" stored="true" multiValued="true"/>
<!-- catchall field, containing all other searchable text fields (implemented
via copyField further on in this schema) -->
<field name="text" type="text_general" indexed="true" stored="false" multiValued="true"/>
- <!-- catchall text field that indexes tokens both normally and in reverse for efficient
- leading wildcard queries. -->
- <field name="text_rev" type="text_general_rev" indexed="true" stored="false" multiValued="true"/>
-
- <!-- non-tokenized version of manufacturer to make it easier to sort or group
- results by manufacturer. copied from "manu" via copyField -->
- <field name="manu_exact" type="string" indexed="true" stored="false"/>
-
- <field name="payloads" type="payloads" indexed="true" stored="true"/>
-
-
- <!--
- Some fields such as popularity and manu_exact could be modified to
- leverage doc values:
- <field name="popularity" type="int" indexed="true" stored="true" docValues="true" />
- <field name="manu_exact" type="string" indexed="false" stored="false" docValues="true" />
- <field name="cat" type="string" indexed="true" stored="true" docValues="true" multiValued="true"/>
-
-
- Although it would make indexing slightly slower and the index bigger, it
- would also make the index faster to load, more memory-efficient and more
- NRT-friendly.
- -->
+ <field name="messageId" type="string" indexed="true" stored="true" required="true" multiValued="false"/>
+ <field name="subject" type="text_general" indexed="true" stored="true"/>
+ <field name="from" type="string" indexed="true" stored="true" omitNorms="true"/>
+ <field name="sentDate" type="date" indexed="true" stored="true"/>
+ <field name="xMailer" type="string" indexed="true" stored="true" omitNorms="true"/>
+
+ <field name="allTo" type="string" indexed="true" stored="true" omitNorms="true" multiValued="true"/>
+ <field name="flags" type="string" indexed="true" stored="true" omitNorms="true" multiValued="true"/>
+ <field name="attachment" type="text_general" indexed="true" stored="true" multiValued="true"/>
+ <field name="attachmentNames" type="string" indexed="true" stored="true" omitNorms="true" multiValued="true"/>
<!-- Dynamic field definitions allow using convention over configuration
- for fields via the specification of patterns to match field names.
+ for fields via the specification of patterns to match field names.
EXAMPLE: name="*_i" will match any field ending in _i (like myid_i, z_i)
RESTRICTION: the glob-like pattern in the name attribute must have
a "*" only at the start or the end. -->
@@ -247,7 +183,7 @@
<!-- Field to use to determine and enforce document uniqueness.
Unless this field is marked with required="false", it will be a required field
-->
- <uniqueKey>id</uniqueKey>
+ <uniqueKey>messageId</uniqueKey>
<!-- DEPRECATED: The defaultSearchField is consulted by various query parsers when
parsing a query string that isn't explicit about the field. Machine (non-user)
@@ -268,29 +204,12 @@
is added to the index. It's used either to index the same field differently,
or to add multiple fields to the same field for easier/faster searching. -->
- <copyField source="cat" dest="text"/>
- <copyField source="name" dest="text"/>
- <copyField source="manu" dest="text"/>
- <copyField source="features" dest="text"/>
- <copyField source="includes" dest="text"/>
- <copyField source="manu" dest="manu_exact"/>
-
- <!-- Copy the price into a currency enabled field (default USD) -->
- <copyField source="price" dest="price_c"/>
-
- <!-- Text fields from SolrCell to search by default in our catch-all field -->
- <copyField source="title" dest="text"/>
- <copyField source="author" dest="text"/>
- <copyField source="description" dest="text"/>
- <copyField source="keywords" dest="text"/>
- <copyField source="content" dest="text"/>
- <copyField source="content_type" dest="text"/>
- <copyField source="resourcename" dest="text"/>
- <copyField source="url" dest="text"/>
-
- <!-- Create a string version of author for faceting -->
- <copyField source="author" dest="author_s"/>
-
+ <copyField source="content" dest="text"/>
+ <copyField source="attachmentNames" dest="text"/>
+ <copyField source="attachment" dest="text"/>
+ <copyField source="subject" dest="text"/>
+ <copyField source="allTo" dest="text"/>
+
<!-- Above, multiple source fields are copied to the [text] field.
Another way to map multiple source fields to the same
destination field is to use the dynamic field syntax.
Copied: lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/solrconfig.xml (from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/solrconfig.xml)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/solrconfig.xml?p2=lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/solrconfig.xml&p1=lucene/dev/trunk/solr/example/solr/collection1/conf/solrconfig.xml&r1=1586108&r2=1586120&rev=1586120&view=diff
==============================================================================
--- lucene/dev/trunk/solr/example/solr/collection1/conf/solrconfig.xml (original)
+++ lucene/dev/trunk/solr/example/example-DIH/solr/mail/conf/solrconfig.xml Wed Apr 9 20:05:25 2014
@@ -72,17 +72,20 @@
The examples below can be used to load some solr-contribs along
with their external dependencies.
-->
- <lib dir="../../../contrib/extraction/lib" regex=".*\.jar" />
- <lib dir="../../../dist/" regex="solr-cell-\d.*\.jar" />
+ <lib dir="../../../../contrib/dataimporthandler/lib/" regex=".*\.jar" />
+ <lib dir="../../../../dist/" regex="solr-dataimporthandler-.*\.jar" />
- <lib dir="../../../contrib/clustering/lib/" regex=".*\.jar" />
- <lib dir="../../../dist/" regex="solr-clustering-\d.*\.jar" />
+ <lib dir="../../../../contrib/extraction/lib" regex=".*\.jar" />
+ <lib dir="../../../../dist/" regex="solr-cell-\d.*\.jar" />
- <lib dir="../../../contrib/langid/lib/" regex=".*\.jar" />
- <lib dir="../../../dist/" regex="solr-langid-\d.*\.jar" />
+ <lib dir="../../../../contrib/clustering/lib/" regex=".*\.jar" />
+ <lib dir="../../../../dist/" regex="solr-clustering-\d.*\.jar" />
- <lib dir="../../../contrib/velocity/lib" regex=".*\.jar" />
- <lib dir="../../../dist/" regex="solr-velocity-\d.*\.jar" />
+ <lib dir="../../../../contrib/langid/lib/" regex=".*\.jar" />
+ <lib dir="../../../../dist/" regex="solr-langid-\d.*\.jar" />
+
+ <lib dir="../../../../contrib/velocity/lib" regex=".*\.jar" />
+ <lib dir="../../../../dist/" regex="solr-velocity-\d.*\.jar" />
<!-- an exact 'path' can be used instead of a 'dir' to specify a
specific jar file. This will cause a serious error to be logged
@@ -803,6 +806,13 @@
not be initialized until the first request that uses it.
-->
+
+ <requestHandler name="/dataimport" class="solr.DataImportHandler">
+ <lst name="defaults">
+ <str name="config">mail-data-config.xml</str>
+ </lst>
+ </requestHandler>
+
<!-- SearchHandler
http://wiki.apache.org/solr/SearchHandler
Copied: lucene/dev/trunk/solr/example/example-DIH/solr/rss/conf/schema.xml (from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/schema.xml)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/example-DIH/solr/rss/conf/schema.xml?p2=lucene/dev/trunk/solr/example/example-DIH/solr/rss/conf/schema.xml&p1=lucene/dev/trunk/solr/example/solr/collection1/conf/schema.xml&r1=1586108&r2=1586120&rev=1586120&view=diff
==============================================================================
--- lucene/dev/trunk/solr/example/solr/collection1/conf/schema.xml (original)
+++ lucene/dev/trunk/solr/example/example-DIH/solr/rss/conf/schema.xml Wed Apr 9 20:05:25 2014
@@ -45,7 +45,7 @@
that avoids logging every request
-->
-<schema name="example" version="1.5">
+<schema name="example-DIH-rss" version="1.5">
<!-- attribute "name" is the name of this schema and is only used for display purposes.
version="x.y" is Solr's version number for the schema syntax and
semantics. It should not normally be changed by applications.
@@ -111,86 +111,25 @@
-->
<field name="_version_" type="long" indexed="true" stored="true"/>
- <!-- points to the root document of a block of nested documents. Required for nested
- document support, may be removed otherwise
- -->
- <field name="_root_" type="string" indexed="true" stored="false"/>
-
- <!-- Only remove the "id" field if you have a very good reason to. While not strictly
- required, it is highly recommended. A <uniqueKey> is present in almost all Solr
- installations. See the <uniqueKey> declaration below where <uniqueKey> is set to "id".
- -->
- <field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" />
-
- <field name="sku" type="text_en_splitting_tight" indexed="true" stored="true" omitNorms="true"/>
- <field name="name" type="text_general" indexed="true" stored="true"/>
- <field name="manu" type="text_general" indexed="true" stored="true" omitNorms="true"/>
- <field name="cat" type="string" indexed="true" stored="true" multiValued="true"/>
- <field name="features" type="text_general" indexed="true" stored="true" multiValued="true"/>
- <field name="includes" type="text_general" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true" />
-
- <field name="weight" type="float" indexed="true" stored="true"/>
- <field name="price" type="float" indexed="true" stored="true"/>
- <field name="popularity" type="int" indexed="true" stored="true" />
- <field name="inStock" type="boolean" indexed="true" stored="true" />
-
- <field name="store" type="location" indexed="true" stored="true"/>
-
- <!-- Common metadata fields, named specifically to match up with
- SolrCell metadata when parsing rich documents such as Word, PDF.
- Some fields are multiValued only because Tika currently may return
- multiple values for them. Some metadata is parsed from the documents,
- but there are some which come from the client context:
- "content_type": From the HTTP headers of incoming stream
- "resourcename": From SolrCell request param resource.name
- -->
- <field name="title" type="text_general" indexed="true" stored="true" multiValued="true"/>
<field name="subject" type="text_general" indexed="true" stored="true"/>
- <field name="description" type="text_general" indexed="true" stored="true"/>
- <field name="comments" type="text_general" indexed="true" stored="true"/>
- <field name="author" type="text_general" indexed="true" stored="true"/>
- <field name="keywords" type="text_general" indexed="true" stored="true"/>
- <field name="category" type="text_general" indexed="true" stored="true"/>
- <field name="resourcename" type="text_general" indexed="true" stored="true"/>
- <field name="url" type="text_general" indexed="true" stored="true"/>
- <field name="content_type" type="string" indexed="true" stored="true" multiValued="true"/>
- <field name="last_modified" type="date" indexed="true" stored="true"/>
- <field name="links" type="string" indexed="true" stored="true" multiValued="true"/>
-
- <!-- Main body of document extracted by SolrCell.
- NOTE: This field is not indexed by default, since it is also copied to "text"
- using copyField below. This is to save space. Use this field for returning and
- highlighting document content. Use the "text" field to search the content. -->
- <field name="content" type="text_general" indexed="false" stored="true" multiValued="true"/>
-
<!-- catchall field, containing all other searchable text fields (implemented
via copyField further on in this schema -->
<field name="text" type="text_general" indexed="true" stored="false" multiValued="true"/>
- <!-- catchall text field that indexes tokens both normally and in reverse for efficient
- leading wildcard queries. -->
- <field name="text_rev" type="text_general_rev" indexed="true" stored="false" multiValued="true"/>
-
- <!-- non-tokenized version of manufacturer to make it easier to sort or group
- results by manufacturer. copied from "manu" via copyField -->
- <field name="manu_exact" type="string" indexed="true" stored="false"/>
-
- <field name="payloads" type="payloads" indexed="true" stored="true"/>
-
-
- <!--
- Some fields such as popularity and manu_exact could be modified to
- leverage doc values:
- <field name="popularity" type="int" indexed="true" stored="true" docValues="true" />
- <field name="manu_exact" type="string" indexed="false" stored="false" docValues="true" />
- <field name="cat" type="string" indexed="true" stored="true" docValues="true" multiValued="true"/>
+ <field name="source" type="text_general" indexed="true" stored="true" />
+ <field name="source-link" type="string" indexed="false" stored="true" />
+ <field name="title" type="text_general" indexed="true" stored="true" />
+ <field name="description" type="html" indexed="true" stored="true" />
- Although it would make indexing slightly slower and the index bigger, it
- would also make the index faster to load, more memory-efficient and more
- NRT-friendly.
- -->
+ <field name="link" type="string" indexed="true" stored="true" required="true" multiValued="false" />
+ <field name="creator" type="string" indexed="false" stored="true" />
+ <field name="item-subject" type="string" indexed="true" stored="false" />
+ <field name="date" type="date" indexed="true" stored="false" />
+ <field name="slash-department" type="string" indexed="true" stored="false" />
+ <field name="slash-section" type="string" indexed="true" stored="false" />
+ <field name="slash-comments" type="int" indexed="true" stored="true" />
<!-- Dynamic field definitions allow using convention over configuration
for fields via the specification of patterns to match field names.
@@ -247,7 +186,7 @@
<!-- Field to use to determine and enforce document uniqueness.
Unless this field is marked with required="false", it will be a required field
-->
- <uniqueKey>id</uniqueKey>
+ <uniqueKey>link</uniqueKey>
<!-- DEPRECATED: The defaultSearchField is consulted by various query parsers when
parsing a query string that isn't explicit about the field. Machine (non-user)
@@ -268,29 +207,13 @@
is added to the index. It's used either to index the same field differently,
or to add multiple fields to the same field for easier/faster searching. -->
- <copyField source="cat" dest="text"/>
- <copyField source="name" dest="text"/>
- <copyField source="manu" dest="text"/>
- <copyField source="features" dest="text"/>
- <copyField source="includes" dest="text"/>
- <copyField source="manu" dest="manu_exact"/>
-
- <!-- Copy the price into a currency enabled field (default USD) -->
- <copyField source="price" dest="price_c"/>
-
- <!-- Text fields from SolrCell to search by default in our catch-all field -->
- <copyField source="title" dest="text"/>
- <copyField source="author" dest="text"/>
- <copyField source="description" dest="text"/>
- <copyField source="keywords" dest="text"/>
- <copyField source="content" dest="text"/>
- <copyField source="content_type" dest="text"/>
- <copyField source="resourcename" dest="text"/>
- <copyField source="url" dest="text"/>
-
- <!-- Create a string version of author for faceting -->
- <copyField source="author" dest="author_s"/>
-
+ <copyField source="source" dest="text"/>
+ <copyField source="subject" dest="text"/>
+ <copyField source="title" dest="text"/>
+ <copyField source="description" dest="text"/>
+ <copyField source="creator" dest="text"/>
+ <copyField source="item-subject" dest="text"/>
+
<!-- Above, multiple source fields are copied to the [text] field.
Another way to map multiple source fields to the same
destination field is to use the dynamic field syntax.
@@ -310,6 +233,33 @@
standard package such as org.apache.solr.analysis
-->
+ <fieldtype name="html" stored="true" indexed="true" class="solr.TextField">
+ <analyzer type="index">
+ <charFilter class="solr.HTMLStripCharFilterFactory"/>
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <!-- in this example, we will only use synonyms at query time
+ <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
+ -->
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+ <filter class="solr.PorterStemFilterFactory"/>
+ <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
+ </analyzer>
+ <analyzer type="query">
+ <charFilter class="solr.HTMLStripCharFilterFactory"/>
+ <tokenizer class="solr.StandardTokenizerFactory"/>
+ <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
+ <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
+ <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
+ <filter class="solr.LowerCaseFilterFactory"/>
+ <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+ <filter class="solr.PorterStemFilterFactory"/>
+ <filter class="solr.RemoveDuplicatesTokenFilterFactory"/>
+ </analyzer>
+ </fieldtype>
+
<!-- The StrField type is not analyzed, but indexed/stored verbatim.
It supports doc values but in that case the field needs to be
single-valued and either required or have a default value.
Copied: lucene/dev/trunk/solr/example/example-DIH/solr/rss/conf/solrconfig.xml (from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/solrconfig.xml)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/example-DIH/solr/rss/conf/solrconfig.xml?p2=lucene/dev/trunk/solr/example/example-DIH/solr/rss/conf/solrconfig.xml&p1=lucene/dev/trunk/solr/example/solr/collection1/conf/solrconfig.xml&r1=1586108&r2=1586120&rev=1586120&view=diff
==============================================================================
--- lucene/dev/trunk/solr/example/solr/collection1/conf/solrconfig.xml (original)
+++ lucene/dev/trunk/solr/example/example-DIH/solr/rss/conf/solrconfig.xml Wed Apr 9 20:05:25 2014
@@ -72,17 +72,19 @@
The examples below can be used to load some solr-contribs along
with their external dependencies.
-->
- <lib dir="../../../contrib/extraction/lib" regex=".*\.jar" />
- <lib dir="../../../dist/" regex="solr-cell-\d.*\.jar" />
+ <lib dir="../../../../dist/" regex="solr-dataimporthandler-.*\.jar" />
- <lib dir="../../../contrib/clustering/lib/" regex=".*\.jar" />
- <lib dir="../../../dist/" regex="solr-clustering-\d.*\.jar" />
+ <lib dir="../../../../contrib/extraction/lib" regex=".*\.jar" />
+ <lib dir="../../../../dist/" regex="solr-cell-\d.*\.jar" />
- <lib dir="../../../contrib/langid/lib/" regex=".*\.jar" />
- <lib dir="../../../dist/" regex="solr-langid-\d.*\.jar" />
+ <lib dir="../../../../contrib/clustering/lib/" regex=".*\.jar" />
+ <lib dir="../../../../dist/" regex="solr-clustering-\d.*\.jar" />
- <lib dir="../../../contrib/velocity/lib" regex=".*\.jar" />
- <lib dir="../../../dist/" regex="solr-velocity-\d.*\.jar" />
+ <lib dir="../../../../contrib/langid/lib/" regex=".*\.jar" />
+ <lib dir="../../../../dist/" regex="solr-langid-\d.*\.jar" />
+
+ <lib dir="../../../../contrib/velocity/lib" regex=".*\.jar" />
+ <lib dir="../../../../dist/" regex="solr-velocity-\d.*\.jar" />
<!-- an exact 'path' can be used instead of a 'dir' to specify a
specific jar file. This will cause a serious error to be logged
@@ -803,6 +805,13 @@
not be initialized until the first request that uses it.
-->
+
+ <requestHandler name="/dataimport" class="solr.DataImportHandler">
+ <lst name="defaults">
+ <str name="config">rss-data-config.xml</str>
+ </lst>
+ </requestHandler>
+
<!-- SearchHandler
http://wiki.apache.org/solr/SearchHandler
Copied: lucene/dev/trunk/solr/example/example-DIH/solr/solr/conf/schema.xml (from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/schema.xml)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/example-DIH/solr/solr/conf/schema.xml?p2=lucene/dev/trunk/solr/example/example-DIH/solr/solr/conf/schema.xml&p1=lucene/dev/trunk/solr/example/solr/collection1/conf/schema.xml&r1=1586108&r2=1586120&rev=1586120&view=diff
==============================================================================
--- lucene/dev/trunk/solr/example/solr/collection1/conf/schema.xml (original)
+++ lucene/dev/trunk/solr/example/example-DIH/solr/solr/conf/schema.xml Wed Apr 9 20:05:25 2014
@@ -45,7 +45,7 @@
that avoids logging every request
-->
-<schema name="example" version="1.5">
+<schema name="example-DIH-solr" version="1.5">
<!-- attribute "name" is the name of this schema and is only used for display purposes.
version="x.y" is Solr's version number for the schema syntax and
semantics. It should not normally be changed by applications.
Copied: lucene/dev/trunk/solr/example/example-DIH/solr/solr/conf/solrconfig.xml (from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/solrconfig.xml)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/example-DIH/solr/solr/conf/solrconfig.xml?p2=lucene/dev/trunk/solr/example/example-DIH/solr/solr/conf/solrconfig.xml&p1=lucene/dev/trunk/solr/example/solr/collection1/conf/solrconfig.xml&r1=1586108&r2=1586120&rev=1586120&view=diff
==============================================================================
--- lucene/dev/trunk/solr/example/solr/collection1/conf/solrconfig.xml (original)
+++ lucene/dev/trunk/solr/example/example-DIH/solr/solr/conf/solrconfig.xml Wed Apr 9 20:05:25 2014
@@ -72,17 +72,19 @@
The examples below can be used to load some solr-contribs along
with their external dependencies.
-->
- <lib dir="../../../contrib/extraction/lib" regex=".*\.jar" />
- <lib dir="../../../dist/" regex="solr-cell-\d.*\.jar" />
+ <lib dir="../../../../dist/" regex="solr-dataimporthandler-.*\.jar" />
- <lib dir="../../../contrib/clustering/lib/" regex=".*\.jar" />
- <lib dir="../../../dist/" regex="solr-clustering-\d.*\.jar" />
+ <lib dir="../../../../contrib/extraction/lib" regex=".*\.jar" />
+ <lib dir="../../../../dist/" regex="solr-cell-\d.*\.jar" />
- <lib dir="../../../contrib/langid/lib/" regex=".*\.jar" />
- <lib dir="../../../dist/" regex="solr-langid-\d.*\.jar" />
+ <lib dir="../../../../contrib/clustering/lib/" regex=".*\.jar" />
+ <lib dir="../../../../dist/" regex="solr-clustering-\d.*\.jar" />
- <lib dir="../../../contrib/velocity/lib" regex=".*\.jar" />
- <lib dir="../../../dist/" regex="solr-velocity-\d.*\.jar" />
+ <lib dir="../../../../contrib/langid/lib/" regex=".*\.jar" />
+ <lib dir="../../../../dist/" regex="solr-langid-\d.*\.jar" />
+
+ <lib dir="../../../../contrib/velocity/lib" regex=".*\.jar" />
+ <lib dir="../../../../dist/" regex="solr-velocity-\d.*\.jar" />
<!-- an exact 'path' can be used instead of a 'dir' to specify a
specific jar file. This will cause a serious error to be logged
@@ -803,6 +805,13 @@
not be initialized until the first request that uses it.
-->
+
+ <requestHandler name="/dataimport" class="solr.DataImportHandler">
+ <lst name="defaults">
+ <str name="config">solr-data-config.xml</str>
+ </lst>
+ </requestHandler>
+
<!-- SearchHandler
http://wiki.apache.org/solr/SearchHandler
Copied: lucene/dev/trunk/solr/example/example-DIH/solr/tika/conf/schema.xml (from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/schema.xml)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/example-DIH/solr/tika/conf/schema.xml?p2=lucene/dev/trunk/solr/example/example-DIH/solr/tika/conf/schema.xml&p1=lucene/dev/trunk/solr/example/solr/collection1/conf/schema.xml&r1=1586108&r2=1586120&rev=1586120&view=diff
==============================================================================
--- lucene/dev/trunk/solr/example/solr/collection1/conf/schema.xml (original)
+++ lucene/dev/trunk/solr/example/example-DIH/solr/tika/conf/schema.xml Wed Apr 9 20:05:25 2014
@@ -45,7 +45,7 @@
that avoids logging every request
-->
-<schema name="example" version="1.5">
+<schema name="example-DIH-tika" version="1.5">
<!-- attribute "name" is the name of this schema and is only used for display purposes.
version="x.y" is Solr's version number for the schema syntax and
semantics. It should not normally be changed by applications.
@@ -106,91 +106,13 @@
trailing underscores (e.g. _version_) are reserved.
-->
- <!-- If you remove this field, you must _also_ disable the update log in solrconfig.xml
- or Solr won't start. _version_ and update log are required for SolrCloud
- -->
- <field name="_version_" type="long" indexed="true" stored="true"/>
-
- <!-- points to the root document of a block of nested documents. Required for nested
- document support, may be removed otherwise
- -->
- <field name="_root_" type="string" indexed="true" stored="false"/>
-
- <!-- Only remove the "id" field if you have a very good reason to. While not strictly
- required, it is highly recommended. A <uniqueKey> is present in almost all Solr
- installations. See the <uniqueKey> declaration below where <uniqueKey> is set to "id".
- -->
- <field name="id" type="string" indexed="true" stored="true" required="true" multiValued="false" />
-
- <field name="sku" type="text_en_splitting_tight" indexed="true" stored="true" omitNorms="true"/>
- <field name="name" type="text_general" indexed="true" stored="true"/>
- <field name="manu" type="text_general" indexed="true" stored="true" omitNorms="true"/>
- <field name="cat" type="string" indexed="true" stored="true" multiValued="true"/>
- <field name="features" type="text_general" indexed="true" stored="true" multiValued="true"/>
- <field name="includes" type="text_general" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true" />
-
- <field name="weight" type="float" indexed="true" stored="true"/>
- <field name="price" type="float" indexed="true" stored="true"/>
- <field name="popularity" type="int" indexed="true" stored="true" />
- <field name="inStock" type="boolean" indexed="true" stored="true" />
-
- <field name="store" type="location" indexed="true" stored="true"/>
-
- <!-- Common metadata fields, named specifically to match up with
- SolrCell metadata when parsing rich documents such as Word, PDF.
- Some fields are multiValued only because Tika currently may return
- multiple values for them. Some metadata is parsed from the documents,
- but there are some which come from the client context:
- "content_type": From the HTTP headers of incoming stream
- "resourcename": From SolrCell request param resource.name
- -->
<field name="title" type="text_general" indexed="true" stored="true" multiValued="true"/>
- <field name="subject" type="text_general" indexed="true" stored="true"/>
- <field name="description" type="text_general" indexed="true" stored="true"/>
- <field name="comments" type="text_general" indexed="true" stored="true"/>
<field name="author" type="text_general" indexed="true" stored="true"/>
- <field name="keywords" type="text_general" indexed="true" stored="true"/>
- <field name="category" type="text_general" indexed="true" stored="true"/>
- <field name="resourcename" type="text_general" indexed="true" stored="true"/>
- <field name="url" type="text_general" indexed="true" stored="true"/>
- <field name="content_type" type="string" indexed="true" stored="true" multiValued="true"/>
- <field name="last_modified" type="date" indexed="true" stored="true"/>
- <field name="links" type="string" indexed="true" stored="true" multiValued="true"/>
-
- <!-- Main body of document extracted by SolrCell.
- NOTE: This field is not indexed by default, since it is also copied to "text"
- using copyField below. This is to save space. Use this field for returning and
- highlighting document content. Use the "text" field to search the content. -->
- <field name="content" type="text_general" indexed="false" stored="true" multiValued="true"/>
-
<!-- catchall field, containing all other searchable text fields (implemented
via copyField further on in this schema -->
<field name="text" type="text_general" indexed="true" stored="false" multiValued="true"/>
- <!-- catchall text field that indexes tokens both normally and in reverse for efficient
- leading wildcard queries. -->
- <field name="text_rev" type="text_general_rev" indexed="true" stored="false" multiValued="true"/>
-
- <!-- non-tokenized version of manufacturer to make it easier to sort or group
- results by manufacturer. copied from "manu" via copyField -->
- <field name="manu_exact" type="string" indexed="true" stored="false"/>
-
- <field name="payloads" type="payloads" indexed="true" stored="true"/>
-
-
- <!--
- Some fields such as popularity and manu_exact could be modified to
- leverage doc values:
- <field name="popularity" type="int" indexed="true" stored="true" docValues="true" />
- <field name="manu_exact" type="string" indexed="false" stored="false" docValues="true" />
- <field name="cat" type="string" indexed="true" stored="true" docValues="true" multiValued="true"/>
-
-
- Although it would make indexing slightly slower and the index bigger, it
- would also make the index faster to load, more memory-efficient and more
- NRT-friendly.
- -->
<!-- Dynamic field definitions allow using convention over configuration
for fields via the specification of patterns to match field names.
@@ -228,8 +150,6 @@
<dynamicField name="*_td" type="tdouble" indexed="true" stored="true"/>
<dynamicField name="*_tdt" type="tdate" indexed="true" stored="true"/>
- <dynamicField name="*_c" type="currency" indexed="true" stored="true"/>
-
<dynamicField name="ignored_*" type="ignored" multiValued="true"/>
<dynamicField name="attr_*" type="text_general" indexed="true" stored="true" multiValued="true"/>
@@ -247,7 +167,7 @@
<!-- Field to use to determine and enforce document uniqueness.
Unless this field is marked with required="false", it will be a required field
-->
- <uniqueKey>id</uniqueKey>
+ <!-- <uniqueKey>id</uniqueKey> -->
<!-- DEPRECATED: The defaultSearchField is consulted by various query parsers when
parsing a query string that isn't explicit about the field. Machine (non-user)
@@ -263,44 +183,6 @@
The default is OR, which is generally assumed so it is not a good idea to change it
globally here. The "q.op" request parameter takes precedence over this.
<solrQueryParser defaultOperator="OR"/> -->
-
- <!-- copyField commands copy one field to another at the time a document
- is added to the index. It's used either to index the same field differently,
- or to add multiple fields to the same field for easier/faster searching. -->
-
- <copyField source="cat" dest="text"/>
- <copyField source="name" dest="text"/>
- <copyField source="manu" dest="text"/>
- <copyField source="features" dest="text"/>
- <copyField source="includes" dest="text"/>
- <copyField source="manu" dest="manu_exact"/>
-
- <!-- Copy the price into a currency enabled field (default USD) -->
- <copyField source="price" dest="price_c"/>
-
- <!-- Text fields from SolrCell to search by default in our catch-all field -->
- <copyField source="title" dest="text"/>
- <copyField source="author" dest="text"/>
- <copyField source="description" dest="text"/>
- <copyField source="keywords" dest="text"/>
- <copyField source="content" dest="text"/>
- <copyField source="content_type" dest="text"/>
- <copyField source="resourcename" dest="text"/>
- <copyField source="url" dest="text"/>
-
- <!-- Create a string version of author for faceting -->
- <copyField source="author" dest="author_s"/>
-
- <!-- Above, multiple source fields are copied to the [text] field.
- Another way to map multiple source fields to the same
- destination field is to use the dynamic field syntax.
- copyField also supports a maxChars to copy setting. -->
-
- <!-- <copyField source="*_t" dest="text" maxChars="3000"/> -->
-
- <!-- copy name to alphaNameSort, a field designed for sorting by name -->
- <!-- <copyField source="name" dest="alphaNameSort"/> -->
-
<!-- field type definitions. The "name" attribute is
just a label to be used by field definitions. The "class"
@@ -443,46 +325,26 @@
<!-- A general text field that has reasonable, generic
cross-language defaults: it tokenizes with StandardTokenizer,
- removes stop words from case-insensitive "stopwords.txt"
- (empty by default), and down cases. At query time only, it
- also applies synonyms. -->
+ and down cases. -->
<fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
- <!-- in this example, we will only use synonyms at query time
- <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
- -->
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
- <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
<!-- A text field with defaults appropriate for English: it
- tokenizes with StandardTokenizer, removes English stop words
- (lang/stopwords_en.txt), down cases, protects words from protwords.txt, and
- finally applies Porter's stemming. The query time analyzer
- also applies synonyms from synonyms.txt. -->
+ tokenizes with StandardTokenizer, down cases, and
+ finally applies Porter's stemming. -->
<fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/>
- <!-- in this example, we will only use synonyms at query time
- <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
- -->
- <!-- Case insensitive stop word removal.
- -->
- <filter class="solr.StopFilterFactory"
- ignoreCase="true"
- words="lang/stopwords_en.txt"
- />
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.EnglishPossessiveFilterFactory"/>
- <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
<!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
<filter class="solr.EnglishMinimalStemFilterFactory"/>
-->
@@ -490,14 +352,8 @@
</analyzer>
<analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/>
- <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
- <filter class="solr.StopFilterFactory"
- ignoreCase="true"
- words="lang/stopwords_en.txt"
- />
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.EnglishPossessiveFilterFactory"/>
- <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
<!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
<filter class="solr.EnglishMinimalStemFilterFactory"/>
-->
@@ -517,30 +373,14 @@
<fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
<analyzer type="index">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
- <!-- in this example, we will only use synonyms at query time
- <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
- -->
- <!-- Case insensitive stop word removal.
- -->
- <filter class="solr.StopFilterFactory"
- ignoreCase="true"
- words="lang/stopwords_en.txt"
- />
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
<filter class="solr.PorterStemFilterFactory"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
- <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
- <filter class="solr.StopFilterFactory"
- ignoreCase="true"
- words="lang/stopwords_en.txt"
- />
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
<filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
<filter class="solr.PorterStemFilterFactory"/>
</analyzer>
</fieldType>
@@ -550,11 +390,8 @@
<fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
<analyzer>
<tokenizer class="solr.WhitespaceTokenizerFactory"/>
- <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_en.txt"/>
<filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
<filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
<filter class="solr.EnglishMinimalStemFilterFactory"/>
<!-- this filter can remove any duplicate tokens that appear at the same position - sometimes
possible with WordDelimiterFilter in conjuncton with stemming. -->
@@ -567,15 +404,12 @@
<fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
<analyzer type="index">
<tokenizer class="solr.StandardTokenizerFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
</analyzer>
<analyzer type="query">
<tokenizer class="solr.StandardTokenizerFactory"/>
- <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" />
<filter class="solr.LowerCaseFilterFactory"/>
</analyzer>
</fieldType>
@@ -708,19 +542,7 @@
<fieldType name="location_rpt" class="solr.SpatialRecursivePrefixTreeFieldType"
geo="true" distErrPct="0.025" maxDistErr="0.000009" units="degrees" />
- <!-- Money/currency field type. See http://wiki.apache.org/solr/MoneyFieldType
- Parameters:
- defaultCurrency: Specifies the default currency if none specified. Defaults to "USD"
- precisionStep: Specifies the precisionStep for the TrieLong field used for the amount
- providerClass: Lets you plug in other exchange provider backend:
- solr.FileExchangeRateProvider is the default and takes one parameter:
- currencyConfig: name of an xml file holding exchange rates
- solr.OpenExchangeRatesOrgProvider uses rates from openexchangerates.org:
- ratesFileLocation: URL or path to rates JSON file (default latest.json on the web)
- refreshInterval: Number of minutes between each rates fetch (default: 1440, min: 60)
- -->
- <fieldType name="currency" class="solr.CurrencyField" precisionStep="8" defaultCurrency="USD" currencyConfig="currency.xml" />
-
+
<!-- some examples for different languages (generally ordered by ISO code) -->
@@ -731,7 +553,6 @@
<tokenizer class="solr.StandardTokenizerFactory"/>
<!-- for any non-arabic -->
<filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ar.txt" />
<!-- normalizes ﻯ to ﻱ, etc -->
<filter class="solr.ArabicNormalizationFilterFactory"/>
<filter class="solr.ArabicStemFilterFactory"/>
@@ -743,8 +564,7 @@
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_bg.txt" />
- <filter class="solr.BulgarianStemFilterFactory"/>
+ <filter class="solr.BulgarianStemFilterFactory"/>
</analyzer>
</fieldType>
@@ -752,11 +572,8 @@
<fieldType name="text_ca" class="solr.TextField" positionIncrementGap="100">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
- <!-- removes l', etc -->
- <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ca.txt"/>
<filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ca.txt" />
- <filter class="solr.SnowballPorterFilterFactory" language="Catalan"/>
+ <filter class="solr.SnowballPorterFilterFactory" language="Catalan"/>
</analyzer>
</fieldType>
@@ -779,7 +596,6 @@
<filter class="solr.SoraniNormalizationFilterFactory"/>
<!-- for any latin text -->
<filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ckb.txt"/>
<filter class="solr.SoraniStemFilterFactory"/>
</analyzer>
</fieldType>
@@ -789,8 +605,7 @@
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_cz.txt" />
- <filter class="solr.CzechStemFilterFactory"/>
+ <filter class="solr.CzechStemFilterFactory"/>
</analyzer>
</fieldType>
@@ -799,8 +614,7 @@
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_da.txt" format="snowball" />
- <filter class="solr.SnowballPorterFilterFactory" language="Danish"/>
+ <filter class="solr.SnowballPorterFilterFactory" language="Danish"/>
</analyzer>
</fieldType>
@@ -809,7 +623,6 @@
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_de.txt" format="snowball" />
<filter class="solr.GermanNormalizationFilterFactory"/>
<filter class="solr.GermanLightStemFilterFactory"/>
<!-- less aggressive: <filter class="solr.GermanMinimalStemFilterFactory"/> -->
@@ -823,7 +636,6 @@
<tokenizer class="solr.StandardTokenizerFactory"/>
<!-- greek specific lowercase for sigma -->
<filter class="solr.GreekLowerCaseFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_el.txt" />
<filter class="solr.GreekStemFilterFactory"/>
</analyzer>
</fieldType>
@@ -833,7 +645,6 @@
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_es.txt" format="snowball" />
<filter class="solr.SpanishLightStemFilterFactory"/>
<!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Spanish"/> -->
</analyzer>
@@ -844,7 +655,6 @@
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_eu.txt" />
<filter class="solr.SnowballPorterFilterFactory" language="Basque"/>
</analyzer>
</fieldType>
@@ -858,7 +668,6 @@
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.ArabicNormalizationFilterFactory"/>
<filter class="solr.PersianNormalizationFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fa.txt" />
</analyzer>
</fieldType>
@@ -867,7 +676,6 @@
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fi.txt" format="snowball" />
<filter class="solr.SnowballPorterFilterFactory" language="Finnish"/>
<!-- less aggressive: <filter class="solr.FinnishLightStemFilterFactory"/> -->
</analyzer>
@@ -877,10 +685,7 @@
<fieldType name="text_fr" class="solr.TextField" positionIncrementGap="100">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
- <!-- removes l', etc -->
- <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_fr.txt"/>
<filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_fr.txt" format="snowball" />
<filter class="solr.FrenchLightStemFilterFactory"/>
<!-- less aggressive: <filter class="solr.FrenchMinimalStemFilterFactory"/> -->
<!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="French"/> -->
@@ -891,12 +696,7 @@
<fieldType name="text_ga" class="solr.TextField" positionIncrementGap="100">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
- <!-- removes d', etc -->
- <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_ga.txt"/>
- <!-- removes n-, etc. position increments is intentionally false! -->
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/hyphenations_ga.txt"/>
<filter class="solr.IrishLowerCaseFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ga.txt"/>
<filter class="solr.SnowballPorterFilterFactory" language="Irish"/>
</analyzer>
</fieldType>
@@ -906,7 +706,6 @@
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_gl.txt" />
<filter class="solr.GalicianStemFilterFactory"/>
<!-- less aggressive: <filter class="solr.GalicianMinimalStemFilterFactory"/> -->
</analyzer>
@@ -921,7 +720,6 @@
<filter class="solr.IndicNormalizationFilterFactory"/>
<!-- normalizes variation in spelling -->
<filter class="solr.HindiNormalizationFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hi.txt" />
<filter class="solr.HindiStemFilterFactory"/>
</analyzer>
</fieldType>
@@ -931,7 +729,6 @@
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hu.txt" format="snowball" />
<filter class="solr.SnowballPorterFilterFactory" language="Hungarian"/>
<!-- less aggressive: <filter class="solr.HungarianLightStemFilterFactory"/> -->
</analyzer>
@@ -942,7 +739,6 @@
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_hy.txt" />
<filter class="solr.SnowballPorterFilterFactory" language="Armenian"/>
</analyzer>
</fieldType>
@@ -952,7 +748,6 @@
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_id.txt" />
<!-- for a less aggressive approach (only inflectional suffixes), set stemDerivational to false -->
<filter class="solr.IndonesianStemFilterFactory" stemDerivational="true"/>
</analyzer>
@@ -962,10 +757,7 @@
<fieldType name="text_it" class="solr.TextField" positionIncrementGap="100">
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
- <!-- removes l', etc -->
- <filter class="solr.ElisionFilterFactory" ignoreCase="true" articles="lang/contractions_it.txt"/>
<filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_it.txt" format="snowball" />
<filter class="solr.ItalianLightStemFilterFactory"/>
<!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Italian"/> -->
</analyzer>
@@ -1001,22 +793,15 @@
userDictionary: user dictionary filename
userDictionaryEncoding: user dictionary encoding (default is UTF-8)
- See lang/userdict_ja.txt for a sample user dictionary file.
-
Punctuation characters are discarded by default. Use discardPunctuation="false" to keep them.
See http://wiki.apache.org/solr/JapaneseLanguageSupport for more on Japanese language support.
-->
<tokenizer class="solr.JapaneseTokenizerFactory" mode="search"/>
- <!--<tokenizer class="solr.JapaneseTokenizerFactory" mode="search" userDictionary="lang/userdict_ja.txt"/>-->
<!-- Reduces inflected verbs and adjectives to their base/dictionary forms (辞書形) -->
<filter class="solr.JapaneseBaseFormFilterFactory"/>
- <!-- Removes tokens with certain part-of-speech tags -->
- <filter class="solr.JapanesePartOfSpeechStopFilterFactory" tags="lang/stoptags_ja.txt" />
<!-- Normalizes full-width romaji to half-width and half-width kana to full-width (Unicode NFKC subset) -->
<filter class="solr.CJKWidthFilterFactory"/>
- <!-- Removes common tokens typically not useful for search, but have a negative effect on ranking -->
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ja.txt" />
<!-- Normalizes common katakana spelling variations by removing any last long sound character (U+30FC) -->
<filter class="solr.JapaneseKatakanaStemFilterFactory" minimumLength="4"/>
<!-- Lower-cases romaji characters -->
@@ -1029,7 +814,6 @@
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_lv.txt" />
<filter class="solr.LatvianStemFilterFactory"/>
</analyzer>
</fieldType>
@@ -1039,8 +823,6 @@
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_nl.txt" format="snowball" />
- <filter class="solr.StemmerOverrideFilterFactory" dictionary="lang/stemdict_nl.txt" ignoreCase="false"/>
<filter class="solr.SnowballPorterFilterFactory" language="Dutch"/>
</analyzer>
</fieldType>
@@ -1050,7 +832,6 @@
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_no.txt" format="snowball" />
<filter class="solr.SnowballPorterFilterFactory" language="Norwegian"/>
<!-- less aggressive: <filter class="solr.NorwegianLightStemFilterFactory" variant="nb"/> -->
<!-- singular/plural: <filter class="solr.NorwegianMinimalStemFilterFactory" variant="nb"/> -->
@@ -1063,7 +844,6 @@
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_pt.txt" format="snowball" />
<filter class="solr.PortugueseLightStemFilterFactory"/>
<!-- less aggressive: <filter class="solr.PortugueseMinimalStemFilterFactory"/> -->
<!-- more aggressive: <filter class="solr.SnowballPorterFilterFactory" language="Portuguese"/> -->
@@ -1076,7 +856,6 @@
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ro.txt" />
<filter class="solr.SnowballPorterFilterFactory" language="Romanian"/>
</analyzer>
</fieldType>
@@ -1086,7 +865,6 @@
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_ru.txt" format="snowball" />
<filter class="solr.SnowballPorterFilterFactory" language="Russian"/>
<!-- less aggressive: <filter class="solr.RussianLightStemFilterFactory"/> -->
</analyzer>
@@ -1097,7 +875,6 @@
<analyzer>
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_sv.txt" format="snowball" />
<filter class="solr.SnowballPorterFilterFactory" language="Swedish"/>
<!-- less aggressive: <filter class="solr.SwedishLightStemFilterFactory"/> -->
</analyzer>
@@ -1109,7 +886,6 @@
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.LowerCaseFilterFactory"/>
<filter class="solr.ThaiWordFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="true" words="lang/stopwords_th.txt" />
</analyzer>
</fieldType>
@@ -1119,7 +895,6 @@
<tokenizer class="solr.StandardTokenizerFactory"/>
<filter class="solr.ApostropheFilterFactory"/>
<filter class="solr.TurkishLowerCaseFilterFactory"/>
- <filter class="solr.StopFilterFactory" ignoreCase="false" words="lang/stopwords_tr.txt" />
<filter class="solr.SnowballPorterFilterFactory" language="Turkish"/>
</analyzer>
</fieldType>
Copied: lucene/dev/trunk/solr/example/example-DIH/solr/tika/conf/solrconfig.xml (from r1586108, lucene/dev/trunk/solr/example/solr/collection1/conf/solrconfig.xml)
URL: http://svn.apache.org/viewvc/lucene/dev/trunk/solr/example/example-DIH/solr/tika/conf/solrconfig.xml?p2=lucene/dev/trunk/solr/example/example-DIH/solr/tika/conf/solrconfig.xml&p1=lucene/dev/trunk/solr/example/solr/collection1/conf/solrconfig.xml&r1=1586108&r2=1586120&rev=1586120&view=diff
==============================================================================
--- lucene/dev/trunk/solr/example/solr/collection1/conf/solrconfig.xml (original)
+++ lucene/dev/trunk/solr/example/example-DIH/solr/tika/conf/solrconfig.xml Wed Apr 9 20:05:25 2014
@@ -72,17 +72,20 @@
The examples below can be used to load some solr-contribs along
with their external dependencies.
-->
- <lib dir="../../../contrib/extraction/lib" regex=".*\.jar" />
- <lib dir="../../../dist/" regex="solr-cell-\d.*\.jar" />
+ <lib dir="../../../../contrib/dataimporthandler/lib/" regex=".*\.jar" />
+ <lib dir="../../../../dist/" regex="solr-dataimporthandler-.*\.jar" />
- <lib dir="../../../contrib/clustering/lib/" regex=".*\.jar" />
- <lib dir="../../../dist/" regex="solr-clustering-\d.*\.jar" />
+ <lib dir="../../../../contrib/extraction/lib" regex=".*\.jar" />
+ <lib dir="../../../../dist/" regex="solr-cell-\d.*\.jar" />
- <lib dir="../../../contrib/langid/lib/" regex=".*\.jar" />
- <lib dir="../../../dist/" regex="solr-langid-\d.*\.jar" />
+ <lib dir="../../../../contrib/clustering/lib/" regex=".*\.jar" />
+ <lib dir="../../../../dist/" regex="solr-clustering-\d.*\.jar" />
- <lib dir="../../../contrib/velocity/lib" regex=".*\.jar" />
- <lib dir="../../../dist/" regex="solr-velocity-\d.*\.jar" />
+ <lib dir="../../../../contrib/langid/lib/" regex=".*\.jar" />
+ <lib dir="../../../../dist/" regex="solr-langid-\d.*\.jar" />
+
+ <lib dir="../../../../contrib/velocity/lib" regex=".*\.jar" />
+ <lib dir="../../../../dist/" regex="solr-velocity-\d.*\.jar" />
<!-- an exact 'path' can be used instead of a 'dir' to specify a
specific jar file. This will cause a serious error to be logged
@@ -353,9 +356,11 @@
is recommended (see below).
"dir" - the target directory for transaction logs, defaults to the
solr data directory. -->
+ <!--
<updateLog>
<str name="dir">${solr.ulog.dir:}</str>
</updateLog>
+ -->
<!-- AutoCommit
@@ -803,6 +808,13 @@
not be initialized until the first request that uses it.
-->
+
+ <requestHandler name="/dataimport" class="solr.DataImportHandler">
+ <lst name="defaults">
+ <str name="config">tika-data-config.xml</str>
+ </lst>
+ </requestHandler>
+
<!-- SearchHandler
http://wiki.apache.org/solr/SearchHandler
@@ -1558,31 +1570,6 @@
</requestHandler>
- <!-- Query Elevation Component
-
- http://wiki.apache.org/solr/QueryElevationComponent
-
- a search component that enables you to configure the top
- results for a given query regardless of the normal lucene
- scoring.
- -->
- <searchComponent name="elevator" class="solr.QueryElevationComponent" >
- <!-- pick a fieldType to analyze queries -->
- <str name="queryFieldType">string</str>
- <str name="config-file">elevate.xml</str>
- </searchComponent>
-
- <!-- A request handler for demonstrating the elevator component -->
- <requestHandler name="/elevate" class="solr.SearchHandler" startup="lazy">
- <lst name="defaults">
- <str name="echoParams">explicit</str>
- <str name="df">text</str>
- </lst>
- <arr name="last-components">
- <str>elevator</str>
- </arr>
- </requestHandler>
-
<!-- Highlighting Component
http://wiki.apache.org/solr/HighlightingParameters