You are viewing a plain text version of this content. The canonical link for it is here.
Posted to solr-commits@lucene.apache.org by gs...@apache.org on 2008/12/06 14:04:28 UTC
svn commit: r723977 [2/2] - in /lucene/solr/trunk: ./ client/java/solrj/src/org/apache/solr/client/solrj/util/ contrib/dataimporthandler/ contrib/extraction/ contrib/extraction/lib/ contrib/extraction/src/ contrib/extraction/src/main/ contrib/extractio...

Propchange: lucene/solr/trunk/contrib/extraction/src/test/java/org/apache/solr/handler/ExtractingRequestHandlerTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/solr/trunk/contrib/extraction/src/test/resources/example.html
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/src/test/resources/example.html?rev=723977&view=auto
==============================================================================
--- lucene/solr/trunk/contrib/extraction/src/test/resources/example.html (added)
+++ lucene/solr/trunk/contrib/extraction/src/test/resources/example.html Sat Dec  6 05:04:26 2008
@@ -0,0 +1,49 @@
+<html>
+<head>
+  <title>Welcome to Solr</title>
+</head>
+<body>
+<p>
+  Here is some text
+</p>
+<div>Here is some text in a div</div>
+<div>This has a <a href="http://www.apache.org">link</a>.</div>
+<a href="#news">News</a>
+<ul class="minitoc">
+<li>
+<a href="#03+October+2008+-+Solr+Logo+Contest">03 October 2008 - Solr Logo Contest</a>
+</li>
+<li>
+<a href="#15+September+2008+-+Solr+1.3.0+Available">15 September 2008 - Solr 1.3.0 Available</a>
+</li>
+<li>
+<a href="#28+August+2008+-+Lucene%2FSolr+at+ApacheCon+New+Orleans">28 August 2008 - Lucene/Solr at ApacheCon New Orleans</a>
+</li>
+<li>
+<a href="#03+September+2007+-+Lucene+at+ApacheCon+Atlanta">03 September 2007 - Lucene at ApacheCon Atlanta</a>
+</li>
+<li>
+<a href="#06+June+2007%3A+Release+1.2+available">06 June 2007: Release 1.2 available</a>
+</li>
+<li>
+<a href="#17+January+2007%3A+Solr+graduates+from+Incubator">17 January 2007: Solr graduates from Incubator</a>
+</li>
+<li>
+<a href="#22+December+2006%3A+Release+1.1.0+available">22 December 2006: Release 1.1.0 available</a>
+</li>
+<li>
+<a href="#15+August+2006%3A+Solr+at+ApacheCon+US">15 August 2006: Solr at ApacheCon US</a>
+</li>
+<li>
+<a href="#21+April+2006%3A+Solr+at+ApacheCon">21 April 2006: Solr at ApacheCon</a>
+</li>
+<li>
+<a href="#21+February+2006%3A+nightly+builds">21 February 2006: nightly builds</a>
+</li>
+<li>
+<a href="#17+January+2006%3A+Solr+Joins+Apache+Incubator">17 January 2006: Solr Joins Apache Incubator</a>
+</li>
+</ul>
+
+</body>
+</html>

Propchange: lucene/solr/trunk/contrib/extraction/src/test/resources/example.html
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/solr/trunk/contrib/extraction/src/test/resources/simple.html
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/src/test/resources/simple.html?rev=723977&view=auto
==============================================================================
--- lucene/solr/trunk/contrib/extraction/src/test/resources/simple.html (added)
+++ lucene/solr/trunk/contrib/extraction/src/test/resources/simple.html Sat Dec  6 05:04:26 2008
@@ -0,0 +1,12 @@
+<html>
+<head>
+  <title>Welcome to Solr</title>
+</head>
+<body>
+<p>
+  Here is some text
+</p>
+<div>Here is some text in a div</div>
+<div>This has a <a href="http://www.apache.org">link</a>.</div>
+</body>
+</html>

Propchange: lucene/solr/trunk/contrib/extraction/src/test/resources/simple.html
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/solr/trunk/contrib/extraction/src/test/resources/solr-word.pdf
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/src/test/resources/solr-word.pdf?rev=723977&view=auto
==============================================================================
Binary file - no diff available.

Propchange: lucene/solr/trunk/contrib/extraction/src/test/resources/solr-word.pdf
------------------------------------------------------------------------------
    svn:mime-type = application/pdf

Added: lucene/solr/trunk/contrib/extraction/src/test/resources/solr/conf/protwords.txt
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/src/test/resources/solr/conf/protwords.txt?rev=723977&view=auto
==============================================================================
--- lucene/solr/trunk/contrib/extraction/src/test/resources/solr/conf/protwords.txt (added)
+++ lucene/solr/trunk/contrib/extraction/src/test/resources/solr/conf/protwords.txt Sat Dec  6 05:04:26 2008
@@ -0,0 +1,20 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+#use a protected word file to avoid stemming two
+#unrelated words to the same base word.
+#to test, we will use words that would normally obviously be stemmed.
+cats
+ridding

Propchange: lucene/solr/trunk/contrib/extraction/src/test/resources/solr/conf/protwords.txt
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/solr/trunk/contrib/extraction/src/test/resources/solr/conf/schema.xml
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/src/test/resources/solr/conf/schema.xml?rev=723977&view=auto
==============================================================================
--- lucene/solr/trunk/contrib/extraction/src/test/resources/solr/conf/schema.xml (added)
+++ lucene/solr/trunk/contrib/extraction/src/test/resources/solr/conf/schema.xml Sat Dec  6 05:04:26 2008
@@ -0,0 +1,467 @@
+<?xml version="1.0" ?>
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- The Solr schema file. This file should be named "schema.xml" and
+     should be located where the classloader for the Solr webapp can find it.
+
+     This schema is used for testing, and as such has everything and the 
+     kitchen sink thrown in. See example/solr/conf/schema.xml for a 
+     more concise example.
+
+     $Id: schema.xml 382610 2006-03-03 01:43:03Z yonik $
+     $Source: /cvs/main/searching/solr-configs/test/WEB-INF/classes/schema.xml,v $
+     $Name:  $
+  -->
+
+<schema name="test" version="1.0">
+  <types>
+
+    <!-- field type definitions... note that the "name" attribute is
+         just a label to be used by field definitions.  The "class"
+         attribute and any other attributes determine the real type and
+         behavior of the fieldtype.
+      -->
+
+    <!-- numeric field types that store and index the text
+         value verbatim (and hence don't sort correctly or support range queries.)
+         These are provided more for backward compatability, allowing one
+         to create a schema that matches an existing lucene index.
+    -->
+    <fieldType name="integer" class="solr.IntField"/>
+    <fieldType name="long" class="solr.LongField"/>
+    <fieldtype name="float" class="solr.FloatField"/>
+    <fieldType name="double" class="solr.DoubleField"/>
+
+    <!-- numeric field types that manipulate the value into
+       a string value that isn't human readable in it's internal form,
+       but sorts correctly and supports range queries.
+
+         If sortMissingLast="true" then a sort on this field will cause documents
+       without the field to come after documents with the field,
+       regardless of the requested sort order.
+         If sortMissingFirst="true" then a sort on this field will cause documents
+       without the field to come before documents with the field,
+       regardless of the requested sort order.
+         If sortMissingLast="false" and sortMissingFirst="false" (the default),
+       then default lucene sorting will be used which places docs without the field
+       first in an ascending sort and last in a descending sort.
+    -->
+    <fieldtype name="sint" class="solr.SortableIntField" sortMissingLast="true"/>
+    <fieldtype name="slong" class="solr.SortableLongField" sortMissingLast="true"/>
+    <fieldtype name="sfloat" class="solr.SortableFloatField" sortMissingLast="true"/>
+    <fieldtype name="sdouble" class="solr.SortableDoubleField" sortMissingLast="true"/>
+
+    <!-- bcd versions of sortable numeric type may provide smaller
+         storage space and support very large numbers.
+    -->
+    <fieldtype name="bcdint" class="solr.BCDIntField" sortMissingLast="true"/>
+    <fieldtype name="bcdlong" class="solr.BCDLongField" sortMissingLast="true"/>
+    <fieldtype name="bcdstr" class="solr.BCDStrField" sortMissingLast="true"/>
+
+    <!-- Field type demonstrating an Analyzer failure -->
+    <fieldtype name="failtype1" class="solr.TextField">
+      <analyzer type="index">
+          <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+          <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
+          <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+    </fieldtype>
+
+    <!-- Demonstrating ignoreCaseChange -->
+    <fieldtype name="wdf_nocase" class="solr.TextField">
+      <analyzer>
+          <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+          <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="0" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="0"/>
+          <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+    </fieldtype>
+    
+     <fieldtype name="wdf_preserve" class="solr.TextField">
+      <analyzer>
+          <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+          <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0" preserveOriginal="1"/>
+          <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+    </fieldtype>
+
+
+    <!-- HighlitText optimizes storage for (long) columns which will be highlit -->
+    <fieldtype name="highlittext" class="solr.TextField" compressThreshold="345" />
+
+    <fieldtype name="boolean" class="solr.BoolField" sortMissingLast="true"/>
+    <fieldtype name="string" class="solr.StrField" sortMissingLast="true"/>
+
+    <!-- format for date is 1995-12-31T23:59:59.999Z and only the fractional
+         seconds part (.999) is optional.
+      -->
+    <fieldtype name="date" class="solr.DateField" sortMissingLast="true"/>
+
+    <!-- solr.TextField allows the specification of custom
+         text analyzers specified as a tokenizer and a list
+         of token filters.
+      -->
+    <fieldtype name="text" class="solr.TextField">
+      <analyzer>
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.StandardFilterFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+        <filter class="solr.StopFilterFactory"/>
+        <!-- lucene PorterStemFilterFactory deprecated
+          <filter class="solr.PorterStemFilterFactory"/>
+        -->
+        <filter class="solr.EnglishPorterFilterFactory"/>
+      </analyzer>
+    </fieldtype>
+
+
+    <fieldtype name="nametext" class="solr.TextField">
+      <analyzer class="org.apache.lucene.analysis.WhitespaceAnalyzer"/>
+    </fieldtype>
+
+    <fieldtype name="teststop" class="solr.TextField">
+       <analyzer>
+        <tokenizer class="solr.LowerCaseTokenizerFactory"/>
+        <filter class="solr.StandardFilterFactory"/>
+        <filter class="solr.StopFilterFactory" words="stopwords.txt"/>
+      </analyzer>
+    </fieldtype>
+
+    <!-- fieldtypes in this section isolate tokenizers and tokenfilters for testing -->
+    <fieldtype name="lowertok" class="solr.TextField">
+      <analyzer><tokenizer class="solr.LowerCaseTokenizerFactory"/></analyzer>
+    </fieldtype>
+    <fieldtype name="keywordtok" class="solr.TextField">
+      <analyzer><tokenizer class="solr.KeywordTokenizerFactory"/></analyzer>
+    </fieldtype>
+    <fieldtype name="standardtok" class="solr.TextField">
+      <analyzer><tokenizer class="solr.StandardTokenizerFactory"/></analyzer>
+    </fieldtype>
+    <fieldtype name="lettertok" class="solr.TextField">
+      <analyzer><tokenizer class="solr.LetterTokenizerFactory"/></analyzer>
+    </fieldtype>
+    <fieldtype name="whitetok" class="solr.TextField">
+      <analyzer><tokenizer class="solr.WhitespaceTokenizerFactory"/></analyzer>
+    </fieldtype>
+    <fieldtype name="HTMLstandardtok" class="solr.TextField">
+      <analyzer><tokenizer class="solr.HTMLStripStandardTokenizerFactory"/></analyzer>
+    </fieldtype>
+    <fieldtype name="HTMLwhitetok" class="solr.TextField">
+      <analyzer><tokenizer class="solr.HTMLStripWhitespaceTokenizerFactory"/></analyzer>
+    </fieldtype>
+    <fieldtype name="standardtokfilt" class="solr.TextField">
+      <analyzer>
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.StandardFilterFactory"/>
+      </analyzer>
+    </fieldtype>
+    <fieldtype name="standardfilt" class="solr.TextField">
+      <analyzer>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.StandardFilterFactory"/>
+      </analyzer>
+    </fieldtype>
+    <fieldtype name="lowerfilt" class="solr.TextField">
+      <analyzer>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+    </fieldtype>
+    <fieldtype name="patternreplacefilt" class="solr.TextField">
+      <analyzer type="index">
+        <tokenizer class="solr.KeywordTokenizerFactory"/>
+        <filter class="solr.PatternReplaceFilterFactory"
+                pattern="([^a-zA-Z])" replacement="_" replace="all"
+        />
+      </analyzer>
+      <analyzer type="query">
+        <tokenizer class="solr.KeywordTokenizerFactory"/>
+      </analyzer>
+    </fieldtype>
+    <fieldtype name="porterfilt" class="solr.TextField">
+      <analyzer>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.PorterStemFilterFactory"/>
+      </analyzer>
+    </fieldtype>
+    <!-- fieldtype name="snowballfilt" class="solr.TextField">
+      <analyzer>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.SnowballPorterFilterFactory"/>
+      </analyzer>
+    </fieldtype -->
+    <fieldtype name="engporterfilt" class="solr.TextField">
+      <analyzer>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.EnglishPorterFilterFactory"/>
+      </analyzer>
+    </fieldtype>
+    <fieldtype name="custengporterfilt" class="solr.TextField">
+      <analyzer>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.EnglishPorterFilterFactory" protected="protwords.txt"/>
+      </analyzer>
+    </fieldtype>
+    <fieldtype name="stopfilt" class="solr.TextField">
+      <analyzer>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true"/>
+      </analyzer>
+    </fieldtype>
+    <fieldtype name="custstopfilt" class="solr.TextField">
+      <analyzer>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.StopFilterFactory" words="stopwords.txt"/>
+      </analyzer>
+    </fieldtype>
+    <fieldtype name="lengthfilt" class="solr.TextField">
+      <analyzer>
+        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <filter class="solr.LengthFilterFactory" min="2" max="5"/>
+      </analyzer>
+    </fieldtype>
+
+    <fieldtype name="subword" class="solr.TextField" multiValued="true" positionIncrementGap="100">
+      <analyzer type="index">
+          <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+          <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
+          <filter class="solr.LowerCaseFilterFactory"/>
+          <filter class="solr.StopFilterFactory"/>
+          <filter class="solr.EnglishPorterFilterFactory"/>
+      </analyzer>
+      <analyzer type="query">
+          <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+          <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0"/>
+          <filter class="solr.LowerCaseFilterFactory"/>
+          <filter class="solr.StopFilterFactory"/>
+          <filter class="solr.EnglishPorterFilterFactory"/>
+      </analyzer>
+    </fieldtype>
+
+    <!-- more flexible in matching skus, but more chance of a false match -->
+    <fieldtype name="skutype1" class="solr.TextField">
+      <analyzer type="index">
+          <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+          <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
+          <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+      <analyzer type="query">
+          <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+          <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
+          <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+    </fieldtype>
+
+    <!-- less flexible in matching skus, but less chance of a false match -->
+    <fieldtype name="skutype2" class="solr.TextField">
+      <analyzer type="index">
+          <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+          <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
+          <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+      <analyzer type="query">
+          <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+          <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
+          <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+    </fieldtype>
+
+    <!-- less flexible in matching skus, but less chance of a false match -->
+    <fieldtype name="syn" class="solr.TextField">
+      <analyzer>
+          <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+          <filter name="syn" class="solr.SynonymFilterFactory" synonyms="synonyms.txt"/>
+      </analyzer>
+    </fieldtype>
+    
+    <!-- Demonstrates How RemoveDuplicatesTokenFilter makes stemmed
+         synonyms "better"
+      -->
+    <fieldtype name="dedup" class="solr.TextField">
+      <analyzer>
+          <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+          <filter class="solr.SynonymFilterFactory"
+                  synonyms="synonyms.txt" expand="true" />
+          <filter class="solr.EnglishPorterFilterFactory"/>
+          <filter class="solr.RemoveDuplicatesTokenFilterFactory" />
+      </analyzer>
+    </fieldtype>
+
+    <fieldtype  name="unstored" class="solr.StrField" indexed="true" stored="false"/>
+
+
+  <fieldtype name="textgap" class="solr.TextField" multiValued="true" positionIncrementGap="100">
+      <analyzer>
+          <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+          <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+  </fieldtype>
+
+ </types>
+
+
+ <fields>
+   <field name="id" type="integer" indexed="true" stored="true" multiValued="false" required="false"/>
+   <field name="name" type="nametext" indexed="true" stored="true"/>
+   <field name="text" type="text" indexed="true" stored="false"/>
+   <field name="subject" type="text" indexed="true" stored="true"/>
+   <field name="title" type="nametext" indexed="true" stored="true"/>
+   <field name="weight" type="float" indexed="true" stored="true"/>
+   <field name="bday" type="date" indexed="true" stored="true"/>
+
+   <field name="title_stemmed" type="text" indexed="true" stored="false"/>
+   <field name="title_lettertok" type="lettertok" indexed="true" stored="false"/>
+
+   <field name="syn" type="syn" indexed="true" stored="true"/>
+
+   <!-- to test property inheritance and overriding -->
+   <field name="shouldbeunstored" type="unstored" />
+   <field name="shouldbestored" type="unstored" stored="true"/>
+   <field name="shouldbeunindexed" type="unstored" indexed="false" stored="true"/>
+
+
+   <!-- test different combinations of indexed and stored -->
+   <field name="bind" type="boolean" indexed="true" stored="false"/>
+   <field name="bsto" type="boolean" indexed="false" stored="true"/>
+   <field name="bindsto" type="boolean" indexed="true" stored="true"/>
+   <field name="isto" type="integer" indexed="false" stored="true"/>
+   <field name="iind" type="integer" indexed="true" stored="false"/>
+   <field name="ssto" type="string" indexed="false" stored="true"/>
+   <field name="sind" type="string" indexed="true" stored="false"/>
+   <field name="sindsto" type="string" indexed="true" stored="true"/>
+
+   <!-- test combinations of term vector settings -->
+   <field name="test_basictv" type="text" termVectors="true"/>
+   <field name="test_notv" type="text" termVectors="false"/>
+   <field name="test_postv" type="text" termVectors="true" termPositions="true"/>
+   <field name="test_offtv" type="text" termVectors="true" termOffsets="true"/>
+   <field name="test_posofftv" type="text" termVectors="true" 
+     termPositions="true" termOffsets="true"/>
+
+   <!-- test highlit field settings -->
+   <field name="test_hlt" type="highlittext" indexed="true" compressed="true"/>
+   <field name="test_hlt_off" type="highlittext" indexed="true" compressed="false"/>
+
+   <!-- fields to test individual tokenizers and tokenfilters -->
+   <field name="teststop" type="teststop" indexed="true" stored="true"/>
+   <field name="lowertok" type="lowertok" indexed="true" stored="true"/>
+   <field name="keywordtok" type="keywordtok" indexed="true" stored="true"/>
+   <field name="standardtok" type="standardtok" indexed="true" stored="true"/>
+   <field name="HTMLstandardtok" type="HTMLstandardtok" indexed="true" stored="true"/>
+   <field name="lettertok" type="lettertok" indexed="true" stored="true"/>
+   <field name="whitetok" type="whitetok" indexed="true" stored="true"/>
+   <field name="HTMLwhitetok" type="HTMLwhitetok" indexed="true" stored="true"/>
+   <field name="standardtokfilt" type="standardtokfilt" indexed="true" stored="true"/>
+   <field name="standardfilt" type="standardfilt" indexed="true" stored="true"/>
+   <field name="lowerfilt" type="lowerfilt" indexed="true" stored="true"/>
+   <field name="patternreplacefilt" type="patternreplacefilt" indexed="true" stored="true"/>
+   <field name="porterfilt" type="porterfilt" indexed="true" stored="true"/>
+   <field name="engporterfilt" type="engporterfilt" indexed="true" stored="true"/>
+   <field name="custengporterfilt" type="custengporterfilt" indexed="true" stored="true"/>
+   <field name="stopfilt" type="stopfilt" indexed="true" stored="true"/>
+   <field name="custstopfilt" type="custstopfilt" indexed="true" stored="true"/>
+   <field name="lengthfilt" type="lengthfilt" indexed="true" stored="true"/>
+   <field name="dedup" type="dedup" indexed="true" stored="true"/>
+   <field name="wdf_nocase" type="wdf_nocase" indexed="true" stored="true"/>
+   <field name="wdf_preserve" type="wdf_preserve" indexed="true" stored="true"/>
+
+   <field name="numberpartfail" type="failtype1" indexed="true" stored="true"/>
+
+   <field name="nullfirst" type="string" indexed="true" stored="true" sortMissingFirst="true"/>
+
+   <field name="subword" type="subword" indexed="true" stored="true"/>
+   <field name="sku1" type="skutype1" indexed="true" stored="true"/>
+   <field name="sku2" type="skutype2" indexed="true" stored="true"/>
+
+   <field name="textgap" type="textgap" indexed="true" stored="true"/>
+   
+   <field name="timestamp" type="date" indexed="true" stored="true" default="NOW" multiValued="false"/>
+   <field name="multiDefault" type="string" indexed="true" stored="true" default="muLti-Default" multiValued="true"/>
+   <field name="intDefault" type="sint" indexed="true" stored="true" default="42" multiValued="false"/>
+   
+   <field name="extractedDate" type="date" indexed="true" stored="true" multiValued="true"/>
+   <field name="extractedContent" type="text" indexed="true" stored="true" multiValued="true"/>
+   <field name="extractedProducer" type="text" indexed="true" stored="true" multiValued="true"/>
+   <field name="extractedCreator" type="text" indexed="true" stored="true" multiValued="true"/>
+   <field name="extractedKeywords" type="text" indexed="true" stored="true" multiValued="true"/>
+   <field name="extractedAuthor" type="text" indexed="true" stored="true" multiValued="true"/>
+   <field name="extractedLanguage" type="string" indexed="true" stored="true" multiValued="true"/>
+   <field name="resourceName" type="string" indexed="true" stored="true" multiValued="true"/>
+
+   
+   <!-- Dynamic field definitions.  If a field name is not found, dynamicFields
+        will be used if the name matches any of the patterns.
+        RESTRICTION: the glob-like pattern in the name attribute must have
+        a "*" only at the start or the end.
+        EXAMPLE:  name="*_i" will match any field ending in _i (like myid_i, z_i)
+        Longer patterns will be matched first.  if equal size patterns
+        both match, the first appearing in the schema will be used.
+   -->
+   <dynamicField name="*_i"  type="sint"    indexed="true"  stored="true"/>
+   <dynamicField name="*_s"  type="string"  indexed="true"  stored="true"/>
+   <dynamicField name="*_s1"  type="string"  indexed="true"  stored="true" multiValued="false"/>
+   <dynamicField name="*_l"  type="slong"   indexed="true"  stored="true"/>
+   <dynamicField name="*_t"  type="text"    indexed="true"  stored="true"/>
+   <dynamicField name="*_b"  type="boolean" indexed="true"  stored="true"/>
+   <dynamicField name="*_f"  type="sfloat"  indexed="true"  stored="true"/>
+   <dynamicField name="*_d"  type="sdouble" indexed="true"  stored="true"/>
+   <dynamicField name="*_dt" type="date"    indexed="true"  stored="true"/>
+   <dynamicField name="*_bcd" type="bcdstr" indexed="true"  stored="true"/>
+
+   <dynamicField name="*_sI" type="string"  indexed="true"  stored="false"/>
+   <dynamicField name="*_sS" type="string"  indexed="false" stored="true"/>
+   <dynamicField name="t_*"  type="text"    indexed="true"  stored="true"/>
+   <dynamicField name="tv_*"  type="text" indexed="true"  stored="true" 
+      termVectors="true" termPositions="true" termOffsets="true"/>
+
+   <dynamicField name="stream_*"  type="text" indexed="true"  stored="true"/>
+   <dynamicField name="Content*"  type="text" indexed="true"  stored="true"/>
+
+
+   <!-- special fields for dynamic copyField test -->
+   <dynamicField name="dynamic_*" type="string" indexed="true" stored="true"/>
+   <dynamicField name="*_dynamic" type="string" indexed="true" stored="true"/>
+  
+   <!-- for testing to ensure that longer patterns are matched first -->
+   <dynamicField name="*aa"  type="string"  indexed="true" stored="true"/>
+   <dynamicField name="*aaa" type="integer" indexed="false" stored="true"/>
+
+   <!-- ignored becuase not stored or indexed -->
+   <dynamicField name="*_ignored" type="text" indexed="false" stored="false"/>
+
+ </fields>
+
+ <defaultSearchField>text</defaultSearchField>
+ <uniqueKey>id</uniqueKey>
+
+  <!-- copyField commands copy one field to another at the time a document
+        is added to the index.  It's used either to index the same field different
+        ways, or to add multiple fields to the same field for easier/faster searching.
+   -->
+   <copyField source="title" dest="title_stemmed"/>
+   <copyField source="title" dest="title_lettertok"/>
+
+   <copyField source="title" dest="text"/>
+   <copyField source="subject" dest="text"/>
+ 
+   <copyField source="*_t" dest="text"/>
+   
+   <!-- dynamic destination -->
+   <copyField source="*_dynamic" dest="dynamic_*"/>
+    
+
+</schema>

Propchange: lucene/solr/trunk/contrib/extraction/src/test/resources/solr/conf/schema.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/solr/trunk/contrib/extraction/src/test/resources/solr/conf/solrconfig.xml
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/src/test/resources/solr/conf/solrconfig.xml?rev=723977&view=auto
==============================================================================
--- lucene/solr/trunk/contrib/extraction/src/test/resources/solr/conf/solrconfig.xml (added)
+++ lucene/solr/trunk/contrib/extraction/src/test/resources/solr/conf/solrconfig.xml Sat Dec  6 05:04:26 2008
@@ -0,0 +1,359 @@
+<?xml version="1.0" ?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<!-- $Id: solrconfig.xml 382610 2006-03-03 01:43:03Z yonik $
+     $Source$
+     $Name$
+  -->
+
+<config>
+
+  <jmx />
+
+  <!-- Used to specify an alternate directory to hold all index data.
+       It defaults to "index" if not present, and should probably
+       not be changed if replication is in use. -->
+  <dataDir>${solr.data.dir:./solr/data}</dataDir>
+
+  <indexDefaults>
+   <!-- Values here affect all index writers and act as a default
+   unless overridden. -->
+    <!-- Values here affect all index writers and act as a default unless overridden. -->
+    <useCompoundFile>false</useCompoundFile>
+    <mergeFactor>10</mergeFactor>
+    <!-- If both ramBufferSizeMB and maxBufferedDocs is set, then Lucene will flush based on whichever limit is hit first.
+     -->
+    <!--<maxBufferedDocs>1000</maxBufferedDocs>-->
+    <!-- Tell Lucene when to flush documents to disk.
+    Giving Lucene more memory for indexing means faster indexing at the cost of more RAM
+
+    If both ramBufferSizeMB and maxBufferedDocs is set, then Lucene will flush based on whichever limit is hit first.
+
+    -->
+    <ramBufferSizeMB>32</ramBufferSizeMB>
+    <maxMergeDocs>2147483647</maxMergeDocs>
+    <maxFieldLength>10000</maxFieldLength>
+    <writeLockTimeout>1000</writeLockTimeout>
+    <commitLockTimeout>10000</commitLockTimeout>
+
+    <!-- 
+     Expert: Turn on Lucene's auto commit capability.
+
+     NOTE: Despite the name, this value does not have any relation to Solr's autoCommit functionality
+
+     -->
+    <luceneAutoCommit>false</luceneAutoCommit>
+
+    <!--
+     Expert:
+     The Merge Policy in Lucene controls how merging is handled by Lucene.  The default in 2.3 is the LogByteSizeMergePolicy, previous
+     versions used LogDocMergePolicy.
+
+     LogByteSizeMergePolicy chooses segments to merge based on their size.  The Lucene 2.2 default, LogDocMergePolicy chose when
+     to merge based on number of documents
+
+     Other implementations of MergePolicy must have a no-argument constructor
+     -->
+    <mergePolicy>org.apache.lucene.index.LogByteSizeMergePolicy</mergePolicy>
+
+    <!--
+     Expert:
+     The Merge Scheduler in Lucene controls how merges are performed.  The ConcurrentMergeScheduler (Lucene 2.3 default)
+      can perform merges in the background using separate threads.  The SerialMergeScheduler (Lucene 2.2 default) does not.
+     -->
+    <mergeScheduler>org.apache.lucene.index.ConcurrentMergeScheduler</mergeScheduler>
+    <!-- these are global... can't currently override per index -->
+    <writeLockTimeout>1000</writeLockTimeout>
+    <commitLockTimeout>10000</commitLockTimeout>
+
+    <lockType>single</lockType>
+  </indexDefaults>
+
+  <mainIndex>
+    <!-- lucene options specific to the main on-disk lucene index -->
+    <useCompoundFile>false</useCompoundFile>
+    <mergeFactor>10</mergeFactor>
+    <ramBufferSizeMB>32</ramBufferSizeMB>
+    <maxMergeDocs>2147483647</maxMergeDocs>
+    <maxFieldLength>10000</maxFieldLength>
+
+    <unlockOnStartup>true</unlockOnStartup>
+  </mainIndex>
+
+  <updateHandler class="solr.DirectUpdateHandler2">
+
+    <!-- autocommit pending docs if certain criteria are met 
+    <autoCommit> 
+      <maxDocs>10000</maxDocs>
+      <maxTime>3600000</maxTime> 
+    </autoCommit>
+    -->
+    <!-- represents a lower bound on the frequency that commits may
+    occur (in seconds). NOTE: not yet implemented
+    
+    <commitIntervalLowerBound>0</commitIntervalLowerBound>
+    -->
+
+    <!-- The RunExecutableListener executes an external command.
+         exe - the name of the executable to run
+         dir - dir to use as the current working directory. default="."
+         wait - the calling thread waits until the executable returns. default="true"
+         args - the arguments to pass to the program.  default=nothing
+         env - environment variables to set.  default=nothing
+      -->
+    <!-- A postCommit event is fired after every commit
+    <listener event="postCommit" class="solr.RunExecutableListener">
+      <str name="exe">/var/opt/resin3/__PORT__/scripts/solr/snapshooter</str>
+      <str name="dir">/var/opt/resin3/__PORT__</str>
+      <bool name="wait">true</bool>
+      <arr name="args"> <str>arg1</str> <str>arg2</str> </arr>
+      <arr name="env"> <str>MYVAR=val1</str> </arr>
+    </listener>
+    -->
+
+
+  </updateHandler>
+
+
+  <query>
+    <!-- Maximum number of clauses in a boolean query... can affect
+        range or wildcard queries that expand to big boolean
+        queries.  An exception is thrown if exceeded.
+    -->
+    <maxBooleanClauses>1024</maxBooleanClauses>
+
+
+    <!-- Cache specification for Filters or DocSets - unordered set of *all* documents
+         that match a particular query.
+      -->
+    <filterCache
+      class="solr.search.LRUCache"
+      size="512"
+      initialSize="512"
+      autowarmCount="256"/>
+
+    <queryResultCache
+      class="solr.search.LRUCache"
+      size="512"
+      initialSize="512"
+      autowarmCount="1024"/>
+
+    <documentCache
+      class="solr.search.LRUCache"
+      size="512"
+      initialSize="512"
+      autowarmCount="0"/>
+
+    <!-- If true, stored fields that are not requested will be loaded lazily.
+    -->
+    <enableLazyFieldLoading>true</enableLazyFieldLoading>
+
+    <!--
+
+    <cache name="myUserCache"
+      class="solr.search.LRUCache"
+      size="4096"
+      initialSize="1024"
+      autowarmCount="1024"
+      regenerator="MyRegenerator"
+      />
+    -->
+
+
+    <useFilterForSortedQuery>true</useFilterForSortedQuery>
+
+    <queryResultWindowSize>10</queryResultWindowSize>
+
+    <!-- set maxSize artificially low to exercise both types of sets -->
+    <HashDocSet maxSize="3" loadFactor="0.75"/>
+
+
+    <!-- boolToFilterOptimizer converts boolean clauses with zero boost
+         into cached filters if the number of docs selected by the clause exceeds
+         the threshold (represented as a fraction of the total index)
+    -->
+    <boolTofilterOptimizer enabled="false" cacheSize="32" threshold=".05"/>
+
+
+    <!-- a newSearcher event is fired whenever a new searcher is being prepared
+         and there is a current searcher handling requests (aka registered). -->
+    <!-- QuerySenderListener takes an array of NamedList and executes a
+         local query request for each NamedList in sequence. -->
+    <!--
+    <listener event="newSearcher" class="solr.QuerySenderListener">
+      <arr name="queries">
+        <lst> <str name="q">solr</str> <str name="start">0</str> <str name="rows">10</str> </lst>
+        <lst> <str name="q">rocks</str> <str name="start">0</str> <str name="rows">10</str> </lst>
+      </arr>
+    </listener>
+    -->
+
+    <!-- a firstSearcher event is fired whenever a new searcher is being
+         prepared but there is no current registered searcher to handle
+         requests or to gain prewarming data from. -->
+    <!--
+    <listener event="firstSearcher" class="solr.QuerySenderListener">
+      <arr name="queries">
+        <lst> <str name="q">fast_warm</str> <str name="start">0</str> <str name="rows">10</str> </lst>
+      </arr>
+    </listener>
+    -->
+
+
+  </query>
+
+
+  <!-- An alternate set representation that uses an integer hash to store filters (sets of docids).
+       If the set cardinality <= maxSize elements, then HashDocSet will be used instead of the bitset
+       based HashBitset. -->
+
+  <!-- requestHandler plugins... incoming queries will be dispatched to the
+     correct handler based on the qt (query type) param matching the
+     name of registered handlers.
+      The "standard" request handler is the default and will be used if qt
+     is not specified in the request.
+  -->
+  <requestHandler name="standard" class="solr.StandardRequestHandler">
+  	<bool name="httpCaching">true</bool>
+  </requestHandler>
+  <requestHandler name="dismaxOldStyleDefaults"
+                  class="solr.DisMaxRequestHandler" >
+     <!-- for historic reasons, DisMaxRequestHandler will use all of
+          it's init params as "defaults" if there is no "defaults" list
+          specified
+     -->
+     <float name="tie">0.01</float>
+     <str name="qf">
+        text^0.5 features_t^1.0 subject^1.4 title_stemmed^2.0
+     </str>
+     <str name="pf">
+        text^0.2 features_t^1.1 subject^1.4 title_stemmed^2.0 title^1.5
+     </str>
+     <str name="bf">
+        ord(weight)^0.5 recip(rord(iind),1,1000,1000)^0.3
+     </str>
+     <str name="mm">
+        3&lt;-1 5&lt;-2 6&lt;90%
+     </str>
+     <int name="ps">100</int>
+  </requestHandler>
+  <requestHandler name="dismax" class="solr.DisMaxRequestHandler" >
+    <lst name="defaults">
+     <str name="q.alt">*:*</str>
+     <float name="tie">0.01</float>
+     <str name="qf">
+        text^0.5 features_t^1.0 subject^1.4 title_stemmed^2.0
+     </str>
+     <str name="pf">
+        text^0.2 features_t^1.1 subject^1.4 title_stemmed^2.0 title^1.5
+     </str>
+     <str name="bf">
+        ord(weight)^0.5 recip(rord(iind),1,1000,1000)^0.3
+     </str>
+     <str name="mm">
+        3&lt;-1 5&lt;-2 6&lt;90%
+     </str>
+     <int name="ps">100</int>
+    </lst>
+  </requestHandler>
+  <requestHandler name="old" class="solr.tst.OldRequestHandler" >
+    <int name="myparam">1000</int>
+    <float name="ratio">1.4142135</float>
+    <arr name="myarr"><int>1</int><int>2</int></arr>
+    <str>foo</str>
+  </requestHandler>
+  <requestHandler name="oldagain" class="solr.tst.OldRequestHandler" >
+    <lst name="lst1"> <str name="op">sqrt</str> <int name="val">2</int> </lst>
+    <lst name="lst2"> <str name="op">log</str> <float name="val">10</float> </lst>
+  </requestHandler>
+
+  <requestHandler name="test" class="solr.tst.TestRequestHandler" />
+
+  <!-- test query parameter defaults -->
+  <requestHandler name="defaults" class="solr.StandardRequestHandler">
+    <lst name="defaults">
+      <int name="rows">4</int>
+      <bool name="hl">true</bool>
+      <str name="hl.fl">text,name,subject,title,whitetok</str>
+    </lst>
+  </requestHandler>
+
+  <!-- test query parameter defaults -->
+  <requestHandler name="lazy" class="solr.StandardRequestHandler" startup="lazy">
+    <lst name="defaults">
+      <int name="rows">4</int>
+      <bool name="hl">true</bool>
+      <str name="hl.fl">text,name,subject,title,whitetok</str>
+    </lst>
+  </requestHandler>
+
+  <requestHandler name="/update"     class="solr.XmlUpdateRequestHandler"          />
+  <requestHandler name="/update/csv" class="solr.CSVRequestHandler" startup="lazy">
+  	<bool name="httpCaching">false</bool>
+  </requestHandler>
+  
+  <requestHandler name="/update/extract" class="org.apache.solr.handler.ExtractingRequestHandler"/>
+
+
+  <highlighting>
+   <!-- Configure the standard fragmenter -->
+   <fragmenter name="gap" class="org.apache.solr.highlight.GapFragmenter" default="true">
+    <lst name="defaults">
+     <int name="hl.fragsize">100</int>
+    </lst>
+   </fragmenter>
+
+   <fragmenter name="regex" class="org.apache.solr.highlight.RegexFragmenter">
+    <lst name="defaults">
+     <int name="hl.fragsize">70</int>
+    </lst>
+   </fragmenter>
+
+   <!-- Configure the standard formatter -->
+   <formatter name="html" class="org.apache.solr.highlight.HtmlFormatter" default="true">
+    <lst name="defaults">
+     <str name="hl.simple.pre"><![CDATA[<em>]]></str>
+     <str name="hl.simple.post"><![CDATA[</em>]]></str>
+    </lst>
+   </formatter>
+  </highlighting>
+
+
+  <!-- enable streaming for testing... -->
+  <requestDispatcher handleSelect="true" >
+    <requestParsers enableRemoteStreaming="true" multipartUploadLimitInKB="2048" />
+    <httpCaching lastModifiedFrom="openTime" etagSeed="Solr" never304="false">
+      <cacheControl>max-age=30, public</cacheControl>
+    </httpCaching>
+  </requestDispatcher>
+
+  <admin>
+    <defaultQuery>solr</defaultQuery>
+    <gettableFiles>solrconfig.xml scheam.xml admin-extra.html</gettableFiles>
+  </admin>
+
+  <!-- test getting system property -->
+  <propTest attr1="${solr.test.sys.prop1}-$${literal}"
+            attr2="${non.existent.sys.prop:default-from-config}">prefix-${solr.test.sys.prop2}-suffix</propTest>
+
+
+
+
+
+</config>

Propchange: lucene/solr/trunk/contrib/extraction/src/test/resources/solr/conf/solrconfig.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/solr/trunk/contrib/extraction/src/test/resources/solr/conf/stopwords.txt
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/src/test/resources/solr/conf/stopwords.txt?rev=723977&view=auto
==============================================================================
--- lucene/solr/trunk/contrib/extraction/src/test/resources/solr/conf/stopwords.txt (added)
+++ lucene/solr/trunk/contrib/extraction/src/test/resources/solr/conf/stopwords.txt Sat Dec  6 05:04:26 2008
@@ -0,0 +1,16 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+stopworda
+stopwordb

Propchange: lucene/solr/trunk/contrib/extraction/src/test/resources/solr/conf/stopwords.txt
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/solr/trunk/contrib/extraction/src/test/resources/solr/conf/synonyms.txt
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/src/test/resources/solr/conf/synonyms.txt?rev=723977&view=auto
==============================================================================
--- lucene/solr/trunk/contrib/extraction/src/test/resources/solr/conf/synonyms.txt (added)
+++ lucene/solr/trunk/contrib/extraction/src/test/resources/solr/conf/synonyms.txt Sat Dec  6 05:04:26 2008
@@ -0,0 +1,22 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+a => aa
+b => b1 b2
+c => c1,c2
+a\=>a => b\=>b
+a\,a => b\,b
+foo,bar,baz
+
+Television,TV,Televisions

Propchange: lucene/solr/trunk/contrib/extraction/src/test/resources/solr/conf/synonyms.txt
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/solr/trunk/contrib/extraction/src/test/resources/version_control.txt
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/src/test/resources/version_control.txt?rev=723977&view=auto
==============================================================================
--- lucene/solr/trunk/contrib/extraction/src/test/resources/version_control.txt (added)
+++ lucene/solr/trunk/contrib/extraction/src/test/resources/version_control.txt Sat Dec  6 05:04:26 2008
@@ -0,0 +1,18 @@
+Solr Version Control System
+ 
+Overview
+ 
+The Solr source code resides in the Apache Subversion (SVN) repository.
+The command-line SVN client can be obtained here or as an optional package
+for cygwin.
+
+The TortoiseSVN GUI client for Windows can be obtained here. There
+are also SVN plugins available for older versions of Eclipse and 
+IntelliJ IDEA that don't have subversion support already included.
+
+-------------------------------
+
+Note: This document is an excerpt from a document Licensed to the
+Apache Software Foundation (ASF) under one or more contributor
+license agreements. See the XML version (version_control.xml) for
+more details.

Propchange: lucene/solr/trunk/contrib/extraction/src/test/resources/version_control.txt
------------------------------------------------------------------------------
    svn:eol-style = native

Added: lucene/solr/trunk/contrib/extraction/src/test/resources/version_control.xml
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/extraction/src/test/resources/version_control.xml?rev=723977&view=auto
==============================================================================
--- lucene/solr/trunk/contrib/extraction/src/test/resources/version_control.xml (added)
+++ lucene/solr/trunk/contrib/extraction/src/test/resources/version_control.xml Sat Dec  6 05:04:26 2008
@@ -0,0 +1,42 @@
+<?xml version="1.0"?>
+<!DOCTYPE document PUBLIC "-//APACHE//DTD Documentation V2.0//EN" "http://forrest.apache.org/dtd/document-v20.dtd">
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements.  See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ (the "License"); you may not use this file except in compliance with
+ the License.  You may obtain a copy of the License at
+
+     http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+-->
+
+<document>
+  
+  <header>
+    <title>Solr Version Control System</title>
+  </header>
+  
+  <body>
+  
+    <section>
+      <title>Overview</title>
+      <p>
+        The Solr source code resides in the Apache <a href="http://subversion.tigris.org/">Subversion (SVN)</a> repository.
+        The command-line SVN client can be obtained <a href="http://subversion.tigris.org/project_packages.html">here</a> or as an optional package for <a href="http://www.cygwin.com/">cygwin</a>.
+        The TortoiseSVN GUI client for Windows can be obtained <a href="http://tortoisesvn.tigris.org/">here</a>. There
+        are also SVN plugins available for older versions of <a href="http://subclipse.tigris.org/">Eclipse</a> and 
+        <a href="http://svnup.tigris.org/">IntelliJ IDEA</a> that don't have subversion support already included.
+      </p>
+    </section>
+    <p>Here is some more text.  It contains <a href="http://lucene.apache.org">a link</a>. </p>
+    <p>Text Here</p>
+  </body>
+  
+</document>

Propchange: lucene/solr/trunk/contrib/extraction/src/test/resources/version_control.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/solr/trunk/contrib/javascript/build.xml
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/javascript/build.xml?rev=723977&r1=723976&r2=723977&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/javascript/build.xml (original)
+++ lucene/solr/trunk/contrib/javascript/build.xml Sat Dec  6 05:04:26 2008
@@ -105,6 +105,8 @@
 	  </target>
 	-->
 
+  <target name="example" depends="build"/>
+	
 
 	<!-- do nothing for now, required for generate maven artifacts -->
   <target name="build"/>

Modified: lucene/solr/trunk/contrib/velocity/build.xml
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/contrib/velocity/build.xml?rev=723977&r1=723976&r2=723977&view=diff
==============================================================================
--- lucene/solr/trunk/contrib/velocity/build.xml (original)
+++ lucene/solr/trunk/contrib/velocity/build.xml Sat Dec  6 05:04:26 2008
@@ -121,4 +121,6 @@
     <!-- TODO: Autolaunch Solr  --> 
   </target>
 
+  <target name="example" depends="build"/>
+
 </project>

Added: lucene/solr/trunk/src/java/org/apache/solr/common/util/DateUtil.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/common/util/DateUtil.java?rev=723977&view=auto
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/common/util/DateUtil.java (added)
+++ lucene/solr/trunk/src/java/org/apache/solr/common/util/DateUtil.java Sat Dec  6 05:04:26 2008
@@ -0,0 +1,200 @@
+package org.apache.solr.common.util;
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.text.DateFormat;
+import java.text.ParseException;
+import java.text.SimpleDateFormat;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Calendar;
+import java.util.Collection;
+import java.util.Date;
+import java.util.Iterator;
+import java.util.Locale;
+import java.util.TimeZone;
+
+
+/**
+ * This class has some code from HttpClient DateUtil.
+ */
+public class DateUtil {
+  //start HttpClient
+  /**
+   * Date format pattern used to parse HTTP date headers in RFC 1123 format.
+   */
+  public static final String PATTERN_RFC1123 = "EEE, dd MMM yyyy HH:mm:ss zzz";
+
+  /**
+   * Date format pattern used to parse HTTP date headers in RFC 1036 format.
+   */
+  public static final String PATTERN_RFC1036 = "EEEE, dd-MMM-yy HH:mm:ss zzz";
+
+  /**
+   * Date format pattern used to parse HTTP date headers in ANSI C
+   * <code>asctime()</code> format.
+   */
+  public static final String PATTERN_ASCTIME = "EEE MMM d HH:mm:ss yyyy";
+  //These are included for back compat
+  private static final Collection<String> DEFAULT_HTTP_CLIENT_PATTERNS = Arrays.asList(
+          PATTERN_ASCTIME, PATTERN_RFC1036, PATTERN_RFC1123);
+
+  private static final Date DEFAULT_TWO_DIGIT_YEAR_START;
+
+  static {
+    Calendar calendar = Calendar.getInstance();
+    calendar.set(2000, Calendar.JANUARY, 1, 0, 0);
+    DEFAULT_TWO_DIGIT_YEAR_START = calendar.getTime();
+  }
+
+  private static final TimeZone GMT = TimeZone.getTimeZone("GMT");
+
+  //end HttpClient
+
+  //---------------------------------------------------------------------------------------
+
+  /**
+   * A suite of default date formats that can be parsed, and thus transformed to the Solr specific format
+   */
+  public static final Collection<String> DEFAULT_DATE_FORMATS = new ArrayList<String>();
+
+  static {
+    DEFAULT_DATE_FORMATS.add("yyyy-MM-dd'T'HH:mm:ss'Z'");
+    DEFAULT_DATE_FORMATS.add("yyyy-MM-dd'T'HH:mm:ss");
+    DEFAULT_DATE_FORMATS.add("yyyy-MM-dd");
+    DEFAULT_DATE_FORMATS.add("yyyy-MM-dd hh:mm:ss");
+    DEFAULT_DATE_FORMATS.add("yyyy-MM-dd HH:mm:ss");
+    DEFAULT_DATE_FORMATS.add("EEE MMM d hh:mm:ss z yyyy");
+    DEFAULT_DATE_FORMATS.addAll(DEFAULT_HTTP_CLIENT_PATTERNS);
+  }
+
+  /**
+   * Returns a formatter that can be use by the current thread if needed to
+   * convert Date objects to the Internal representation.
+   *
+   * @param d The input date to parse
+   * @return The parsed {@link java.util.Date}
+   * @throws java.text.ParseException If the input can't be parsed
+   * @throws org.apache.commons.httpclient.util.DateParseException
+   *                                  If the input can't be parsed
+   */
+  public static Date parseDate(String d) throws ParseException {
+    return parseDate(d, DEFAULT_DATE_FORMATS);
+  }
+
+  public static Date parseDate(String d, Collection<String> fmts) throws ParseException {
+    // 2007-04-26T08:05:04Z
+    if (d.endsWith("Z") && d.length() > 20) {
+      return getThreadLocalDateFormat().parse(d);
+    }
+    return parseDate(d, fmts, null);
+  }
+
+  /**
+   * Slightly modified from org.apache.commons.httpclient.util.DateUtil.parseDate
+   * <p/>
+   * Parses the date value using the given date formats.
+   *
+   * @param dateValue   the date value to parse
+   * @param dateFormats the date formats to use
+   * @param startDate   During parsing, two digit years will be placed in the range
+   *                    <code>startDate</code> to <code>startDate + 100 years</code>. This value may
+   *                    be <code>null</code>. When <code>null</code> is given as a parameter, year
+   *                    <code>2000</code> will be used.
+   * @return the parsed date
+   * @throws ParseException if none of the dataFormats could parse the dateValue
+   */
+  public static Date parseDate(
+          String dateValue,
+          Collection<String> dateFormats,
+          Date startDate
+  ) throws ParseException {
+
+    if (dateValue == null) {
+      throw new IllegalArgumentException("dateValue is null");
+    }
+    if (dateFormats == null) {
+      dateFormats = DEFAULT_HTTP_CLIENT_PATTERNS;
+    }
+    if (startDate == null) {
+      startDate = DEFAULT_TWO_DIGIT_YEAR_START;
+    }
+    // trim single quotes around date if present
+    // see issue #5279
+    if (dateValue.length() > 1
+            && dateValue.startsWith("'")
+            && dateValue.endsWith("'")
+            ) {
+      dateValue = dateValue.substring(1, dateValue.length() - 1);
+    }
+
+    SimpleDateFormat dateParser = null;
+    Iterator formatIter = dateFormats.iterator();
+
+    while (formatIter.hasNext()) {
+      String format = (String) formatIter.next();
+      if (dateParser == null) {
+        dateParser = new SimpleDateFormat(format, Locale.US);
+        dateParser.setTimeZone(GMT);
+        dateParser.set2DigitYearStart(startDate);
+      } else {
+        dateParser.applyPattern(format);
+      }
+      try {
+        return dateParser.parse(dateValue);
+      } catch (ParseException pe) {
+        // ignore this exception, we will try the next format
+      }
+    }
+
+    // we were unable to parse the date
+    throw new ParseException("Unable to parse the date " + dateValue, 0);
+  }
+
+
+  /**
+   * Returns a formatter that can be use by the current thread if needed to
+   * convert Date objects to the Internal representation.
+   *
+   * @return The {@link java.text.DateFormat} for the current thread
+   */
+  public static DateFormat getThreadLocalDateFormat() {
+    return fmtThreadLocal.get();
+  }
+
+  public static TimeZone UTC = TimeZone.getTimeZone("UTC");
+  private static ThreadLocalDateFormat fmtThreadLocal = new ThreadLocalDateFormat();
+
+  private static class ThreadLocalDateFormat extends ThreadLocal<DateFormat> {
+    DateFormat proto;
+
+    public ThreadLocalDateFormat() {
+      super();
+      //2007-04-26T08:05:04Z
+      SimpleDateFormat tmp = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss.SSS'Z'");
+      tmp.setTimeZone(UTC);
+      proto = tmp;
+    }
+
+    @Override
+    protected DateFormat initialValue() {
+      return (DateFormat) proto.clone();
+    }
+  }
+
+
+}
\ No newline at end of file

Propchange: lucene/solr/trunk/src/java/org/apache/solr/common/util/DateUtil.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/solr/trunk/src/java/org/apache/solr/core/SolrResourceLoader.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/core/SolrResourceLoader.java?rev=723977&r1=723976&r2=723977&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/core/SolrResourceLoader.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/core/SolrResourceLoader.java Sat Dec  6 05:04:26 2008
@@ -31,6 +31,7 @@
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 import java.nio.charset.Charset;
+import java.lang.reflect.Constructor;
 
 import javax.naming.Context;
 import javax.naming.InitialContext;
@@ -308,6 +309,36 @@
     }
     return obj;
   }
+
+  public Object newInstance(String cName, String [] subPackages, Class[] params, Object[] args){
+    Class clazz = findClass(cName,subPackages);
+    if( clazz == null ) {
+      throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,
+          "Can not find class: "+cName + " in " + classLoader, false);
+    }
+
+    Object obj = null;
+    try {
+
+      Constructor constructor = clazz.getConstructor(params);
+      obj = constructor.newInstance(args);
+    }
+    catch (Exception e) {
+      throw new SolrException( SolrException.ErrorCode.SERVER_ERROR,
+          "Error instantiating class: '" + clazz.getName()+"'", e, false );
+    }
+
+    if( obj instanceof SolrCoreAware ) {
+      assertAwareCompatibility( SolrCoreAware.class, obj );
+      waitingForCore.add( (SolrCoreAware)obj );
+    }
+    if( obj instanceof ResourceLoaderAware ) {
+      assertAwareCompatibility( ResourceLoaderAware.class, obj );
+      waitingForResources.add( (ResourceLoaderAware)obj );
+    }
+    return obj;
+  }
+
   
   /**
    * Tell all {@link SolrCoreAware} instances about the SolrCore
@@ -436,4 +467,4 @@
     throw new SolrException( SolrException.ErrorCode.SERVER_ERROR, builder.toString() );
   }
 
-}
\ No newline at end of file
+}

Modified: lucene/solr/trunk/src/java/org/apache/solr/util/TestHarness.java
URL: http://svn.apache.org/viewvc/lucene/solr/trunk/src/java/org/apache/solr/util/TestHarness.java?rev=723977&r1=723976&r2=723977&view=diff
==============================================================================
--- lucene/solr/trunk/src/java/org/apache/solr/util/TestHarness.java (original)
+++ lucene/solr/trunk/src/java/org/apache/solr/util/TestHarness.java Sat Dec  6 05:04:26 2008
@@ -315,11 +315,7 @@
    * @see LocalSolrQueryRequest
    */
   public String query(String handler, SolrQueryRequest req) throws IOException, Exception {
-    SolrQueryResponse rsp = new SolrQueryResponse();
-    core.execute(core.getRequestHandler(handler),req,rsp);
-    if (rsp.getException() != null) {
-      throw rsp.getException();
-    }
+    SolrQueryResponse rsp = queryAndResponse(handler, req);
 
     StringWriter sw = new StringWriter(32000);
     QueryResponseWriter responseWriter = core.getQueryResponseWriter(req);
@@ -330,6 +326,15 @@
     return sw.toString();
   }
 
+  public SolrQueryResponse queryAndResponse(String handler, SolrQueryRequest req) throws Exception {
+    SolrQueryResponse rsp = new SolrQueryResponse();
+    core.execute(core.getRequestHandler(handler),req,rsp);
+    if (rsp.getException() != null) {
+      throw rsp.getException();
+    }
+    return rsp;
+  }
+
 
   /**
    * A helper method which valides a String against an array of XPath test