You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by si...@apache.org on 2011/05/31 13:25:50 UTC

svn commit: r1129631 [6/7] - in /lucene/dev/branches/docvalues: ./ dev-tools/eclipse/ dev-tools/idea/.idea/ dev-tools/idea/lucene/contrib/spellchecker/ dev-tools/idea/modules/suggest/ dev-tools/maven/lucene/contrib/ dev-tools/maven/lucene/contrib/spell...

Modified: lucene/dev/branches/docvalues/solr/contrib/extraction/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/solr/contrib/extraction/CHANGES.txt?rev=1129631&r1=1129630&r2=1129631&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/solr/contrib/extraction/CHANGES.txt (original)
+++ lucene/dev/branches/docvalues/solr/contrib/extraction/CHANGES.txt Tue May 31 11:25:37 2011
@@ -24,16 +24,20 @@ Current Version: Tika 0.8 (released 11/0
 
 $Id$
 
-================== Release 4.0-dev ==================
+================== Release 4.0.0-dev ==============
 
 (No Changes)
 
-================== Release 3.2-dev ==================
+================== Release 3.3.0-dev ==============
+
+(No Changes)
+
+================== Release 3.2.0 ==================
 
 * SOLR-2480: Add ignoreTikaException flag so that users can ignore TikaException but index
   meta data. (Shinichiro Abe, koji)
 
-================== Release 3.1-dev ==================
+================== Release 3.1.0 ==================
 
 * SOLR-1902: Upgraded to Tika 0.8 and changed deprecated parse call
 

Modified: lucene/dev/branches/docvalues/solr/contrib/uima/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/solr/contrib/uima/CHANGES.txt?rev=1129631&r1=1129630&r2=1129631&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/solr/contrib/uima/CHANGES.txt (original)
+++ lucene/dev/branches/docvalues/solr/contrib/uima/CHANGES.txt Tue May 31 11:25:37 2011
@@ -19,7 +19,15 @@ WhitespaceTokenizer v2.3.1-SNAPSHOT rev.
 
 $Id$
 
-==================  3.2.0-dev ==================
+==================  4.0.0-dev ==============
+
+(No Changes)
+
+==================  3.3.0-dev ==============
+
+(No Changes)
+
+==================  3.2.0 ==================
 
 Upgrading from Solr 3.1
 ----------------------

Modified: lucene/dev/branches/docvalues/solr/example/solr/conf/schema.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/solr/example/solr/conf/schema.xml?rev=1129631&r1=1129630&r2=1129631&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/solr/example/solr/conf/schema.xml (original)
+++ lucene/dev/branches/docvalues/solr/example/solr/conf/schema.xml Tue May 31 11:25:37 2011
@@ -45,15 +45,16 @@
     that avoids logging every request
 -->
 
-<schema name="example" version="1.3">
+<schema name="example" version="1.4">
   <!-- attribute "name" is the name of this schema and is only used for display purposes.
        Applications should change this to reflect the nature of the search collection.
-       version="1.2" is Solr's version number for the schema syntax and semantics.  It should
+       version="1.4" is Solr's version number for the schema syntax and semantics.  It should
        not normally be changed by applications.
        1.0: multiValued attribute did not exist, all fields are multiValued by nature
        1.1: multiValued attribute introduced, false by default 
        1.2: omitTermFreqAndPositions attribute introduced, true by default except for text fields.
        1.3: removed optional field compress feature
+       1.4: default auto-phrase (QueryParser feature) to off
      -->
 
   <types>
@@ -190,16 +191,87 @@
       </analyzer>
     </fieldType>
 
-    <!-- A text field that uses WordDelimiterFilter to enable splitting and matching of
-        words on case-change, alpha numeric boundaries, and non-alphanumeric chars,
-        so that a query of "wifi" or "wi fi" could match a document containing "Wi-Fi".
-        Synonyms and stopwords are customized by external files, and stemming is enabled.
-        The attribute autoGeneratePhraseQueries="true" (the default) causes words that get split to
-        form phrase queries. For example, WordDelimiterFilter splitting text:pdp-11 will cause the parser
-        to generate text:"pdp 11" rather than (text:PDP OR text:11).
-        NOTE: autoGeneratePhraseQueries="true" tends to not work well for non whitespace delimited languages.
+    <!-- A general text field that has reasonable, generic
+         cross-language defaults: it tokenizes with StandardTokenizer,
+	 removes stop words from case-insensitive "stopwords.txt"
+	 (empty by default), and down cases.  At query time only, it
+	 also applies synonyms. -->
+    <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
+      <analyzer type="index">
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
+        <!-- in this example, we will only use synonyms at query time
+        <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
+        -->
+        <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+      <analyzer type="query">
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
+        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+    </fieldType>
+
+    <!-- A text field with defaults appropriate for English: it
+         tokenizes with StandardTokenizer, removes English stop words
+         (stopwords_en.txt), down cases, protects words from protwords.txt, and
+         finally applies Porter's stemming.  The query time analyzer
+         also applies synonyms from synonyms.txt. -->
+    <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
+      <analyzer type="index">
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <!-- in this example, we will only use synonyms at query time
+        <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
+        -->
+        <!-- Case insensitive stop word removal.
+          add enablePositionIncrements=true in both the index and query
+          analyzers to leave a 'gap' for more accurate phrase queries.
+        -->
+        <filter class="solr.StopFilterFactory"
+                ignoreCase="true"
+                words="stopwords_en.txt"
+                enablePositionIncrements="true"
+                />
+        <filter class="solr.LowerCaseFilterFactory"/>
+	<filter class="solr.EnglishPossessiveFilterFactory"/>
+        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+	<!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
+        <filter class="solr.EnglishMinimalStemFilterFactory"/>
+	-->
+        <filter class="solr.PorterStemFilterFactory"/>
+      </analyzer>
+      <analyzer type="query">
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
+        <filter class="solr.StopFilterFactory"
+                ignoreCase="true"
+                words="stopwords_en.txt"
+                enablePositionIncrements="true"
+                />
+        <filter class="solr.LowerCaseFilterFactory"/>
+	<filter class="solr.EnglishPossessiveFilterFactory"/>
+        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+	<!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
+        <filter class="solr.EnglishMinimalStemFilterFactory"/>
+	-->
+        <filter class="solr.PorterStemFilterFactory"/>
+      </analyzer>
+    </fieldType>
+
+    <!-- A text field with defaults appropriate for English, plus
+	 aggressive word-splitting and autophrase features enabled.
+	 This field is just like text_en, except it adds
+	 WordDelimiterFilter to enable splitting and matching of
+	 words on case-change, alpha numeric boundaries, and
+	 non-alphanumeric chars.  This means certain compound word
+	 cases will work, for example query "wi fi" will match
+	 document "WiFi" or "wi-fi".  However, other cases will still
+	 not match, for example if the query is "wifi" and the
+	 document is "wi fi" or if the query is "wi-fi" and the
+	 document is "wifi".
         -->
-    <fieldType name="text" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
+    <fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
       <analyzer type="index">
         <tokenizer class="solr.WhitespaceTokenizerFactory"/>
         <!-- in this example, we will only use synonyms at query time
@@ -211,7 +283,7 @@
         -->
         <filter class="solr.StopFilterFactory"
                 ignoreCase="true"
-                words="stopwords.txt"
+                words="stopwords_en.txt"
                 enablePositionIncrements="true"
                 />
         <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
@@ -224,7 +296,7 @@
         <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
         <filter class="solr.StopFilterFactory"
                 ignoreCase="true"
-                words="stopwords.txt"
+                words="stopwords_en.txt"
                 enablePositionIncrements="true"
                 />
         <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
@@ -234,14 +306,13 @@
       </analyzer>
     </fieldType>
 
-
     <!-- Less flexible matching, but less false matches.  Probably not ideal for product names,
          but may be good for SKUs.  Can insert dashes in the wrong place and still match. -->
-    <fieldType name="textTight" class="solr.TextField" positionIncrementGap="100" >
+    <fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
       <analyzer>
         <tokenizer class="solr.WhitespaceTokenizerFactory"/>
         <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
-        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_en.txt"/>
         <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
         <filter class="solr.LowerCaseFilterFactory"/>
         <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
@@ -252,57 +323,27 @@
       </analyzer>
     </fieldType>
 
-
-    <!-- A general unstemmed text field - good if one does not know the language of the field -->
-    <fieldType name="textgen" class="solr.TextField" positionIncrementGap="100">
-      <analyzer type="index">
-        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
-        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
-        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
-        <filter class="solr.LowerCaseFilterFactory"/>
-      </analyzer>
-      <analyzer type="query">
-        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
-        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
-        <filter class="solr.StopFilterFactory"
-                ignoreCase="true"
-                words="stopwords.txt"
-                enablePositionIncrements="true"
-                />
-        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
-        <filter class="solr.LowerCaseFilterFactory"/>
-      </analyzer>
-    </fieldType>
-
-
-    <!-- A general unstemmed text field that indexes tokens normally and also
-         reversed (via ReversedWildcardFilterFactory), to enable more efficient 
-	 leading wildcard queries. -->
-    <fieldType name="text_rev" class="solr.TextField" positionIncrementGap="100">
+    <!-- Just like text_general except it reverses the characters of
+	 each token, to enable more efficient leading wildcard queries. -->
+    <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
       <analyzer type="index">
-        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <tokenizer class="solr.StandardTokenizerFactory"/>
         <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
-        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
         <filter class="solr.LowerCaseFilterFactory"/>
         <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
            maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
       </analyzer>
       <analyzer type="query">
-        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <tokenizer class="solr.StandardTokenizerFactory"/>
         <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
-        <filter class="solr.StopFilterFactory"
-                ignoreCase="true"
-                words="stopwords.txt"
-                enablePositionIncrements="true"
-                />
-        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
         <filter class="solr.LowerCaseFilterFactory"/>
       </analyzer>
     </fieldType>
 
     <!-- charFilter + WhitespaceTokenizer  -->
     <!--
-    <fieldType name="textCharNorm" class="solr.TextField" positionIncrementGap="100" >
+    <fieldType name="text_char_norm" class="solr.TextField" positionIncrementGap="100" >
       <analyzer>
         <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
         <tokenizer class="solr.WhitespaceTokenizerFactory"/>
@@ -436,13 +477,13 @@
    -->
 
    <field name="id" type="string" indexed="true" stored="true" required="true" /> 
-   <field name="sku" type="textTight" indexed="true" stored="true" omitNorms="true"/>
-   <field name="name" type="textgen" indexed="true" stored="true"/>
+   <field name="sku" type="text_en_splitting_tight" indexed="true" stored="true" omitNorms="true"/>
+   <field name="name" type="text_general" indexed="true" stored="true"/>
    <field name="alphaNameSort" type="alphaOnlySort" indexed="true" stored="false"/>
-   <field name="manu" type="textgen" indexed="true" stored="true" omitNorms="true"/>
+   <field name="manu" type="text_general" indexed="true" stored="true" omitNorms="true"/>
    <field name="cat" type="string" indexed="true" stored="true" multiValued="true"/>
-   <field name="features" type="text" indexed="true" stored="true" multiValued="true"/>
-   <field name="includes" type="text" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true" />
+   <field name="features" type="text_general" indexed="true" stored="true" multiValued="true"/>
+   <field name="includes" type="text_general" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true" />
 
    <field name="weight" type="float" indexed="true" stored="true"/>
    <field name="price"  type="float" indexed="true" stored="true"/>
@@ -460,13 +501,13 @@
      Some fields are multiValued only because Tika currently may return
      multiple values for them.
    -->
-   <field name="title" type="text" indexed="true" stored="true" multiValued="true"/>
-   <field name="subject" type="text" indexed="true" stored="true"/>
-   <field name="description" type="text" indexed="true" stored="true"/>
-   <field name="comments" type="text" indexed="true" stored="true"/>
-   <field name="author" type="textgen" indexed="true" stored="true"/>
-   <field name="keywords" type="textgen" indexed="true" stored="true"/>
-   <field name="category" type="textgen" indexed="true" stored="true"/>
+   <field name="title" type="text_general" indexed="true" stored="true" multiValued="true"/>
+   <field name="subject" type="text_general" indexed="true" stored="true"/>
+   <field name="description" type="text_general" indexed="true" stored="true"/>
+   <field name="comments" type="text_general" indexed="true" stored="true"/>
+   <field name="author" type="text_general" indexed="true" stored="true"/>
+   <field name="keywords" type="text_general" indexed="true" stored="true"/>
+   <field name="category" type="text_general" indexed="true" stored="true"/>
    <field name="content_type" type="string" indexed="true" stored="true" multiValued="true"/>
    <field name="last_modified" type="date" indexed="true" stored="true"/>
    <field name="links" type="string" indexed="true" stored="true" multiValued="true"/>
@@ -474,11 +515,11 @@
 
    <!-- catchall field, containing all other searchable text fields (implemented
         via copyField further on in this schema  -->
-   <field name="text" type="text" indexed="true" stored="false" multiValued="true"/>
+   <field name="text" type="text_general" indexed="true" stored="false" multiValued="true"/>
 
    <!-- catchall text field that indexes tokens both normally and in reverse for efficient
         leading wildcard queries. -->
-   <field name="text_rev" type="text_rev" indexed="true" stored="false" multiValued="true"/>
+   <field name="text_rev" type="text_general_rev" indexed="true" stored="false" multiValued="true"/>
 
    <!-- non-tokenized version of manufacturer to make it easier to sort or group
         results by manufacturer.  copied from "manu" via copyField -->
@@ -504,8 +545,8 @@
    <dynamicField name="*_i"  type="int"    indexed="true"  stored="true"/>
    <dynamicField name="*_s"  type="string"  indexed="true"  stored="true"/>
    <dynamicField name="*_l"  type="long"   indexed="true"  stored="true"/>
-   <dynamicField name="*_t"  type="text"    indexed="true"  stored="true"/>
-   <dynamicField name="*_txt" type="text"    indexed="true"  stored="true" multiValued="true"/>
+   <dynamicField name="*_t"  type="text_general"    indexed="true"  stored="true"/>
+   <dynamicField name="*_txt" type="text_general"    indexed="true"  stored="true" multiValued="true"/>
    <dynamicField name="*_b"  type="boolean" indexed="true"  stored="true"/>
    <dynamicField name="*_f"  type="float"  indexed="true"  stored="true"/>
    <dynamicField name="*_d"  type="double" indexed="true"  stored="true"/>
@@ -526,7 +567,7 @@
    <dynamicField name="*_pi"  type="pint"    indexed="true"  stored="true"/>
 
    <dynamicField name="ignored_*" type="ignored" multiValued="true"/>
-   <dynamicField name="attr_*" type="textgen" indexed="true" stored="true" multiValued="true"/>
+   <dynamicField name="attr_*" type="text_general" indexed="true" stored="true" multiValued="true"/>
 
    <dynamicField name="random_*" type="random" />
 

Modified: lucene/dev/branches/docvalues/solr/example/solr/conf/solrconfig.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/solr/example/solr/conf/solrconfig.xml?rev=1129631&r1=1129630&r2=1129631&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/solr/example/solr/conf/solrconfig.xml (original)
+++ lucene/dev/branches/docvalues/solr/example/solr/conf/solrconfig.xml Tue May 31 11:25:37 2011
@@ -237,7 +237,7 @@
          of detailed information when indexing.
 
          Setting The value to true will instruct the underlying Lucene
-         IndexWriter to write it's debugging info the specified file
+         IndexWriter to write its debugging info to the specified file
       -->
      <infoStream file="INFOSTREAM.txt">false</infoStream> 
 

Modified: lucene/dev/branches/docvalues/solr/example/solr/conf/stopwords.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/solr/example/solr/conf/stopwords.txt?rev=1129631&r1=1129630&r2=1129631&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/solr/example/solr/conf/stopwords.txt (original)
+++ lucene/dev/branches/docvalues/solr/example/solr/conf/stopwords.txt Tue May 31 11:25:37 2011
@@ -12,47 +12,3 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-#-----------------------------------------------------------------------
-# a couple of test stopwords to test that the words are really being
-# configured from this file:
-stopworda
-stopwordb
-
-#Standard english stop words taken from Lucene's StopAnalyzer
-a
-an
-and
-are
-as
-at
-be
-but
-by
-for
-if
-in
-into
-is
-it
-no
-not
-of
-on
-or
-s
-such
-t
-that
-the
-their
-then
-there
-these
-they
-this
-to
-was
-will
-with
-

Modified: lucene/dev/branches/docvalues/solr/site/skin/basic.css
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/solr/site/skin/basic.css?rev=1129631&r1=1129630&r2=1129631&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/solr/site/skin/basic.css (original)
+++ lucene/dev/branches/docvalues/solr/site/skin/basic.css Tue May 31 11:25:37 2011
@@ -163,4 +163,4 @@ p {
 .codefrag {
   font-family: "Courier New", Courier, monospace;
   font-size: 110%;
-}
+}
\ No newline at end of file

Modified: lucene/dev/branches/docvalues/solr/site/skin/print.css
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/solr/site/skin/print.css?rev=1129631&r1=1129630&r2=1129631&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/solr/site/skin/print.css (original)
+++ lucene/dev/branches/docvalues/solr/site/skin/print.css Tue May 31 11:25:37 2011
@@ -51,4 +51,4 @@ a:link, a:visited {
 
 acronym {
   border: 0;
-}
+}
\ No newline at end of file

Modified: lucene/dev/branches/docvalues/solr/site/skin/profile.css
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/solr/site/skin/profile.css?rev=1129631&r1=1129630&r2=1129631&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/solr/site/skin/profile.css (original)
+++ lucene/dev/branches/docvalues/solr/site/skin/profile.css Tue May 31 11:25:37 2011
@@ -172,4 +172,4 @@ a:hover { color:#6587ff} 
     }
       
     
-  
+  
\ No newline at end of file

Modified: lucene/dev/branches/docvalues/solr/site/skin/screen.css
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/solr/site/skin/screen.css?rev=1129631&r1=1129630&r2=1129631&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/solr/site/skin/screen.css (original)
+++ lucene/dev/branches/docvalues/solr/site/skin/screen.css Tue May 31 11:25:37 2011
@@ -584,4 +584,4 @@ p.instruction {
   list-style-image: url('../images/instruction_arrow.png');
   list-style-position: outside;
   margin-left: 2em;
-} 
+} 
\ No newline at end of file

Modified: lucene/dev/branches/docvalues/solr/site/tutorial.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/solr/site/tutorial.html?rev=1129631&r1=1129630&r2=1129631&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/solr/site/tutorial.html (original)
+++ lucene/dev/branches/docvalues/solr/site/tutorial.html Tue May 31 11:25:37 2011
@@ -474,9 +474,9 @@ SimplePostTool: COMMITting Solr index ch
 <p>
 You may have noticed that even though the file <span class="codefrag">solr.xml</span> has now
 been POSTed to the server twice, you still only get 1 result when searching for
-"solr".  This is because the example schema.xml specifies a "uniqueKey" field
+"solr".  This is because the example <span class="codefrag">schema.xml</span> specifies a "<span class="codefrag">uniqueKey</span>" field
 called "<span class="codefrag">id</span>".  Whenever you POST instructions to Solr to add a
-document with the same value for the uniqueKey as an existing document, it
+document with the same value for the <span class="codefrag">uniqueKey</span> as an existing document, it
 automatically replaces it for you.  You can see that that has happened by
 looking at the values for <span class="codefrag">numDocs</span> and <span class="codefrag">maxDoc</span> in the
 "CORE"/searcher section of the statistics page...  </p>
@@ -487,20 +487,20 @@ looking at the values for <span class="c
 </p>
 <p>
   
-<strong>numDocs</strong> represents the number of searchable documents in the
+<strong><span class="codefrag">numDocs</span></strong> represents the number of searchable documents in the
   index (and will be larger than the number of XML files since some files
-  contained more than one <span class="codefrag">&lt;doc&gt;</span>). <strong>maxDoc</strong>
-  may be larger as the maxDoc count includes logically deleted documents that
+  contained more than one <span class="codefrag">&lt;doc&gt;</span>). <strong><span class="codefrag">maxDoc</span></strong>
+  may be larger as the <span class="codefrag">maxDoc</span> count includes logically deleted documents that
   have not yet been removed from the index. You can re-post the sample XML
-  files over and over again as much as you want and numDocs will never
-  increase,because the new documents will constantly be replacing the old.
+  files over and over again as much as you want and <span class="codefrag">numDocs</span> will never
+  increase, because the new documents will constantly be replacing the old.
 </p>
 <p>
 Go ahead and edit the existing XML files to change some of the data, and re-run
 the <span class="codefrag">java -jar post.jar</span> command, you'll see your changes reflected
 in subsequent searches.
 </p>
-<a name="N1011B"></a><a name="Deleting+Data"></a>
+<a name="N1012C"></a><a name="Deleting+Data"></a>
 <h3 class="boxed">Deleting Data</h3>
 <p>You can delete data by POSTing a delete command to the update URL and specifying the value
       of the document's unique key field, or a query that matches multiple documents (be careful with that one!).  Since these commands
@@ -511,7 +511,7 @@ in subsequent searches.
 <p>Now if you go to the <a href="http://localhost:8983/solr/admin/stats.jsp">statistics</a> page and scroll down
        to the UPDATE_HANDLERS section and verify that "<span class="codefrag">deletesById : 1</span>"</p>
 <p>If you search for <a href="http://localhost:8983/solr/select?q=id:SP2514N">id:SP2514N</a> it will still be found,
-       because index changes are not visible until, and a new searcher is opened.  To cause
+       because index changes are not visible until changes are committed and a new searcher is opened.  To cause
        this to happen, send a commit command to Solr (post.jar does this for you by default):</p>
 <pre class="code">java -jar post.jar</pre>
 <p>Now re-execute the previous search and verify that no matching documents are found.  Also revisit the
@@ -520,7 +520,7 @@ in subsequent searches.
       <a href="http://localhost:8983/solr/select?q=name:DDR&fl=name">DDR</a> in the name:</p>
 <pre class="code">java -Ddata=args -jar post.jar "&lt;delete&gt;&lt;query&gt;name:DDR&lt;/query&gt;&lt;/delete&gt;"</pre>
 <p>Commit can be an expensive operation so it's best to make many changes to an index in a batch and
-      then send the commit command at the end.  There is also an optimize command that does the same thing as commit,
+      then send the <span class="codefrag">commit</span> command at the end.  There is also an <span class="codefrag">optimize</span> command that does the same thing as <span class="codefrag">commit</span>,
       in addition to merging all index segments into a single segment, making it faster to search and causing any
       deleted documents to be removed.  All of the update commands are documented <a href="http://wiki.apache.org/solr/UpdateXmlMessages">here</a>.
     </p>
@@ -529,14 +529,14 @@ in subsequent searches.
 </div>
 
 
-<a name="N10161"></a><a name="Querying+Data"></a>
+<a name="N1017B"></a><a name="Querying+Data"></a>
 <h2 class="boxed">Querying Data</h2>
 <div class="section">
 <p>
-    Searches are done via HTTP GET on the select URL with the query string in the q parameter.
+    Searches are done via HTTP GET on the <span class="codefrag">select</span> URL with the query string in the <span class="codefrag">q</span> parameter.
     You can pass a number of optional <a href="http://wiki.apache.org/solr/StandardRequestHandler">request parameters</a>
-    to the request handler to control what information is returned.  For example, you can use the "fl" parameter
-    to control what stored fields are returned, and if the relevancy score is returned...
+    to the request handler to control what information is returned.  For example, you can use the "<span class="codefrag">fl</span>" parameter
+    to control what stored fields are returned, and if the relevancy score is returned:
   </p>
 <ul>
       
@@ -558,13 +558,13 @@ in subsequent searches.
 </ul>
 <p>
     Solr provides a <a href="http://localhost:8983/solr/admin/form.jsp">query form</a> within the web admin interface
-    that allows setting the various request parameters and is useful when trying out or debugging queries.
+    that allows setting the various request parameters and is useful when testing or debugging queries.
   </p>
-<a name="N10196"></a><a name="Sorting"></a>
+<a name="N101B9"></a><a name="Sorting"></a>
 <h3 class="boxed">Sorting</h3>
 <p>
       Solr provides a simple method to sort on one or more indexed fields.
-      Use the 'sort' parameter to specify "field direction" pairs...
+      Use the "<span class="codefrag">sort</span>" parameter to specify "field direction" pairs, separated by commas if there's more than one sort field:
     </p>
 <ul>
       
@@ -582,7 +582,7 @@ in subsequent searches.
     
 </ul>
 <p>
-      "score" can also be used as a field name when specifying a sort...
+      "<span class="codefrag">score</span>" can also be used as a field name when specifying a sort:
     </p>
 <ul>
       
@@ -596,7 +596,7 @@ in subsequent searches.
     
 </ul>
 <p>
-      Complex functions may also be used to sort results...
+      Complex functions may also be used to sort results:
     </p>
 <ul>
       
@@ -612,12 +612,12 @@ in subsequent searches.
 
 
 
-<a name="N101D4"></a><a name="Highlighting"></a>
+<a name="N101FD"></a><a name="Highlighting"></a>
 <h2 class="boxed">Highlighting</h2>
 <div class="section">
 <p>
     Hit highlighting returns relevant snippets of each returned document, and highlights
-    keywords from the query within those context snippets.
+    terms from the query within those context snippets.
   </p>
 <p>
     The following example searches for <span class="codefrag">video card</span> and requests
@@ -639,7 +639,7 @@ in subsequent searches.
 
 
 
-<a name="N101FD"></a><a name="Faceted+Search"></a>
+<a name="N10226"></a><a name="Faceted+Search"></a>
 <h2 class="boxed">Faceted Search</h2>
 <div class="section">
 <p>
@@ -698,7 +698,7 @@ in subsequent searches.
 
 
 
-<a name="N1024E"></a><a name="Search+UI"></a>
+<a name="N10277"></a><a name="Search+UI"></a>
 <h2 class="boxed">Search UI</h2>
 <div class="section">
 <p>
@@ -716,28 +716,44 @@ in subsequent searches.
 
 
 
-<a name="N10261"></a><a name="Text+Analysis"></a>
+<a name="N1028A"></a><a name="Text+Analysis"></a>
 <h2 class="boxed">Text Analysis</h2>
 <div class="section">
 <p>
-    Text fields are typically indexed by breaking the field into words and applying various transformations such as
+    Text fields are typically indexed by breaking the text into words and applying various transformations such as
     lowercasing, removing plurals, or stemming to increase relevancy.  The same text transformations are normally
     applied to any queries in order to match what is indexed.
   </p>
-<p>Example queries demonstrating relevancy improving transformations:</p>
+<p>
+    The <a href="http://wiki.apache.org/solr/SchemaXml">schema</a> defines
+    the fields in the index and what type of analysis is applied to them.  The current schema your server is using
+    may be accessed via the <span class="codefrag">[SCHEMA]</span> link on the <a href="http://localhost:8983/solr/admin/">admin</a> page.
+  </p>
+<p>
+    The best analysis components (tokenization and filtering) for your textual content depend heavily on language.
+    As you can see in the above <span class="codefrag">[SCHEMA]</span> link, the fields in the example schema are using a <span class="codefrag">fieldType</span>
+    named <span class="codefrag">text_general</span>, which has defaults appropriate for all languages.
+  </p>
+<p>
+    If you know your textual content is English, as is the case for the example documents in this tutorial,
+    and you'd like to apply English-specific stemming and stop word removal, as well as split compound words, you can use the <span class="codefrag">text_en_splitting</span> fieldType instead.
+    Go ahead and edit the <span class="codefrag">schema.xml</span> under the <span class="codefrag">solr/example/solr/conf</span> directory,
+    and change the <span class="codefrag">type</span> for fields <span class="codefrag">text</span> and <span class="codefrag">features</span> from <span class="codefrag">text_general</span> to <span class="codefrag">text_en_splitting</span>.
+    Restart the server and then re-post all of the documents, and then these queries will show the English-specific transformations:
+  </p>
 <ul>
     
 <li>A search for
        <a href="http://localhost:8983/solr/select/?indent=on&q=power-shot&fl=name">power-shot</a>
        matches <span class="codefrag">PowerShot</span>, and
       <a href="http://localhost:8983/solr/select/?indent=on&q=adata&fl=name">adata</a>
-      matches <span class="codefrag">A-DATA</span> due to the use of WordDelimiterFilter and LowerCaseFilter.
+      matches <span class="codefrag">A-DATA</span> due to the use of <span class="codefrag">WordDelimiterFilter</span> and <span class="codefrag">LowerCaseFilter</span>.
     </li>
 
     
 <li>A search for
       <a href="http://localhost:8983/solr/select/?indent=on&q=features:recharging&fl=name,features">features:recharging</a>
-       matches <span class="codefrag">Rechargeable</span> due to stemming with the EnglishPorterFilter.
+       matches <span class="codefrag">Rechargeable</span> due to stemming with the <span class="codefrag">EnglishPorterFilter</span>.
     </li>
 
     
@@ -745,20 +761,15 @@ in subsequent searches.
        <a href="http://localhost:8983/solr/select/?indent=on&q=%221 gigabyte%22&fl=name">"1 gigabyte"</a>
        matches things with <span class="codefrag">GB</span>, and the misspelled
       <a href="http://localhost:8983/solr/select/?indent=on&q=pixima&fl=name">pixima</a>
-       matches <span class="codefrag">Pixma</span> due to use of a SynonymFilter.
+       matches <span class="codefrag">Pixma</span> due to use of a <span class="codefrag">SynonymFilter</span>.
     </li>
 
   
 </ul>
-<p>
-    The <a href="http://wiki.apache.org/solr/SchemaXml">schema</a> defines
-    the fields in the index and what type of analysis is applied to them.  The current schema your server is using
-    may be accessed via the <span class="codefrag">[SCHEMA]</span> link on the <a href="http://localhost:8983/solr/admin/">admin</a> page.
-  </p>
 <p>A full description of the analysis components, Analyzers, Tokenizers, and TokenFilters
     available for use is <a href="http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters">here</a>.
   </p>
-<a name="N102B1"></a><a name="Analysis+Debugging"></a>
+<a name="N1030A"></a><a name="Analysis+Debugging"></a>
 <h3 class="boxed">Analysis Debugging</h3>
 <p>There is a handy <a href="http://localhost:8983/solr/admin/analysis.jsp">analysis</a>
       debugging page where you can see how a text value is broken down into words,
@@ -768,7 +779,7 @@ in subsequent searches.
       
 <a href="http://localhost:8983/solr/admin/analysis.jsp?name=name&val=Canon+Power-Shot+SD500">This</a>
       shows how "<span class="codefrag">Canon Power-Shot SD500</span>" would be indexed as a value in the name field.  Each row of
-      the table shows the resulting tokens after having passed through the next TokenFilter in the Analyzer for the <span class="codefrag">name</span> field.
+      the table shows the resulting tokens after having passed through the next <span class="codefrag">TokenFilter</span> in the analyzer for the <span class="codefrag">name</span> field.
       Notice how both <span class="codefrag">powershot</span> and <span class="codefrag">power</span>, <span class="codefrag">shot</span> are indexed.  Tokens generated at the same position
       are shown in the same column, in this case <span class="codefrag">shot</span> and <span class="codefrag">powershot</span>.
     </p>
@@ -787,12 +798,12 @@ in subsequent searches.
 </div>
 
 
-<a name="N102F0"></a><a name="Conclusion"></a>
+<a name="N1034C"></a><a name="Conclusion"></a>
 <h2 class="boxed">Conclusion</h2>
 <div class="section">
 <p>
   Congratulations!  You successfully ran a small Solr instance, added some
-  documents, and made changes to the index.  You learned about queries, text
+  documents, and made changes to the index and schema.  You learned about queries, text
   analysis, and the Solr admin interface.  You're ready to start using Solr on
   your own project!  Continue on with the following steps:
 </p>
@@ -800,22 +811,21 @@ in subsequent searches.
   
 <li>Subscribe to the Solr <a href="mailing_lists.html">mailing lists</a>!</li>
   
-<li>Make a copy of the Solr example directory as a template for your project.</li>
+<li>Make a copy of the Solr <span class="codefrag">example</span> directory as a template for your project.</li>
   
-<li>Customize the schema and other config in solr/conf/ to meet your needs.</li> 
+<li>Customize the schema and other config in <span class="codefrag">solr/conf/</span> to meet your needs.</li> 
 
 </ul>
 <p>
-  Solr as a ton of other features that we haven't touched on here, including
+  Solr has a ton of other features that we haven't touched on here, including
   <a href="http://wiki.apache.org/solr/DistributedSearch">distributed search</a>
   to handle huge document collections,
   <a href="http://wiki.apache.org/solr/FunctionQuery">function queries</a>,
   <a href="http://wiki.apache.org/solr/StatsComponent">numeric field statistics</a>,
   and
   <a href="http://wiki.apache.org/solr/ClusteringComponent">search results clustering</a>.
-  Explore the <a href="http://wiki.apache.org/solr/FrontPage">Solr Wiki</a> to find out
-  more details about Solr's many
-  <a href="features.html">features</a>.
+  Explore the <a href="http://wiki.apache.org/solr/FrontPage">Solr Wiki</a> to find
+  more details about Solr's many <a href="features.html">features</a>.
 </p>
 <p>
   Have Fun, and we'll see you on the Solr mailing lists!

Modified: lucene/dev/branches/docvalues/solr/site/tutorial.pdf
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/solr/site/tutorial.pdf?rev=1129631&r1=1129630&r2=1129631&view=diff
==============================================================================
Binary files - no diff available.

Modified: lucene/dev/branches/docvalues/solr/src/common/org/apache/solr/common/util/FileUtils.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/solr/src/common/org/apache/solr/common/util/FileUtils.java?rev=1129631&r1=1129630&r2=1129631&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/solr/src/common/org/apache/solr/common/util/FileUtils.java (original)
+++ lucene/dev/branches/docvalues/solr/src/common/org/apache/solr/common/util/FileUtils.java Tue May 31 11:25:37 2011
@@ -54,7 +54,7 @@ public class FileUtils {
   }
 
   /**
-   * Copied from Lucene's FSDirectory.sync(String) <!-- protected -->
+   * Copied from Lucene's FSDirectory.fsync(String) <!-- protected -->
    *
    * @param fullFile the File to be synced to disk
    * @throws IOException if the file could not be synced

Modified: lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/core/IndexReaderFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/core/IndexReaderFactory.java?rev=1129631&r1=1129630&r2=1129631&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/core/IndexReaderFactory.java (original)
+++ lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/core/IndexReaderFactory.java Tue May 31 11:25:37 2011
@@ -19,6 +19,7 @@ package org.apache.solr.core;
 import java.io.IOException;
 
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.codecs.CodecProvider;
 import org.apache.lucene.store.Directory;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.util.plugin.NamedListInitializedPlugin;
@@ -28,6 +29,7 @@ import org.apache.solr.util.plugin.Named
  */
 public abstract class IndexReaderFactory implements NamedListInitializedPlugin {
   protected int termInfosIndexDivisor = 1;//IndexReader.DEFAULT_TERMS_INDEX_DIVISOR;  Set this once Lucene makes this public.
+  protected CodecProvider provider;
   /**
    * Potentially initializes {@link #termInfosIndexDivisor}.  Overriding classes should call super.init() in order
    * to make sure termInfosIndexDivisor is set.
@@ -63,4 +65,11 @@ public abstract class IndexReaderFactory
    */
   public abstract IndexReader newReader(Directory indexDir, boolean readOnly)
       throws IOException;
+  
+  /**
+   * Sets the codec provider for this IndexReaderFactory
+   */
+  public void setCodecProvider(CodecProvider provider) {
+    this.provider = provider;
+  }
 }

Modified: lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/core/SolrConfig.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/core/SolrConfig.java?rev=1129631&r1=1129630&r2=1129631&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/core/SolrConfig.java (original)
+++ lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/core/SolrConfig.java Tue May 31 11:25:37 2011
@@ -39,6 +39,8 @@ import org.apache.solr.spelling.QueryCon
 import org.apache.solr.highlight.SolrHighlighter;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.index.IndexDeletionPolicy;
+import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.CodecProvider;
 import org.apache.lucene.util.Version;
 
 import org.slf4j.Logger;
@@ -202,6 +204,7 @@ public class SolrConfig extends Config {
 
      loadPluginInfo(DirectoryFactory.class,"directoryFactory",false, true);
      loadPluginInfo(IndexDeletionPolicy.class,"mainIndex/deletionPolicy",false, true);
+     loadPluginInfo(CodecProviderFactory.class,"mainIndex/codecProviderFactory",false, false);
      loadPluginInfo(IndexReaderFactory.class,"indexReaderFactory",false, true);
      loadPluginInfo(UpdateRequestProcessorChain.class,"updateRequestProcessorChain",false, false);
 

Modified: lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/core/SolrCore.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/core/SolrCore.java?rev=1129631&r1=1129630&r2=1129631&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/core/SolrCore.java (original)
+++ lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/core/SolrCore.java Tue May 31 11:25:37 2011
@@ -20,6 +20,8 @@ package org.apache.solr.core;
 import org.apache.lucene.index.IndexDeletionPolicy;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.CodecProvider;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.store.Directory;
 import org.apache.solr.common.SolrException;
@@ -93,6 +95,7 @@ public final class SolrCore implements S
   private IndexDeletionPolicyWrapper solrDelPolicy;
   private DirectoryFactory directoryFactory;
   private IndexReaderFactory indexReaderFactory;
+  private final CodecProvider codecProvider;
 
   public long getStartTime() { return startTime; }
 
@@ -330,6 +333,7 @@ public final class SolrCore implements S
       indexReaderFactory = new StandardIndexReaderFactory();
     } 
     this.indexReaderFactory = indexReaderFactory;
+    this.indexReaderFactory.setCodecProvider(codecProvider);
   }
   
   // protect via synchronized(SolrCore.class)
@@ -366,7 +370,7 @@ public final class SolrCore implements S
         log.warn(logid+"Solr index directory '" + new File(indexDir) + "' doesn't exist."
                 + " Creating new index...");
 
-        SolrIndexWriter writer = new SolrIndexWriter("SolrCore.initIndex", indexDir, getDirectoryFactory(), true, schema, solrConfig.mainIndexConfig, solrDelPolicy);
+        SolrIndexWriter writer = new SolrIndexWriter("SolrCore.initIndex", indexDir, getDirectoryFactory(), true, schema, solrConfig.mainIndexConfig, solrDelPolicy, codecProvider);
         writer.close();
       }
 
@@ -493,6 +497,7 @@ public final class SolrCore implements S
 
     initDeletionPolicy();
 
+    this.codecProvider = initCodecProvider(solrConfig, schema);
     initIndex();
 
     initWriters();
@@ -555,6 +560,19 @@ public final class SolrCore implements S
     resourceLoader.inform(infoRegistry);
   }
 
+  private CodecProvider initCodecProvider(SolrConfig solrConfig, IndexSchema schema) {
+    final PluginInfo info = solrConfig.getPluginInfo(CodecProviderFactory.class.getName());
+    CodecProvider cp;
+    if (info != null) {
+      CodecProviderFactory factory = (CodecProviderFactory) schema.getResourceLoader().newInstance(info.className);
+      factory.init(info.initArgs);
+      cp = factory.create();
+    } else {
+      // make sure we use the default if nothing is configured
+      cp = CodecProvider.getDefault();
+    }
+    return new SchemaCodecProvider(schema, cp);
+  }
 
   /**
    * Load the request processors
@@ -1618,6 +1636,10 @@ public final class SolrCore implements S
     }
     return lst;
   }
+  
+  public CodecProvider getCodecProvider() {
+    return codecProvider;
+  }
 
 }
 

Modified: lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/core/StandardIndexReaderFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/core/StandardIndexReaderFactory.java?rev=1129631&r1=1129630&r2=1129631&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/core/StandardIndexReaderFactory.java (original)
+++ lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/core/StandardIndexReaderFactory.java Tue May 31 11:25:37 2011
@@ -35,6 +35,6 @@ public class StandardIndexReaderFactory 
   @Override
   public IndexReader newReader(Directory indexDir, boolean readOnly)
       throws IOException {
-    return IndexReader.open(indexDir, null, readOnly, termInfosIndexDivisor);
+    return IndexReader.open(indexDir, null, readOnly, termInfosIndexDivisor, provider);
   }
 }

Modified: lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java?rev=1129631&r1=1129630&r2=1129631&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java (original)
+++ lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java Tue May 31 11:25:37 2011
@@ -27,6 +27,7 @@ import org.apache.lucene.index.Payload;
 import org.apache.lucene.util.Attribute;
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.AttributeReflector;
+import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.SorterTemplate;
 import org.apache.solr.analysis.CharFilterFactory;
 import org.apache.solr.analysis.TokenFilterFactory;
@@ -39,8 +40,6 @@ import org.apache.solr.request.SolrQuery
 import org.apache.solr.response.SolrQueryResponse;
 import org.apache.solr.schema.FieldType;
 
-import org.apache.noggit.CharArr;
-
 import java.io.IOException;
 import java.io.StringReader;
 import java.util.*;
@@ -235,18 +234,13 @@ public abstract class AnalysisRequestHan
 
     FieldType fieldType = context.getFieldType();
 
-    final CharArr textBuf = new CharArr();
     for (int i = 0, c = tokens.size(); i < c; i++) {
       AttributeSource token = tokens.get(i);
       final NamedList<Object> tokenNamedList = new SimpleOrderedMap<Object>();
       final TermToBytesRefAttribute termAtt = token.getAttribute(TermToBytesRefAttribute.class);
       BytesRef rawBytes = termAtt.getBytesRef();
       termAtt.fillBytesRef();
-
-      textBuf.reset();
-      fieldType.indexedToReadable(rawBytes, textBuf);
-      final String text = textBuf.toString();
-
+      final String text = fieldType.indexedToReadable(rawBytes, new CharsRef(rawBytes.length)).toString();
       tokenNamedList.add("text", text);
       
       if (token.hasAttribute(CharTermAttribute.class)) {

Modified: lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java?rev=1129631&r1=1129630&r2=1129631&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java (original)
+++ lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java Tue May 31 11:25:37 2011
@@ -46,6 +46,7 @@ import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermRangeQuery;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.PriorityQueue;
 import org.apache.lucene.util.BytesRef;
 import org.apache.solr.analysis.CharFilterFactory;
@@ -232,6 +233,7 @@ public class LukeRequestHandler extends 
   
   private static SimpleOrderedMap<Object> getDocumentFieldsInfo( Document doc, int docId, IndexReader reader, IndexSchema schema ) throws IOException
   { 
+    final CharsRef spare = new CharsRef();
     SimpleOrderedMap<Object> finfo = new SimpleOrderedMap<Object>();
     for( Object o : doc.getFields() ) {
       Fieldable fieldable = (Fieldable)o;
@@ -265,7 +267,7 @@ public class LukeRequestHandler extends 
           if( v != null ) {
             SimpleOrderedMap<Integer> tfv = new SimpleOrderedMap<Integer>();
             for( int i=0; i<v.size(); i++ ) {
-              tfv.add( v.getTerms()[i].utf8ToString(), v.getTermFrequencies()[i] );
+              tfv.add( v.getTerms()[i].utf8ToChars(spare).toString(), v.getTermFrequencies()[i] );
             }
             f.add( "termVector", tfv );
           }
@@ -624,7 +626,7 @@ public class LukeRequestHandler extends 
   private static Map<String,TopTermQueue> getTopTerms( IndexReader reader, Set<String> fields, int numTerms, Set<String> junkWords ) throws Exception 
   {
     Map<String,TopTermQueue> info = new HashMap<String, TopTermQueue>();
-
+    final CharsRef spare = new CharsRef();
     Fields fieldsC = MultiFields.getFields(reader);
     if (fieldsC != null) {
       FieldsEnum fieldsEnum = fieldsC.iterator();
@@ -634,7 +636,7 @@ public class LukeRequestHandler extends 
         TermsEnum termsEnum = fieldsEnum.terms();
         BytesRef text;
         while((text = termsEnum.next()) != null) {
-          String t = text.utf8ToString();
+          String t = text.utf8ToChars(spare).toString();
   
           // Compute distinct terms for every field
           TopTermQueue tiq = info.get( field );

Modified: lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/handler/component/QueryComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/handler/component/QueryComponent.java?rev=1129631&r1=1129630&r2=1129631&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/handler/component/QueryComponent.java (original)
+++ lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/handler/component/QueryComponent.java Tue May 31 11:25:37 2011
@@ -24,6 +24,7 @@ import org.apache.lucene.index.Term;
 import org.apache.lucene.queryParser.ParseException;
 import org.apache.lucene.search.*;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.ReaderUtil;
 import org.apache.solr.cloud.CloudDescriptor;
 import org.apache.solr.cloud.ZkController;
@@ -455,7 +456,7 @@ public class QueryComponent extends Sear
   {
     SolrQueryRequest req = rb.req;
     SolrQueryResponse rsp = rb.rsp;
-
+    final CharsRef spare = new CharsRef();
     // The query cache doesn't currently store sort field values, and SolrIndexSearcher doesn't
     // currently have an option to return sort field values.  Because of this, we
     // take the documents given and re-derive the sort values.
@@ -524,7 +525,7 @@ public class QueryComponent extends Sear
           // String field in Lucene, which returns the terms
           // data as BytesRef:
           if (val instanceof BytesRef) {
-            field.setValue(((BytesRef)val).utf8ToString());
+            field.setValue(((BytesRef)val).utf8ToChars(spare).toString());
             val = ft.toObject(field);
           }
 

Modified: lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/handler/component/StatsComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/handler/component/StatsComponent.java?rev=1129631&r1=1129630&r2=1129631&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/handler/component/StatsComponent.java (original)
+++ lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/handler/component/StatsComponent.java Tue May 31 11:25:37 2011
@@ -23,6 +23,7 @@ import java.util.Map;
 
 import org.apache.lucene.search.FieldCache;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CharsRef;
 import org.apache.noggit.CharArr;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.params.StatsParams;
@@ -270,19 +271,15 @@ class SimpleStats {
       }
       finfo[i++] = new FieldFacetStats( f, si, ft, 0 );
     }
-
+    final CharsRef spare = new CharsRef();
     final BytesRef tempBR = new BytesRef();
-    final CharArr spare = new CharArr();
-
     DocIterator iter = docs.iterator();
     while (iter.hasNext()) {
       int docID = iter.nextDoc();
       BytesRef raw = all.getTermText(docID, tempBR);
       Double v = null;
       if( raw != null ) {
-        spare.reset();
-        all.ft.indexedToReadable(raw, spare);
-        v = Double.parseDouble(spare.toString());
+        v = Double.parseDouble(all.ft.indexedToReadable(raw, spare).toString());
         allstats.accumulate(v);
       }
       else {

Modified: lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/handler/component/TermsComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/handler/component/TermsComponent.java?rev=1129631&r1=1129630&r2=1129631&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/handler/component/TermsComponent.java (original)
+++ lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/handler/component/TermsComponent.java Tue May 31 11:25:37 2011
@@ -18,7 +18,7 @@ package org.apache.solr.handler.componen
 
 import org.apache.lucene.index.*;
 import org.apache.lucene.util.BytesRef;
-import org.apache.noggit.CharArr;
+import org.apache.lucene.util.CharsRef;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.params.*;
 import org.apache.solr.common.util.NamedList;
@@ -178,8 +178,7 @@ public class TermsComponent extends Sear
 
       int i = 0;
       BoundedTreeSet<CountPair<BytesRef, Integer>> queue = (sort ? new BoundedTreeSet<CountPair<BytesRef, Integer>>(limit) : null);
-      CharArr external = new CharArr();
-
+      CharsRef external = new CharsRef();
       while (term != null && (i<limit || sort)) {
         boolean externalized = false; // did we fill in "external" yet for this term?
 
@@ -189,8 +188,8 @@ public class TermsComponent extends Sear
         if (pattern != null) {
           // indexed text or external text?
           // TODO: support "raw" mode?
-          external.reset();
           ft.indexedToReadable(term, external);
+          externalized = true;
           if (!pattern.matcher(external).matches()) {
             term = termsEnum.next();
             continue;
@@ -213,13 +212,9 @@ public class TermsComponent extends Sear
 
             // TODO: handle raw somehow
             if (!externalized) {
-              external.reset();
               ft.indexedToReadable(term, external);
             }
-            String label = external.toString();
-            
-
-            fieldTerms.add(label, docFreq);
+            fieldTerms.add(external.toString(), docFreq);
             i++;
           }
         }
@@ -230,7 +225,6 @@ public class TermsComponent extends Sear
       if (sort) {
         for (CountPair<BytesRef, Integer> item : queue) {
           if (i >= limit) break;
-          external.reset();
           ft.indexedToReadable(item.key, external);          
           fieldTerms.add(external.toString(), item.val);
           i++;

Modified: lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/request/PerSegmentSingleValuedFaceting.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/request/PerSegmentSingleValuedFaceting.java?rev=1129631&r1=1129630&r2=1129631&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/request/PerSegmentSingleValuedFaceting.java (original)
+++ lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/request/PerSegmentSingleValuedFaceting.java Tue May 31 11:25:37 2011
@@ -23,9 +23,11 @@ import org.apache.lucene.search.DocIdSet
 import org.apache.lucene.search.DocIdSetIterator;
 import org.apache.lucene.search.FieldCache;
 import org.apache.lucene.search.Filter;
+import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.PriorityQueue;
 import org.apache.lucene.util.BytesRef;
 import org.apache.lucene.util.ReaderUtil;
+import org.apache.lucene.util.UnicodeUtil;
 import org.apache.lucene.util.packed.Direct16;
 import org.apache.lucene.util.packed.Direct32;
 import org.apache.lucene.util.packed.Direct8;
@@ -37,7 +39,6 @@ import org.apache.solr.schema.FieldType;
 import org.apache.solr.search.DocSet;
 import org.apache.solr.search.SolrIndexSearcher;
 import org.apache.solr.util.BoundedTreeSet;
-import org.apache.solr.util.ByteUtils;
 
 import java.io.IOException;
 import java.util.*;
@@ -244,7 +245,7 @@ class PerSegmentSingleValuedFaceting {
         BytesRef prefixRef = new BytesRef(prefix);
         startTermIndex = si.binarySearchLookup(prefixRef, tempBR);
         if (startTermIndex<0) startTermIndex=-startTermIndex-1;
-        prefixRef.append(ByteUtils.bigTerm);
+        prefixRef.append(UnicodeUtil.BIG_TERM);
         // TODO: we could constrain the lower endpoint if we had a binarySearch method that allowed passing start/end
         endTermIndex = si.binarySearchLookup(prefixRef, tempBR);
         assert endTermIndex < 0;
@@ -339,6 +340,8 @@ abstract class FacetCollector {
 
 // This collector expects facets to be collected in index order
 class CountSortedFacetCollector extends FacetCollector {
+  private final CharsRef spare = new CharsRef();
+
   final int offset;
   final int limit;
   final int maxsize;
@@ -360,7 +363,7 @@ class CountSortedFacetCollector extends 
       // NOTE: we use c>min rather than c>=min as an optimization because we are going in
       // index order, so we already know that the keys are ordered.  This can be very
       // important if a lot of the counts are repeated (like zero counts would be).
-      queue.add(new SimpleFacets.CountPair<String,Integer>(term.utf8ToString(), count));
+      queue.add(new SimpleFacets.CountPair<String,Integer>(term.utf8ToChars(spare).toString(), count));
       if (queue.size()>=maxsize) min=queue.last().val;
     }
     return false;
@@ -383,12 +386,13 @@ class CountSortedFacetCollector extends 
 
 // This collector expects facets to be collected in index order
 class IndexSortedFacetCollector extends FacetCollector {
+  private final CharsRef spare = new CharsRef();
+
   int offset;
   int limit;
   final int mincount;
   final NamedList<Integer> res = new NamedList<Integer>();
 
-
   public IndexSortedFacetCollector(int offset, int limit, int mincount) {
     this.offset = offset;
     this.limit = limit>0 ? limit : Integer.MAX_VALUE;
@@ -407,7 +411,7 @@ class IndexSortedFacetCollector extends 
     }
 
     if (limit > 0) {
-      res.add(term.utf8ToString(), count);
+      res.add(term.utf8ToChars(spare).toString(), count);
       limit--;
     }
 

Modified: lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/request/SimpleFacets.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/request/SimpleFacets.java?rev=1129631&r1=1129630&r2=1129631&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/request/SimpleFacets.java (original)
+++ lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/request/SimpleFacets.java Tue May 31 11:25:37 2011
@@ -21,12 +21,13 @@ import org.apache.lucene.index.*;
 import org.apache.lucene.queryParser.ParseException;
 import org.apache.lucene.search.*;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.StringHelper;
+import org.apache.lucene.util.UnicodeUtil;
 import org.apache.lucene.util.packed.Direct16;
 import org.apache.lucene.util.packed.Direct32;
 import org.apache.lucene.util.packed.Direct8;
 import org.apache.lucene.util.packed.PackedInts;
-import org.apache.noggit.CharArr;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.params.FacetParams;
 import org.apache.solr.common.params.RequiredSolrParams;
@@ -41,7 +42,6 @@ import org.apache.solr.core.SolrCore;
 import org.apache.solr.schema.*;
 import org.apache.solr.search.*;
 import org.apache.solr.util.BoundedTreeSet;
-import org.apache.solr.util.ByteUtils;
 import org.apache.solr.util.DateMathParser;
 import org.apache.solr.handler.component.ResponseBuilder;
 import org.apache.solr.util.LongPriorityQueue;
@@ -109,7 +109,7 @@ public class SimpleFacets {
     if (localParams == null) return;
 
     // remove local params unless it's a query
-    if (type != FacetParams.FACET_QUERY) {
+    if (!FacetParams.FACET_QUERY.equals(type)) { // compare by value; TODO Cut over to an Enum here
       facetValue = localParams.get(CommonParams.VALUE);
     }
 
@@ -128,7 +128,7 @@ public class SimpleFacets {
     String excludeStr = localParams.get(CommonParams.EXCLUDE);
     if (excludeStr == null) return;
 
-    Map tagMap = (Map)req.getContext().get("tags");
+    Map<?,?> tagMap = (Map<?,?>)req.getContext().get("tags");
     if (tagMap != null && rb != null) {
       List<String> excludeTagList = StrUtils.splitSmart(excludeStr,',');
 
@@ -137,7 +137,7 @@ public class SimpleFacets {
         Object olst = tagMap.get(excludeTag);
         // tagMap has entries of List<String,List<QParser>>, but subject to change in the future
         if (!(olst instanceof Collection)) continue;
-        for (Object o : (Collection)olst) {
+        for (Object o : (Collection<?>)olst) {
           if (!(o instanceof QParser)) continue;
           QParser qp = (QParser)o;
           excludeSet.put(qp.getQuery(), Boolean.TRUE);
@@ -435,7 +435,7 @@ public class SimpleFacets {
     if (prefix!=null) {
       startTermIndex = si.binarySearchLookup(prefixRef, br);
       if (startTermIndex<0) startTermIndex=-startTermIndex-1;
-      prefixRef.append(ByteUtils.bigTerm);
+      prefixRef.append(UnicodeUtil.BIG_TERM);
       endTermIndex = si.binarySearchLookup(prefixRef, br);
       assert endTermIndex < 0;
       endTermIndex = -endTermIndex-1;
@@ -446,8 +446,7 @@ public class SimpleFacets {
 
     final int nTerms=endTermIndex-startTermIndex;
     int missingCount = -1; 
-
-    CharArr spare = new CharArr();
+    final CharsRef charsRef = new CharsRef(10);
     if (nTerms>0 && docs.size() >= mincount) {
 
       // count collection array only needs to be as big as the number of terms we are
@@ -547,10 +546,8 @@ public class SimpleFacets {
           long pair = sorted[i];
           int c = (int)(pair >>> 32);
           int tnum = Integer.MAX_VALUE - (int)pair;
-
-          spare.reset();
-          ft.indexedToReadable(si.lookup(startTermIndex+tnum, br), spare);
-          res.add(spare.toString(), c);
+          ft.indexedToReadable(si.lookup(startTermIndex+tnum, br), charsRef);
+          res.add(charsRef.toString(), c);
         }
       
       } else {
@@ -567,9 +564,8 @@ public class SimpleFacets {
           int c = counts[i];
           if (c<mincount || --off>=0) continue;
           if (--lim<0) break;
-          spare.reset();
-          ft.indexedToReadable(si.lookup(startTermIndex+i, br), spare);
-          res.add(spare.toString(), c);
+          ft.indexedToReadable(si.lookup(startTermIndex+i, br), charsRef);
+          res.add(charsRef.toString(), c);
         }
       }
     }
@@ -657,7 +653,7 @@ public class SimpleFacets {
     }
 
     DocsEnum docsEnum = null;
-    CharArr spare = new CharArr();
+    CharsRef charsRef = new CharsRef(10);
 
     if (docs.size() >= mincount) {
       while (term != null) {
@@ -742,9 +738,8 @@ public class SimpleFacets {
           } else {
             if (c >= mincount && --off<0) {
               if (--lim<0) break;
-              spare.reset();
-              ft.indexedToReadable(term, spare);
-              res.add(spare.toString(), c);
+              ft.indexedToReadable(term, charsRef);
+              res.add(charsRef.toString(), c);
             }
           }
         }
@@ -757,9 +752,8 @@ public class SimpleFacets {
       for (CountPair<BytesRef,Integer> p : queue) {
         if (--off>=0) continue;
         if (--lim<0) break;
-        spare.reset();
-        ft.indexedToReadable(p.key, spare);
-        res.add(spare.toString(), p.val);
+        ft.indexedToReadable(p.key, charsRef);
+        res.add(charsRef.toString(), p.val);
       }
     }
 

Modified: lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/request/UnInvertedField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/request/UnInvertedField.java?rev=1129631&r1=1129630&r2=1129631&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/request/UnInvertedField.java (original)
+++ lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/request/UnInvertedField.java Tue May 31 11:25:37 2011
@@ -24,7 +24,6 @@ import org.apache.lucene.index.TermsEnum
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TermRangeQuery;
 import org.apache.lucene.util.StringHelper;
-import org.apache.noggit.CharArr;
 import org.apache.solr.common.params.FacetParams;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.common.SolrException;
@@ -33,13 +32,14 @@ import org.apache.solr.core.SolrCore;
 import org.apache.solr.schema.FieldType;
 import org.apache.solr.schema.TrieField;
 import org.apache.solr.search.*;
-import org.apache.solr.util.ByteUtils;
 import org.apache.solr.util.LongPriorityQueue;
 import org.apache.solr.util.PrimUtils;
 import org.apache.solr.handler.component.StatsValues;
 import org.apache.solr.handler.component.FieldFacetStats;
+import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.OpenBitSet;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.UnicodeUtil;
 
 import java.io.IOException;
 import java.util.HashMap;
@@ -227,13 +227,13 @@ public class UnInvertedField extends Doc
 
       TermsEnum te = getOrdTermsEnum(searcher.getIndexReader());
       if (prefix != null && prefix.length() > 0) {
-        BytesRef prefixBr = new BytesRef(prefix);
+        final BytesRef prefixBr = new BytesRef(prefix);
         if (te.seek(prefixBr, true) == TermsEnum.SeekStatus.END) {
           startTerm = numTermsInField;
         } else {
           startTerm = (int) te.ord();
         }
-        prefixBr.append(ByteUtils.bigTerm);
+        prefixBr.append(UnicodeUtil.BIG_TERM);
         if (te.seek(prefixBr, true) == TermsEnum.SeekStatus.END) {
           endTerm = numTermsInField;
         } else {
@@ -331,8 +331,7 @@ public class UnInvertedField extends Doc
           }
         }
       }
-
-      CharArr spare = new CharArr();
+      final CharsRef charsRef = new CharsRef();
 
       int off=offset;
       int lim=limit>=0 ? limit : Integer.MAX_VALUE;
@@ -408,7 +407,7 @@ public class UnInvertedField extends Doc
         for (int i=sortedIdxStart; i<sortedIdxEnd; i++) {
           int idx = indirect[i];
           int tnum = (int)sorted[idx];
-          String label = getReadableValue(getTermValue(te, tnum), ft, spare);
+          final String label = getReadableValue(getTermValue(te, tnum), ft, charsRef);
           //System.out.println("  label=" + label);
           res.setName(idx - sortedIdxStart, label);
         }
@@ -428,7 +427,7 @@ public class UnInvertedField extends Doc
           if (c<mincount || --off>=0) continue;
           if (--lim<0) break;
 
-          String label = getReadableValue(getTermValue(te, i), ft, spare);
+          final String label = getReadableValue(getTermValue(te, i), ft, charsRef);
           res.add(label, c);
         }
       }
@@ -582,14 +581,12 @@ public class UnInvertedField extends Doc
         }
       }
     }
-
+    final CharsRef charsRef = new CharsRef();
     // add results in index order
-    CharArr spare = new CharArr();
-
     for (i = 0; i < numTermsInField; i++) {
       int c = doNegative ? maxTermCounts[i] - counts[i] : counts[i];
       if (c == 0) continue;
-      String label = getReadableValue(getTermValue(te, i), ft, spare);
+      String label = getReadableValue(getTermValue(te, i), ft, charsRef);
       // TODO: we should avoid this re-parse
       Double value = Double.parseDouble(label);
 
@@ -621,14 +618,8 @@ public class UnInvertedField extends Doc
 
   }
 
-  String getReadableValue(BytesRef termval, FieldType ft, CharArr spare) {
-    if (spare == null) {
-      spare = new CharArr();
-    } else {
-      spare.reset();
-    }
-    ft.indexedToReadable(termval, spare);
-    return spare.toString();    
+  String getReadableValue(BytesRef termval, FieldType ft, CharsRef charsRef) {
+    return ft.indexedToReadable(termval, charsRef).toString();
   }
 
   /** may return a reused BytesRef */

Modified: lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/schema/BoolField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/schema/BoolField.java?rev=1129631&r1=1129630&r2=1129631&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/schema/BoolField.java (original)
+++ lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/schema/BoolField.java Tue May 31 11:25:37 2011
@@ -19,7 +19,7 @@ package org.apache.solr.schema;
 
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.util.BytesRef;
-import org.apache.noggit.CharArr;
+import org.apache.lucene.util.CharsRef;
 import org.apache.solr.search.QParser;
 import org.apache.solr.search.function.ValueSource;
 import org.apache.solr.search.function.OrdFieldSource;
@@ -130,13 +130,17 @@ public class BoolField extends FieldType
     return ch=='T' ? "true" : "false";
   }
 
+  private static final CharsRef TRUE = new CharsRef("true");
+  private static final CharsRef FALSE = new CharsRef("false");
+  
   @Override
-  public void indexedToReadable(BytesRef input, CharArr out) {
+  public CharsRef indexedToReadable(BytesRef input, CharsRef charsRef) {
     if (input.length > 0 && input.bytes[input.offset] == 'T') {
-      out.write("true");
+      charsRef.copy(TRUE);
     } else {
-      out.write("false");
+      charsRef.copy(FALSE);
     }
+    return charsRef;
   }
 
   @Override

Modified: lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/schema/DateField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/schema/DateField.java?rev=1129631&r1=1129630&r2=1129631&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/schema/DateField.java (original)
+++ lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/schema/DateField.java Tue May 31 11:25:37 2011
@@ -23,14 +23,13 @@ import org.apache.lucene.search.Query;
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.search.TermRangeQuery;
 import org.apache.lucene.util.BytesRef;
-import org.apache.noggit.CharArr;
+import org.apache.lucene.util.CharsRef;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.util.DateUtil;
 import org.apache.solr.request.SolrQueryRequest;
 import org.apache.solr.response.TextResponseWriter;
 import org.apache.solr.search.QParser;
 import org.apache.solr.search.function.*;
-import org.apache.solr.util.ByteUtils;
 import org.apache.solr.util.DateMathParser;
 
 import java.io.IOException;
@@ -131,6 +130,8 @@ public class DateField extends FieldType
 
   protected static String NOW = "NOW";
   protected static char Z = 'Z';
+  // Z as a one-element array (captured at class initialization) for CharsRef.append(char[], int, int)
+  private static final char[] Z_ARRAY = new char[] {Z};
   
   @Override
   public String toInternal(String val) {
@@ -184,7 +185,7 @@ public class DateField extends FieldType
   public Fieldable createField(SchemaField field, Object value, float boost) {
     // Convert to a string before indexing
     if(value instanceof Date) {
-      value = toInternal( (Date)value ) + 'Z';
+      value = toInternal( (Date)value ) + Z;
     }
     return super.createField(field, value, boost);
   }
@@ -199,9 +200,10 @@ public class DateField extends FieldType
   }
 
   @Override
-  public void indexedToReadable(BytesRef input, CharArr out) {
-    ByteUtils.UTF8toUTF16(input, out);
-    out.write(Z);
+  public CharsRef indexedToReadable(BytesRef input, CharsRef charsRef) {
+    input.utf8ToChars(charsRef);
+    charsRef.append(Z_ARRAY, 0, 1);
+    return charsRef;
   }
 
   @Override
@@ -479,10 +481,8 @@ class DateFieldSource extends FieldCache
         if (ord == 0) {
           return null;
         } else {
-          BytesRef br = termsIndex.lookup(ord, new BytesRef());
-          CharArr spare = new CharArr();
-          ft.indexedToReadable(br, spare);
-          return spare.toString();
+          final BytesRef br = termsIndex.lookup(ord, spare);
+          return ft.indexedToReadable(br, spareChars).toString();
         }
       }
 
@@ -492,7 +492,7 @@ class DateFieldSource extends FieldCache
         if (ord == 0) {
           return null;
         } else {
-          BytesRef br = termsIndex.lookup(ord, new BytesRef());
+          final BytesRef br = termsIndex.lookup(ord, new BytesRef());
           return ft.toObject(null, br);
         }
       }

Modified: lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/schema/FieldType.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/schema/FieldType.java?rev=1129631&r1=1129630&r2=1129631&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/schema/FieldType.java (original)
+++ lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/schema/FieldType.java Tue May 31 11:25:37 2011
@@ -30,8 +30,8 @@ import org.apache.lucene.search.SortFiel
 import org.apache.lucene.search.TermQuery;
 import org.apache.lucene.search.TermRangeQuery;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.UnicodeUtil;
-import org.apache.noggit.CharArr;
 import org.apache.solr.analysis.SolrAnalyzer;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.SolrException.ErrorCode;
@@ -39,7 +39,6 @@ import org.apache.solr.response.TextResp
 import org.apache.solr.search.QParser;
 import org.apache.solr.search.Sorting;
 import org.apache.solr.search.function.ValueSource;
-import org.apache.solr.util.ByteUtils;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
 
@@ -167,6 +166,12 @@ public abstract class FieldType extends 
       initArgs.remove("positionIncrementGap");
     }
 
+    final String codec = initArgs.get("codec");
+    if (codec != null) {
+      this.codec = codec;
+      initArgs.remove("codec");
+    }
+
     if (initArgs.size() > 0) {
       throw new RuntimeException("schema fieldtype " + typeName
               + "("+ this.getClass().getName() + ")"
@@ -348,9 +353,9 @@ public abstract class FieldType extends 
   }
 
   public Object toObject(SchemaField sf, BytesRef term) {
-    CharArr ext = new CharArr(term.length);
-    indexedToReadable(term, ext);
-    Fieldable f = createField(sf, ext.toString(), 1.0f);
+    final CharsRef ref = new CharsRef(term.length);
+    indexedToReadable(term, ref);
+    final Fieldable f = createField(sf, ref.toString(), 1.0f);
     return toObject(f);
   }
 
@@ -359,9 +364,10 @@ public abstract class FieldType extends 
     return indexedForm;
   }
 
-  /** Given an indexed term, append the human readable representation to out */
-  public void indexedToReadable(BytesRef input, CharArr out) {
-    ByteUtils.UTF8toUTF16(input, out);
+  /** Given an indexed term, store its human readable representation in output and return output */
+  public CharsRef indexedToReadable(BytesRef input, CharsRef output) {
+    input.utf8ToChars(output);
+    return output;
   }
 
   /** Given the stored field, return the human readable representation */
@@ -384,7 +390,7 @@ public abstract class FieldType extends 
 
   /** Given the readable value, return the term value that will match it. */
   public void readableToIndexed(CharSequence val, BytesRef result) {
-    String internal = readableToIndexed(val.toString());
+    final String internal = readableToIndexed(val.toString());
     UnicodeUtil.UTF16toUTF8(internal, 0, internal.length(), result);
   }
 
@@ -538,6 +544,15 @@ public abstract class FieldType extends 
   }
   
   /**
+   * The codec ID used for this field type
+   */
+  protected String codec;
+  
+  public String getCodec() {
+    return codec;
+  }
+  
+  /**
    * calls back to TextResponseWriter to write the field value
    */
   public abstract void write(TextResponseWriter writer, String name, Fieldable f) throws IOException;

Modified: lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/schema/SortableDoubleField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/schema/SortableDoubleField.java?rev=1129631&r1=1129630&r2=1129631&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/schema/SortableDoubleField.java (original)
+++ lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/schema/SortableDoubleField.java Tue May 31 11:25:37 2011
@@ -19,7 +19,7 @@ package org.apache.solr.schema;
 
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.util.BytesRef;
-import org.apache.noggit.CharArr;
+import org.apache.lucene.util.CharsRef;
 import org.apache.solr.search.MutableValueDouble;
 import org.apache.solr.search.MutableValue;
 import org.apache.solr.search.QParser;
@@ -29,7 +29,6 @@ import org.apache.solr.search.function.D
 import org.apache.solr.search.function.StringIndexDocValues;
 import org.apache.lucene.document.Fieldable;
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
-import org.apache.solr.util.ByteUtils;
 import org.apache.solr.util.NumberUtils;
 import org.apache.solr.response.TextResponseWriter;
 
@@ -78,9 +77,12 @@ public class SortableDoubleField extends
   }
 
   @Override
-  public void indexedToReadable(BytesRef input, CharArr out) {
+  public CharsRef indexedToReadable(BytesRef input, CharsRef charsRef) {
     // TODO: this could be more efficient, but the sortable types should be deprecated instead
-    out.write( indexedToReadable(ByteUtils.UTF8toUTF16(input)) );
+    input.utf8ToChars(charsRef);
+    final char[] indexedToReadable = indexedToReadable(charsRef.toString()).toCharArray();
+    charsRef.copy(indexedToReadable, 0, indexedToReadable.length);
+    return charsRef;
   }
 
   @Override
@@ -90,9 +92,6 @@ public class SortableDoubleField extends
   }
 }
 
-
-
-
 class SortableDoubleFieldSource extends FieldCacheSource {
   protected double defVal;
 

Modified: lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/schema/SortableFloatField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/schema/SortableFloatField.java?rev=1129631&r1=1129630&r2=1129631&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/schema/SortableFloatField.java (original)
+++ lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/schema/SortableFloatField.java Tue May 31 11:25:37 2011
@@ -19,7 +19,7 @@ package org.apache.solr.schema;
 
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.util.BytesRef;
-import org.apache.noggit.CharArr;
+import org.apache.lucene.util.CharsRef;
 import org.apache.solr.search.MutableValueFloat;
 import org.apache.solr.search.MutableValue;
 import org.apache.solr.search.QParser;
@@ -29,7 +29,6 @@ import org.apache.solr.search.function.D
 import org.apache.solr.search.function.StringIndexDocValues;
 import org.apache.lucene.document.Fieldable;
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
-import org.apache.solr.util.ByteUtils;
 import org.apache.solr.util.NumberUtils;
 import org.apache.solr.response.TextResponseWriter;
 
@@ -77,10 +76,16 @@ public class SortableFloatField extends 
     return NumberUtils.SortableStr2floatStr(indexedForm);
   }
 
+  /**
+   * Converts the indexed term to its readable form via {@link #indexedToReadable(String)},
+   * copies it into {@code charsRef} and returns that same instance.
+   */
   @Override
-  public void indexedToReadable(BytesRef input, CharArr out) {
+  public CharsRef indexedToReadable(BytesRef input, CharsRef charsRef) {
     // TODO: this could be more efficient, but the sortable types should be deprecated instead
-    out.write( indexedToReadable(ByteUtils.UTF8toUTF16(input)) );
+    final char[] indexedToReadable = indexedToReadable(input.utf8ToChars(charsRef).toString()).toCharArray();
+    charsRef.copy(indexedToReadable, 0, indexedToReadable.length);
+    return charsRef;
   }
 
   @Override

Modified: lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/schema/SortableIntField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/schema/SortableIntField.java?rev=1129631&r1=1129630&r2=1129631&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/schema/SortableIntField.java (original)
+++ lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/schema/SortableIntField.java Tue May 31 11:25:37 2011
@@ -19,7 +19,7 @@ package org.apache.solr.schema;
 
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.util.BytesRef;
-import org.apache.noggit.CharArr;
+import org.apache.lucene.util.CharsRef;
 import org.apache.solr.search.MutableValueInt;
 import org.apache.solr.search.MutableValue;
 import org.apache.solr.search.QParser;
@@ -29,7 +29,6 @@ import org.apache.solr.search.function.D
 import org.apache.solr.search.function.StringIndexDocValues;
 import org.apache.lucene.document.Fieldable;
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
-import org.apache.solr.util.ByteUtils;
 import org.apache.solr.util.NumberUtils;
 import org.apache.solr.response.TextResponseWriter;
 
@@ -75,10 +74,16 @@ public class SortableIntField extends Fi
     return NumberUtils.SortableStr2int(indexedForm);
   }
 
+  /**
+   * Converts the indexed term to its readable form via {@link #indexedToReadable(String)},
+   * copies it into {@code charsRef} and returns that same instance.
+   */
   @Override
-  public void indexedToReadable(BytesRef input, CharArr out) {
+  public CharsRef indexedToReadable(BytesRef input, CharsRef charsRef) {
     // TODO: this could be more efficient, but the sortable types should be deprecated instead
-    out.write( indexedToReadable(ByteUtils.UTF8toUTF16(input)) );
+    final char[] indexedToReadable = indexedToReadable(input.utf8ToChars(charsRef).toString()).toCharArray();
+    charsRef.copy(indexedToReadable, 0, indexedToReadable.length);
+    return charsRef;
   }
 
   @Override

Modified: lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/schema/SortableLongField.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/schema/SortableLongField.java?rev=1129631&r1=1129630&r2=1129631&view=diff
==============================================================================
--- lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/schema/SortableLongField.java (original)
+++ lucene/dev/branches/docvalues/solr/src/java/org/apache/solr/schema/SortableLongField.java Tue May 31 11:25:37 2011
@@ -19,7 +19,7 @@ package org.apache.solr.schema;
 
 import org.apache.lucene.search.SortField;
 import org.apache.lucene.util.BytesRef;
-import org.apache.noggit.CharArr;
+import org.apache.lucene.util.CharsRef;
 import org.apache.solr.search.MutableValueLong;
 import org.apache.solr.search.MutableValue;
 import org.apache.solr.search.QParser;
@@ -29,7 +29,6 @@ import org.apache.solr.search.function.D
 import org.apache.solr.search.function.StringIndexDocValues;
 import org.apache.lucene.document.Fieldable;
 import org.apache.lucene.index.IndexReader.AtomicReaderContext;
-import org.apache.solr.util.ByteUtils;
 import org.apache.solr.util.NumberUtils;
 import org.apache.solr.response.TextResponseWriter;
 
@@ -67,10 +66,16 @@ public class SortableLongField extends F
     return NumberUtils.SortableStr2long(indexedForm);
   }
 
+  /**
+   * Converts the indexed term to its readable form via {@link #indexedToReadable(String)},
+   * copies it into {@code charsRef} and returns that same instance.
+   */
   @Override
-  public void indexedToReadable(BytesRef input, CharArr out) {
+  public CharsRef indexedToReadable(BytesRef input, CharsRef charsRef) {
     // TODO: this could be more efficient, but the sortable types should be deprecated instead
-    out.write( indexedToReadable(ByteUtils.UTF8toUTF16(input)) );
+    final char[] indexedToReadable = indexedToReadable(input.utf8ToChars(charsRef).toString()).toCharArray();
+    charsRef.copy(indexedToReadable, 0, indexedToReadable.length);
+    return charsRef;
   }
   
   @Override