You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sa...@apache.org on 2011/05/30 16:51:37 UTC

svn commit: r1129205 [5/7] - in /lucene/dev/branches/solr2452: ./ dev-tools/eclipse/ dev-tools/idea/.idea/ dev-tools/idea/lucene/contrib/spellchecker/ dev-tools/idea/modules/suggest/ dev-tools/maven/lucene/contrib/ dev-tools/maven/lucene/contrib/spellc...

Modified: lucene/dev/branches/solr2452/lucene/src/test/org/apache/lucene/util/TestUnicodeUtil.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/lucene/src/test/org/apache/lucene/util/TestUnicodeUtil.java?rev=1129205&r1=1129204&r2=1129205&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/lucene/src/test/org/apache/lucene/util/TestUnicodeUtil.java (original)
+++ lucene/dev/branches/solr2452/lucene/src/test/org/apache/lucene/util/TestUnicodeUtil.java Mon May 30 14:51:25 2011
@@ -85,37 +85,6 @@ package org.apache.lucene.util;
  */
 
 public class TestUnicodeUtil extends LuceneTestCase {
-  public void testNextValidUTF16String() {
-    // valid UTF-16
-    assertEquals("dogs", UnicodeUtil.nextValidUTF16String("dogs"));
-    assertEquals("dogs\uD802\uDC02", UnicodeUtil
-        .nextValidUTF16String("dogs\uD802\uDC02"));
-    
-    // an illegal combination, where we have not yet enumerated into the supp
-    // plane so we increment to H + \uDC00 (the lowest possible trail surrogate)
-    assertEquals("dogs\uD801\uDC00", UnicodeUtil
-        .nextValidUTF16String("dogs\uD801"));
-    assertEquals("dogs\uD801\uDC00", UnicodeUtil
-        .nextValidUTF16String("dogs\uD801b"));
-    assertEquals("dogs\uD801\uDC00", UnicodeUtil
-        .nextValidUTF16String("dogs\uD801\uD800"));
-    
-    // an illegal combination where we have already enumerated the trail
-    // we must increment the lead and start the trail back at the beginning.
-    assertEquals("dogs\uD802\uDC00", UnicodeUtil
-        .nextValidUTF16String("dogs\uD801\uE001"));
-    
-    // an illegal combination where we have exhausted the supp plane
-    // we must now move to the lower bmp.
-    assertEquals("dogs\uE000", UnicodeUtil
-        .nextValidUTF16String("dogs\uDBFF\uE001"));
-
-    // an unpaired trail surrogate. this is invalid when not preceded by a lead
-    // surrogate. in this case we have to bump to \uE000 (the lowest possible
-    // "upper BMP")
-    assertEquals("dogs\uE000", UnicodeUtil.nextValidUTF16String("dogs\uDC00"));
-    assertEquals("\uE000", UnicodeUtil.nextValidUTF16String("\uDC00dogs"));
-  }
 
   public void testCodePointCount() {
     BytesRef utf8 = new BytesRef(20);
@@ -197,4 +166,19 @@ public class TestUnicodeUtil extends Luc
       assertTrue(rc == -1);
     }
   }
+  
+  public void testUTF8UTF16CharsRef() {
+    for (int i = 0; i < 3989 * RANDOM_MULTIPLIER; i++) {
+      String unicode = _TestUtil.randomRealisticUnicodeString(random);
+      BytesRef ref = new BytesRef(unicode);
+      char[] arr = new char[1 + random.nextInt(100)];
+      int offset = random.nextInt(arr.length);
+      int len = random.nextInt(arr.length - offset);
+      CharsRef cRef = new CharsRef(arr, offset, len);
+      UnicodeUtil.UTF8toUTF16(ref, cRef);
+      assertEquals(cRef.toString(), unicode);
+      assertEquals(cRef, unicode); // CharSeq
+      assertEquals(cRef, ref.utf8ToString()); // CharSeq
+    }
+  }
 }

Modified: lucene/dev/branches/solr2452/modules/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java?rev=1129205&r1=1129204&r2=1129205&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java (original)
+++ lucene/dev/branches/solr2452/modules/analysis/common/src/java/org/apache/lucene/analysis/query/QueryAutoStopWordAnalyzer.java Mon May 30 14:51:25 2011
@@ -24,6 +24,7 @@ import org.apache.lucene.index.MultiFiel
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.analysis.TokenStream;
 import org.apache.lucene.analysis.core.StopFilter;
+import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.Version;
 import org.apache.lucene.util.BytesRef;
 
@@ -143,13 +144,14 @@ public final class QueryAutoStopWordAnal
    */
   public int addStopWords(IndexReader reader, String fieldName, int maxDocFreq) throws IOException {
     HashSet<String> stopWords = new HashSet<String>();
-    Terms terms = MultiFields.getTerms(reader, fieldName);
+    final Terms terms = MultiFields.getTerms(reader, fieldName);
+    final CharsRef spare = new CharsRef();
     if (terms != null) {
-      TermsEnum te = terms.iterator();
+      final TermsEnum te = terms.iterator();
       BytesRef text;
       while ((text = te.next()) != null) {
         if (te.docFreq() > maxDocFreq) {
-          stopWords.add(text.utf8ToString());
+          stopWords.add(text.utf8ToChars(spare).toString());
         }
       }
     }

Modified: lucene/dev/branches/solr2452/modules/benchmark/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/benchmark/CHANGES.txt?rev=1129205&r1=1129204&r2=1129205&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/benchmark/CHANGES.txt (original)
+++ lucene/dev/branches/solr2452/modules/benchmark/CHANGES.txt Mon May 30 14:51:25 2011
@@ -2,6 +2,9 @@ Lucene Benchmark Contrib Change Log
 
 The Benchmark contrib package contains code for benchmarking Lucene in a variety of ways.
 
+05/25/2011
+  LUCENE-3137: ExtractReuters supports out-dir param suffixed by a slash. (Doron Cohen)
+
 03/31/2011
   Updated ReadTask to the new method for obtaining a top-level deleted docs
   bitset.  Also checking the bitset for null, when there are no deleted docs.

Modified: lucene/dev/branches/solr2452/modules/benchmark/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/benchmark/build.xml?rev=1129205&r1=1129204&r2=1129205&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/benchmark/build.xml (original)
+++ lucene/dev/branches/solr2452/modules/benchmark/build.xml Mon May 30 14:51:25 2011
@@ -1,4 +1,22 @@
 <?xml version="1.0"?>
+
+<!--
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements.  See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version 2.0
+    (the "License"); you may not use this file except in compliance with
+    the License.  You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+ -->
+
 <project name="benchmark" default="default">
 
     <description>

Modified: lucene/dev/branches/solr2452/modules/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractReuters.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractReuters.java?rev=1129205&r1=1129204&r2=1129205&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractReuters.java (original)
+++ lucene/dev/branches/solr2452/modules/benchmark/src/java/org/apache/lucene/benchmark/utils/ExtractReuters.java Mon May 30 14:51:25 2011
@@ -122,17 +122,19 @@ public class ExtractReuters {
 
   public static void main(String[] args) {
     if (args.length != 2) {
-      printUsage();
+      usage("Wrong number of arguments ("+args.length+")");
+      return;
     }
     File reutersDir = new File(args[0]);
     if (!reutersDir.exists()) {
-      printUsage();
+      usage("Cannot find Path to Reuters SGM files ("+reutersDir+")");
       return;
     }
     
     // First, extract to a tmp directory and only if everything succeeds, rename
     // to output directory.
-    File outputDir = new File(args[1] + "-tmp");
+    File outputDir = new File(args[1]);
+    outputDir = new File(outputDir.getAbsolutePath() + "-tmp");
     outputDir.mkdirs();
     ExtractReuters extractor = new ExtractReuters(reutersDir, outputDir);
     extractor.extract();
@@ -140,8 +142,8 @@ public class ExtractReuters {
     outputDir.renameTo(new File(args[1]));
   }
 
-  private static void printUsage() {
-    System.err.println("Usage: java -cp <...> org.apache.lucene.benchmark.utils.ExtractReuters <Path to Reuters SGM files> <Output Path>");
+  private static void usage(String msg) {
+    System.err.println("Usage: "+msg+" :: java -cp <...> org.apache.lucene.benchmark.utils.ExtractReuters <Path to Reuters SGM files> <Output Path>");
   }
   
 }

Modified: lucene/dev/branches/solr2452/modules/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/build.xml?rev=1129205&r1=1129204&r2=1129205&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/build.xml (original)
+++ lucene/dev/branches/solr2452/modules/build.xml Mon May 30 14:51:25 2011
@@ -25,6 +25,7 @@
         <fileset dir="analysis" includes="build.xml" />
         <fileset dir="benchmark" includes="build.xml" />
         <fileset dir="grouping" includes="build.xml" />
+        <fileset dir="suggest" includes="build.xml" />
       </subant>
     </sequential>
   </target>
@@ -35,6 +36,7 @@
         <fileset dir="analysis" includes="build.xml" />
         <fileset dir="benchmark" includes="build.xml" />
         <fileset dir="grouping" includes="build.xml" />
+        <fileset dir="suggest" includes="build.xml" />
       </subant>
     </sequential>
   </target>
@@ -45,6 +47,7 @@
         <fileset dir="analysis" includes="build.xml" />
         <fileset dir="benchmark" includes="build.xml" />
         <fileset dir="grouping" includes="build.xml" />
+        <fileset dir="suggest" includes="build.xml" />
       </subant>
     </sequential>
   </target>
@@ -55,6 +58,7 @@
         <fileset dir="analysis" includes="build.xml" />
         <fileset dir="benchmark" includes="build.xml" />
         <fileset dir="grouping" includes="build.xml" />
+        <fileset dir="suggest" includes="build.xml" />
       </subant>
     </sequential>
   </target>
@@ -66,6 +70,7 @@
         <fileset dir="analysis" includes="build.xml" />
         <fileset dir="benchmark" includes="build.xml" />
         <fileset dir="grouping" includes="build.xml" />
+        <fileset dir="suggest" includes="build.xml" />
       </subant>
     </sequential>
   </target>
@@ -96,6 +101,7 @@
         <fileset dir="analysis" includes="build.xml" />
         <fileset dir="benchmark" includes="build.xml" />
         <fileset dir="grouping" includes="build.xml" />
+        <fileset dir="suggest" includes="build.xml" />
       </subant>
     </sequential>
   </target>

Modified: lucene/dev/branches/solr2452/modules/grouping/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/grouping/build.xml?rev=1129205&r1=1129204&r2=1129205&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/grouping/build.xml (original)
+++ lucene/dev/branches/solr2452/modules/grouping/build.xml Mon May 30 14:51:25 2011
@@ -1,4 +1,22 @@
 <?xml version="1.0"?>
+
+<!--
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements.  See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version 2.0
+    (the "License"); you may not use this file except in compliance with
+    the License.  You may obtain a copy of the License at
+
+        http://www.apache.org/licenses/LICENSE-2.0
+
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+ -->
+
 <project name="grouping" default="default">
     <description>
         Collectors for grouping search results

Modified: lucene/dev/branches/solr2452/modules/grouping/src/java/org/apache/lucene/search/grouping/AllGroupsCollector.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/grouping/src/java/org/apache/lucene/search/grouping/AllGroupsCollector.java?rev=1129205&r1=1129204&r2=1129205&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/modules/grouping/src/java/org/apache/lucene/search/grouping/AllGroupsCollector.java (original)
+++ lucene/dev/branches/solr2452/modules/grouping/src/java/org/apache/lucene/search/grouping/AllGroupsCollector.java Mon May 30 14:51:25 2011
@@ -88,7 +88,7 @@ public class AllGroupsCollector extends 
     int key = index.getOrd(doc);
     if (!ordSet.exists(key)) {
       ordSet.put(key);
-      BytesRef term = key == 0 ? null : index.getTerm(doc, new BytesRef());
+      BytesRef term = key == 0 ? null : index.lookup(key, new BytesRef());
       groups.add(term);
     }
   }

Copied: lucene/dev/branches/solr2452/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTLookup.java (from r1128798, lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTLookup.java)
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTLookup.java?p2=lucene/dev/branches/solr2452/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTLookup.java&p1=lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTLookup.java&r1=1128798&r2=1129205&rev=1129205&view=diff
==============================================================================
--- lucene/dev/trunk/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTLookup.java (original)
+++ lucene/dev/branches/solr2452/modules/suggest/src/java/org/apache/lucene/search/suggest/fst/FSTLookup.java Mon May 30 14:51:25 2011
@@ -15,11 +15,11 @@ import java.util.List;
 
 import org.apache.lucene.util.IOUtils;
 import org.apache.lucene.util.IntsRef;
-import org.apache.lucene.util.automaton.fst.Builder;
-import org.apache.lucene.util.automaton.fst.FST;
-import org.apache.lucene.util.automaton.fst.FST.Arc;
-import org.apache.lucene.util.automaton.fst.NoOutputs;
-import org.apache.lucene.util.automaton.fst.Outputs;
+import org.apache.lucene.util.fst.Builder;
+import org.apache.lucene.util.fst.FST;
+import org.apache.lucene.util.fst.FST.Arc;
+import org.apache.lucene.util.fst.NoOutputs;
+import org.apache.lucene.util.fst.Outputs;
 
 import org.apache.lucene.search.suggest.Lookup;
 import org.apache.lucene.search.suggest.tst.TSTLookup;
@@ -510,7 +510,7 @@ public class FSTLookup extends Lookup {
       this.automaton = new FST<Object>(new InputStreamDataInput(is), NoOutputs.getSingleton());
       cacheRootArcs();
     } finally {
-      IOUtils.closeSafely(is);
+      IOUtils.closeSafely(false, is);
     }
     return true;
   }
@@ -532,7 +532,7 @@ public class FSTLookup extends Lookup {
     try {
       this.automaton.save(new OutputStreamDataOutput(os));
     } finally {
-      IOUtils.closeSafely(os);
+      IOUtils.closeSafely(false, os);
     }
 
     return true;

Modified: lucene/dev/branches/solr2452/solr/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/solr/CHANGES.txt?rev=1129205&r1=1129204&r2=1129205&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/solr/CHANGES.txt (original)
+++ lucene/dev/branches/solr2452/solr/CHANGES.txt Mon May 30 14:51:25 2011
@@ -31,7 +31,7 @@ Velocity 1.6.4 and Velocity Tools 2.0
 Apache UIMA 2.3.1-SNAPSHOT
 
 
-Upgrading from Solr 3.2-dev
+Upgrading from Solr 3.3-dev
 ----------------------
 
 * The Lucene index format has changed and as a result, once you upgrade, 
@@ -144,6 +144,11 @@ New Features
     fq={!join from=name to=parent}eyes:blue
   (yonik)
 
+* SOLR-1942: Added the ability to select codec per fieldType in schema.xml
+  as well as support custom CodecProviders in solrconfig.xml.
+  NOTE: IndexReaderFactory now has a codecProvider that should be passed
+  to IndexReader.open (in the case you have a custom IndexReaderFactory).
+  (simonw via rmuir)
 
 
 Optimizations
@@ -246,15 +251,27 @@ Other Changes
   variance in asserting score comparisons in unit tests.
   (David Smiley, Chris Hostetter)
 
+* LUCENE-2995: Moved some spellchecker and suggest APIs to modules/suggest:
+  HighFrequencyDictionary, SortedIterator, TermFreqIterator, and the
+  suggester APIs and implementations. (rmuir)
+
 Documentation
 ----------------------
 
 * SOLR-2232: Improved README info on solr.solr.home in examples
   (Eric Pugh and hossman)
 
-* LUCENE-3006: Building javadocs will fail on warnings by default.  Override with -Dfailonjavadocwarning=false (sarowe, gsingers)
+======================= 3.x (not yet released) ================
+
+Bug Fixes
+----------------------
 
-==================  3.2.0-dev ==================
+* SOLR-2519: Improve text_* fieldTypes in example schema.xml: improve
+  cross-language defaults for text_general; break out separate
+  English-specific fieldTypes (Jan Høydahl, hossman, Robert Muir,
+  yonik, Mike McCandless)
+
+==================  3.2.0  ==================
 Versions of Major Components
 ---------------------
 Apache Lucene trunk
@@ -333,6 +350,12 @@ Bug Fixes
   in strings since those characters are not valid in javascript strings
   (although they are valid in JSON strings).  (yonik)
 
+* SOLR-2536: Add ReloadCacheRequestHandler to fix ExternalFileField bug (if reopenReaders
+  set to true and no index segments have been changed, commit cannot trigger reload
+  external file). (koji)
+
+* SOLR-2539: VectorValueSource.floatVal incorrectly used byteVal on sub-sources.
+  (Tom Liu via yonik)
 
 Other Changes
 ----------------------
@@ -346,6 +369,9 @@ Other Changes
 Build
 ----------------------
 
+* LUCENE-3006: Building javadocs will fail on warnings by default.  Override with -Dfailonjavadocwarning=false (sarowe, gsingers)
+
+
 Documentation
 ----------------------
 

Modified: lucene/dev/branches/solr2452/solr/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/solr/build.xml?rev=1129205&r1=1129204&r2=1129205&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/solr/build.xml (original)
+++ lucene/dev/branches/solr2452/solr/build.xml Mon May 30 14:51:25 2011
@@ -47,9 +47,9 @@
  
   <!-- hackish we have to call init/clover here, but solr core depends upon solrj? -->
   <target name="compile-core" depends="init, clover, compile-analyzers-common, 
-        compile-analyzers-phonetic, compile-highlighter, compile-memory, compile-misc, 
-        compile-queries, compile-spatial, compile-spellchecker, compile-solrj, 
-        common.compile-core, compile-webapp" 
+        compile-analyzers-phonetic, compile-suggest, compile-highlighter,
+        compile-memory, compile-misc, compile-queries, compile-spatial,
+        compile-solrj, common.compile-core, compile-webapp"
   	  unless="solr-core.compiled">
   </target>
  
@@ -147,6 +147,9 @@
   <target name="compile-analyzers-phonetic" unless="analyzers-phonetic.uptodate">
   	<ant dir="${common.dir}/../modules/analysis/phonetic" target="default" inheritAll="false"/>
   </target>
+  <target name="compile-suggest" unless="suggest.uptodate">
+  	<ant dir="${common.dir}/../modules/suggest" target="default" inheritAll="false"/>
+  </target>
   <target name="compile-highlighter" unless="highlighter.uptodate">
   	<ant dir="${common.dir}/contrib/highlighter" target="default" inheritAll="false"/>
   </target>
@@ -162,8 +165,5 @@
   <target name="compile-spatial" unless="spatial.uptodate">
   	<ant dir="${common.dir}/contrib/spatial" target="default" inheritAll="false"/>
   </target>
-  <target name="compile-spellchecker" unless="spellchecker.uptodate">
-  	<ant dir="${common.dir}/contrib/spellchecker" target="default" inheritAll="false"/>
-  </target>
 
 </project>

Modified: lucene/dev/branches/solr2452/solr/common-build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/solr/common-build.xml?rev=1129205&r1=1129204&r2=1129205&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/solr/common-build.xml (original)
+++ lucene/dev/branches/solr2452/solr/common-build.xml Mon May 30 14:51:25 2011
@@ -57,13 +57,14 @@
         property="analyzers-common.uptodate" classpath.property="analyzers-common.jar"/>
   <module-uptodate name="analysis/phonetic" jarfile="${common.dir}/../modules/analysis/build/phonetic/lucene-analyzers-phonetic-${version}.jar"
         property="analyzers-phonetic.uptodate" classpath.property="analyzers-phonetic.jar"/>
+  <module-uptodate name="suggest" jarfile="${common.dir}/../modules/suggest/build/lucene-suggest-${version}.jar"
+        property="suggest.uptodate" classpath.property="suggest.jar"/>
   <contrib-uptodate name="highlighter" property="highlighter.uptodate" classpath.property="highlighter.jar"/>
   <contrib-uptodate name="memory" property="memory.uptodate" classpath.property="memory.jar"/>
   <contrib-uptodate name="misc" property="misc.uptodate" classpath.property="misc.jar"/>
   <contrib-uptodate name="queries" property="queries.uptodate" classpath.property="queries.jar"/>
   <contrib-uptodate name="spatial" property="spatial.uptodate" classpath.property="spatial.jar"/>
-  <contrib-uptodate name="spellchecker" property="spellchecker.uptodate" classpath.property="spellchecker.jar"/>
-	
+
   <path id="solr.base.classpath">
   	<pathelement path="${analyzers-common.jar}"/>
   	<pathelement path="${analyzers-phonetic.jar}"/>
@@ -72,7 +73,7 @@
   	<pathelement path="${misc.jar}"/>
   	<pathelement path="${queries.jar}"/>
   	<pathelement path="${spatial.jar}"/>
-  	<pathelement path="${spellchecker.jar}"/>
+  	<pathelement path="${suggest.jar}"/>
   	<pathelement location="${common-solr.dir}/build/classes/solrj"/>
   	<pathelement location="${common-solr.dir}/build/classes/webapp"/>
   	<pathelement location="${common-solr.dir}/build/classes/java"/>

Modified: lucene/dev/branches/solr2452/solr/contrib/analysis-extras/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/solr/contrib/analysis-extras/build.xml?rev=1129205&r1=1129204&r2=1129205&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/solr/contrib/analysis-extras/build.xml (original)
+++ lucene/dev/branches/solr2452/solr/contrib/analysis-extras/build.xml Mon May 30 14:51:25 2011
@@ -23,8 +23,8 @@
     Additional analysis components
   </description>
 
-  <property name="src.dir" location="src/main/java"/>
-  <property name="tests.src.dir" location="src/test/java"/>
+  <property name="src.dir" location="src/java"/>
+  <property name="tests.src.dir" location="src/test"/>
   <property name="tests.userdir" location="src/test-files"/>
 
   <import file="../contrib-build.xml"/>

Modified: lucene/dev/branches/solr2452/solr/contrib/clustering/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/solr/contrib/clustering/build.xml?rev=1129205&r1=1129204&r2=1129205&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/solr/contrib/clustering/build.xml (original)
+++ lucene/dev/branches/solr2452/solr/contrib/clustering/build.xml Mon May 30 14:51:25 2011
@@ -23,9 +23,9 @@
     Clustering Integration
   </description>
 
-  <property name="src.dir" location="src/main/java"/>
-  <property name="tests.src.dir" location="src/test/java"/>
-  <property name="tests.userdir" location="src/test/resources"/>
+  <property name="src.dir" location="src/java"/>
+  <property name="tests.src.dir" location="src/test"/>
+  <property name="tests.userdir" location="src/test-files"/>
 
   <import file="../contrib-build.xml"/>
 </project>

Modified: lucene/dev/branches/solr2452/solr/contrib/dataimporthandler-extras/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/solr/contrib/dataimporthandler-extras/build.xml?rev=1129205&r1=1129204&r2=1129205&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/solr/contrib/dataimporthandler-extras/build.xml (original)
+++ lucene/dev/branches/solr2452/solr/contrib/dataimporthandler-extras/build.xml Mon May 30 14:51:25 2011
@@ -23,9 +23,9 @@
     Data Import Handler Extras
   </description>
   
-  <property name="src.dir" location="src/main/java"/>
-  <property name="tests.src.dir" location="src/test/java"/>
-  <property name="tests.userdir" location="src/test/resources"/>
+  <property name="src.dir" location="src/java"/>
+  <property name="tests.src.dir" location="src/test"/>
+  <property name="tests.userdir" location="src/test-files"/>
 
   <import file="../contrib-build.xml"/>
 

Modified: lucene/dev/branches/solr2452/solr/contrib/dataimporthandler/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/solr/contrib/dataimporthandler/build.xml?rev=1129205&r1=1129204&r2=1129205&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/solr/contrib/dataimporthandler/build.xml (original)
+++ lucene/dev/branches/solr2452/solr/contrib/dataimporthandler/build.xml Mon May 30 14:51:25 2011
@@ -23,9 +23,9 @@
     Data Import Handler
   </description>
 
-  <property name="src.dir" location="src/main/java"/>
-  <property name="tests.src.dir" location="src/test/java"/>
-  <property name="tests.userdir" location="src/test/resources"/>
+  <property name="src.dir" location="src/java"/>
+  <property name="tests.src.dir" location="src/test"/>
+  <property name="tests.userdir" location="src/test-files"/>
 
   <import file="../contrib-build.xml"/>
   

Modified: lucene/dev/branches/solr2452/solr/contrib/extraction/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/solr/contrib/extraction/build.xml?rev=1129205&r1=1129204&r2=1129205&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/solr/contrib/extraction/build.xml (original)
+++ lucene/dev/branches/solr2452/solr/contrib/extraction/build.xml Mon May 30 14:51:25 2011
@@ -23,9 +23,9 @@
     Solr Integration with Tika for extracting content from binary file formats such as Microsoft Word and Adobe PDF.
   </description>
 
-  <property name="src.dir" location="src/main/java"/>
-  <property name="tests.src.dir" location="src/test/java"/>
-  <property name="tests.userdir" location="src/test/resources"/>
+  <property name="src.dir" location="src/java"/>
+  <property name="tests.src.dir" location="src/test"/>
+  <property name="tests.userdir" location="src/test-files"/>
 
   <import file="../contrib-build.xml"/>
 

Modified: lucene/dev/branches/solr2452/solr/contrib/uima/build.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/solr/contrib/uima/build.xml?rev=1129205&r1=1129204&r2=1129205&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/solr/contrib/uima/build.xml (original)
+++ lucene/dev/branches/solr2452/solr/contrib/uima/build.xml Mon May 30 14:51:25 2011
@@ -23,9 +23,9 @@
     Solr Integration with UIMA for extracting metadata from arbitrary (text) fields and enrich document with features extracted from UIMA types (language, sentences, concepts, named entities, etc.)
   </description>
 
-  <property name="src.dir" location="src/main/java"/>
-  <property name="tests.src.dir" location="src/test/java"/>
-  <property name="tests.userdir" location="src/test/resources"/>
+  <property name="src.dir" location="src/java"/>
+  <property name="tests.src.dir" location="src/test"/>
+  <property name="tests.userdir" location="src/test-files"/>
 
   <import file="../contrib-build.xml"/>
 

Modified: lucene/dev/branches/solr2452/solr/example/solr/conf/schema.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/solr/example/solr/conf/schema.xml?rev=1129205&r1=1129204&r2=1129205&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/solr/example/solr/conf/schema.xml (original)
+++ lucene/dev/branches/solr2452/solr/example/solr/conf/schema.xml Mon May 30 14:51:25 2011
@@ -45,15 +45,16 @@
     that avoids logging every request
 -->
 
-<schema name="example" version="1.3">
+<schema name="example" version="1.4">
   <!-- attribute "name" is the name of this schema and is only used for display purposes.
        Applications should change this to reflect the nature of the search collection.
-       version="1.2" is Solr's version number for the schema syntax and semantics.  It should
+       version="1.4" is Solr's version number for the schema syntax and semantics.  It should
        not normally be changed by applications.
        1.0: multiValued attribute did not exist, all fields are multiValued by nature
        1.1: multiValued attribute introduced, false by default 
        1.2: omitTermFreqAndPositions attribute introduced, true by default except for text fields.
        1.3: removed optional field compress feature
+       1.4: default auto-phrase (QueryParser feature) to off
      -->
 
   <types>
@@ -190,16 +191,87 @@
       </analyzer>
     </fieldType>
 
-    <!-- A text field that uses WordDelimiterFilter to enable splitting and matching of
-        words on case-change, alpha numeric boundaries, and non-alphanumeric chars,
-        so that a query of "wifi" or "wi fi" could match a document containing "Wi-Fi".
-        Synonyms and stopwords are customized by external files, and stemming is enabled.
-        The attribute autoGeneratePhraseQueries="true" (the default) causes words that get split to
-        form phrase queries. For example, WordDelimiterFilter splitting text:pdp-11 will cause the parser
-        to generate text:"pdp 11" rather than (text:PDP OR text:11).
-        NOTE: autoGeneratePhraseQueries="true" tends to not work well for non whitespace delimited languages.
+    <!-- A general text field that has reasonable, generic
+         cross-language defaults: it tokenizes with StandardTokenizer,
+	 removes stop words from case-insensitive "stopwords.txt"
+	 (empty by default), and down cases.  At query time only, it
+	 also applies synonyms. -->
+    <fieldType name="text_general" class="solr.TextField" positionIncrementGap="100">
+      <analyzer type="index">
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
+        <!-- in this example, we will only use synonyms at query time
+        <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
+        -->
+        <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+      <analyzer type="query">
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
+        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
+        <filter class="solr.LowerCaseFilterFactory"/>
+      </analyzer>
+    </fieldType>
+
+    <!-- A text field with defaults appropriate for English: it
+         tokenizes with StandardTokenizer, removes English stop words
+         (stopwords_en.txt), down cases, protects words from protwords.txt, and
+         finally applies Porter's stemming.  The query time analyzer
+         also applies synonyms from synonyms.txt. -->
+    <fieldType name="text_en" class="solr.TextField" positionIncrementGap="100">
+      <analyzer type="index">
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <!-- in this example, we will only use synonyms at query time
+        <filter class="solr.SynonymFilterFactory" synonyms="index_synonyms.txt" ignoreCase="true" expand="false"/>
+        -->
+        <!-- Case insensitive stop word removal.
+          add enablePositionIncrements=true in both the index and query
+          analyzers to leave a 'gap' for more accurate phrase queries.
+        -->
+        <filter class="solr.StopFilterFactory"
+                ignoreCase="true"
+                words="stopwords_en.txt"
+                enablePositionIncrements="true"
+                />
+        <filter class="solr.LowerCaseFilterFactory"/>
+	<filter class="solr.EnglishPossessiveFilterFactory"/>
+        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+	<!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
+        <filter class="solr.EnglishMinimalStemFilterFactory"/>
+	-->
+        <filter class="solr.PorterStemFilterFactory"/>
+      </analyzer>
+      <analyzer type="query">
+        <tokenizer class="solr.StandardTokenizerFactory"/>
+        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
+        <filter class="solr.StopFilterFactory"
+                ignoreCase="true"
+                words="stopwords_en.txt"
+                enablePositionIncrements="true"
+                />
+        <filter class="solr.LowerCaseFilterFactory"/>
+	<filter class="solr.EnglishPossessiveFilterFactory"/>
+        <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
+	<!-- Optionally you may want to use this less aggressive stemmer instead of PorterStemFilterFactory:
+        <filter class="solr.EnglishMinimalStemFilterFactory"/>
+	-->
+        <filter class="solr.PorterStemFilterFactory"/>
+      </analyzer>
+    </fieldType>
+
+    <!-- A text field with defaults appropriate for English, plus
+	 aggressive word-splitting and autophrase features enabled.
+	 This field is just like text_en, except it adds
+	 WordDelimiterFilter to enable splitting and matching of
+	 words on case-change, alpha numeric boundaries, and
+	 non-alphanumeric chars.  This means certain compound word
+	 cases will work, for example query "wi fi" will match
+	 document "WiFi" or "wi-fi".  However, other cases will still
+	 not match, for example if the query is "wifi" and the
+	 document is "wi fi" or if the query is "wi-fi" and the
+	 document is "wifi".
         -->
-    <fieldType name="text" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
+    <fieldType name="text_en_splitting" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
       <analyzer type="index">
         <tokenizer class="solr.WhitespaceTokenizerFactory"/>
         <!-- in this example, we will only use synonyms at query time
@@ -211,7 +283,7 @@
         -->
         <filter class="solr.StopFilterFactory"
                 ignoreCase="true"
-                words="stopwords.txt"
+                words="stopwords_en.txt"
                 enablePositionIncrements="true"
                 />
         <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="1"/>
@@ -224,7 +296,7 @@
         <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
         <filter class="solr.StopFilterFactory"
                 ignoreCase="true"
-                words="stopwords.txt"
+                words="stopwords_en.txt"
                 enablePositionIncrements="true"
                 />
         <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="1"/>
@@ -234,14 +306,13 @@
       </analyzer>
     </fieldType>
 
-
     <!-- Less flexible matching, but less false matches.  Probably not ideal for product names,
          but may be good for SKUs.  Can insert dashes in the wrong place and still match. -->
-    <fieldType name="textTight" class="solr.TextField" positionIncrementGap="100" >
+    <fieldType name="text_en_splitting_tight" class="solr.TextField" positionIncrementGap="100" autoGeneratePhraseQueries="true">
       <analyzer>
         <tokenizer class="solr.WhitespaceTokenizerFactory"/>
         <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="false"/>
-        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords_en.txt"/>
         <filter class="solr.WordDelimiterFilterFactory" generateWordParts="0" generateNumberParts="0" catenateWords="1" catenateNumbers="1" catenateAll="0"/>
         <filter class="solr.LowerCaseFilterFactory"/>
         <filter class="solr.KeywordMarkerFilterFactory" protected="protwords.txt"/>
@@ -252,57 +323,27 @@
       </analyzer>
     </fieldType>
 
-
-    <!-- A general unstemmed text field - good if one does not know the language of the field -->
-    <fieldType name="textgen" class="solr.TextField" positionIncrementGap="100">
-      <analyzer type="index">
-        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
-        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
-        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
-        <filter class="solr.LowerCaseFilterFactory"/>
-      </analyzer>
-      <analyzer type="query">
-        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
-        <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
-        <filter class="solr.StopFilterFactory"
-                ignoreCase="true"
-                words="stopwords.txt"
-                enablePositionIncrements="true"
-                />
-        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
-        <filter class="solr.LowerCaseFilterFactory"/>
-      </analyzer>
-    </fieldType>
-
-
-    <!-- A general unstemmed text field that indexes tokens normally and also
-         reversed (via ReversedWildcardFilterFactory), to enable more efficient 
-	 leading wildcard queries. -->
-    <fieldType name="text_rev" class="solr.TextField" positionIncrementGap="100">
+    <!-- Just like text_general except it reverses the characters of
+	 each token, to enable more efficient leading wildcard queries. -->
+    <fieldType name="text_general_rev" class="solr.TextField" positionIncrementGap="100">
       <analyzer type="index">
-        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <tokenizer class="solr.StandardTokenizerFactory"/>
         <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
-        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="1" catenateNumbers="1" catenateAll="0" splitOnCaseChange="0"/>
         <filter class="solr.LowerCaseFilterFactory"/>
         <filter class="solr.ReversedWildcardFilterFactory" withOriginal="true"
            maxPosAsterisk="3" maxPosQuestion="2" maxFractionAsterisk="0.33"/>
       </analyzer>
       <analyzer type="query">
-        <tokenizer class="solr.WhitespaceTokenizerFactory"/>
+        <tokenizer class="solr.StandardTokenizerFactory"/>
         <filter class="solr.SynonymFilterFactory" synonyms="synonyms.txt" ignoreCase="true" expand="true"/>
-        <filter class="solr.StopFilterFactory"
-                ignoreCase="true"
-                words="stopwords.txt"
-                enablePositionIncrements="true"
-                />
-        <filter class="solr.WordDelimiterFilterFactory" generateWordParts="1" generateNumberParts="1" catenateWords="0" catenateNumbers="0" catenateAll="0" splitOnCaseChange="0"/>
+        <filter class="solr.StopFilterFactory" ignoreCase="true" words="stopwords.txt" enablePositionIncrements="true" />
         <filter class="solr.LowerCaseFilterFactory"/>
       </analyzer>
     </fieldType>
 
     <!-- charFilter + WhitespaceTokenizer  -->
     <!--
-    <fieldType name="textCharNorm" class="solr.TextField" positionIncrementGap="100" >
+    <fieldType name="text_char_norm" class="solr.TextField" positionIncrementGap="100" >
       <analyzer>
         <charFilter class="solr.MappingCharFilterFactory" mapping="mapping-ISOLatin1Accent.txt"/>
         <tokenizer class="solr.WhitespaceTokenizerFactory"/>
@@ -436,13 +477,13 @@
    -->
 
    <field name="id" type="string" indexed="true" stored="true" required="true" /> 
-   <field name="sku" type="textTight" indexed="true" stored="true" omitNorms="true"/>
-   <field name="name" type="textgen" indexed="true" stored="true"/>
+   <field name="sku" type="text_en_splitting_tight" indexed="true" stored="true" omitNorms="true"/>
+   <field name="name" type="text_general" indexed="true" stored="true"/>
    <field name="alphaNameSort" type="alphaOnlySort" indexed="true" stored="false"/>
-   <field name="manu" type="textgen" indexed="true" stored="true" omitNorms="true"/>
+   <field name="manu" type="text_general" indexed="true" stored="true" omitNorms="true"/>
    <field name="cat" type="string" indexed="true" stored="true" multiValued="true"/>
-   <field name="features" type="text" indexed="true" stored="true" multiValued="true"/>
-   <field name="includes" type="text" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true" />
+   <field name="features" type="text_general" indexed="true" stored="true" multiValued="true"/>
+   <field name="includes" type="text_general" indexed="true" stored="true" termVectors="true" termPositions="true" termOffsets="true" />
 
    <field name="weight" type="float" indexed="true" stored="true"/>
    <field name="price"  type="float" indexed="true" stored="true"/>
@@ -460,13 +501,13 @@
      Some fields are multiValued only because Tika currently may return
      multiple values for them.
    -->
-   <field name="title" type="text" indexed="true" stored="true" multiValued="true"/>
-   <field name="subject" type="text" indexed="true" stored="true"/>
-   <field name="description" type="text" indexed="true" stored="true"/>
-   <field name="comments" type="text" indexed="true" stored="true"/>
-   <field name="author" type="textgen" indexed="true" stored="true"/>
-   <field name="keywords" type="textgen" indexed="true" stored="true"/>
-   <field name="category" type="textgen" indexed="true" stored="true"/>
+   <field name="title" type="text_general" indexed="true" stored="true" multiValued="true"/>
+   <field name="subject" type="text_general" indexed="true" stored="true"/>
+   <field name="description" type="text_general" indexed="true" stored="true"/>
+   <field name="comments" type="text_general" indexed="true" stored="true"/>
+   <field name="author" type="text_general" indexed="true" stored="true"/>
+   <field name="keywords" type="text_general" indexed="true" stored="true"/>
+   <field name="category" type="text_general" indexed="true" stored="true"/>
    <field name="content_type" type="string" indexed="true" stored="true" multiValued="true"/>
    <field name="last_modified" type="date" indexed="true" stored="true"/>
    <field name="links" type="string" indexed="true" stored="true" multiValued="true"/>
@@ -474,11 +515,11 @@
 
    <!-- catchall field, containing all other searchable text fields (implemented
         via copyField further on in this schema  -->
-   <field name="text" type="text" indexed="true" stored="false" multiValued="true"/>
+   <field name="text" type="text_general" indexed="true" stored="false" multiValued="true"/>
 
    <!-- catchall text field that indexes tokens both normally and in reverse for efficient
         leading wildcard queries. -->
-   <field name="text_rev" type="text_rev" indexed="true" stored="false" multiValued="true"/>
+   <field name="text_rev" type="text_general_rev" indexed="true" stored="false" multiValued="true"/>
 
    <!-- non-tokenized version of manufacturer to make it easier to sort or group
         results by manufacturer.  copied from "manu" via copyField -->
@@ -504,8 +545,8 @@
    <dynamicField name="*_i"  type="int"    indexed="true"  stored="true"/>
    <dynamicField name="*_s"  type="string"  indexed="true"  stored="true"/>
    <dynamicField name="*_l"  type="long"   indexed="true"  stored="true"/>
-   <dynamicField name="*_t"  type="text"    indexed="true"  stored="true"/>
-   <dynamicField name="*_txt" type="text"    indexed="true"  stored="true" multiValued="true"/>
+   <dynamicField name="*_t"  type="text_general"    indexed="true"  stored="true"/>
+   <dynamicField name="*_txt" type="text_general"    indexed="true"  stored="true" multiValued="true"/>
    <dynamicField name="*_b"  type="boolean" indexed="true"  stored="true"/>
    <dynamicField name="*_f"  type="float"  indexed="true"  stored="true"/>
    <dynamicField name="*_d"  type="double" indexed="true"  stored="true"/>
@@ -526,7 +567,7 @@
    <dynamicField name="*_pi"  type="pint"    indexed="true"  stored="true"/>
 
    <dynamicField name="ignored_*" type="ignored" multiValued="true"/>
-   <dynamicField name="attr_*" type="textgen" indexed="true" stored="true" multiValued="true"/>
+   <dynamicField name="attr_*" type="text_general" indexed="true" stored="true" multiValued="true"/>
 
    <dynamicField name="random_*" type="random" />
 

Modified: lucene/dev/branches/solr2452/solr/example/solr/conf/solrconfig.xml
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/solr/example/solr/conf/solrconfig.xml?rev=1129205&r1=1129204&r2=1129205&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/solr/example/solr/conf/solrconfig.xml (original)
+++ lucene/dev/branches/solr2452/solr/example/solr/conf/solrconfig.xml Mon May 30 14:51:25 2011
@@ -237,7 +237,7 @@
          of detailed information when indexing.
 
          Setting The value to true will instruct the underlying Lucene
-         IndexWriter to write it's debugging info the specified file
+         IndexWriter to write its debugging info to the specified file
       -->
      <infoStream file="INFOSTREAM.txt">false</infoStream> 
 

Modified: lucene/dev/branches/solr2452/solr/example/solr/conf/stopwords.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/solr/example/solr/conf/stopwords.txt?rev=1129205&r1=1129204&r2=1129205&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/solr/example/solr/conf/stopwords.txt (original)
+++ lucene/dev/branches/solr2452/solr/example/solr/conf/stopwords.txt Mon May 30 14:51:25 2011
@@ -12,47 +12,3 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-
-#-----------------------------------------------------------------------
-# a couple of test stopwords to test that the words are really being
-# configured from this file:
-stopworda
-stopwordb
-
-#Standard english stop words taken from Lucene's StopAnalyzer
-a
-an
-and
-are
-as
-at
-be
-but
-by
-for
-if
-in
-into
-is
-it
-no
-not
-of
-on
-or
-s
-such
-t
-that
-the
-their
-then
-there
-these
-they
-this
-to
-was
-will
-with
-

Modified: lucene/dev/branches/solr2452/solr/site/skin/basic.css
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/solr/site/skin/basic.css?rev=1129205&r1=1129204&r2=1129205&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/solr/site/skin/basic.css (original)
+++ lucene/dev/branches/solr2452/solr/site/skin/basic.css Mon May 30 14:51:25 2011
@@ -163,4 +163,4 @@ p {
 .codefrag {
   font-family: "Courier New", Courier, monospace;
   font-size: 110%;
-}
+}
\ No newline at end of file

Modified: lucene/dev/branches/solr2452/solr/site/skin/print.css
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/solr/site/skin/print.css?rev=1129205&r1=1129204&r2=1129205&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/solr/site/skin/print.css (original)
+++ lucene/dev/branches/solr2452/solr/site/skin/print.css Mon May 30 14:51:25 2011
@@ -51,4 +51,4 @@ a:link, a:visited {
 
 acronym {
   border: 0;
-}
+}
\ No newline at end of file

Modified: lucene/dev/branches/solr2452/solr/site/skin/profile.css
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/solr/site/skin/profile.css?rev=1129205&r1=1129204&r2=1129205&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/solr/site/skin/profile.css (original)
+++ lucene/dev/branches/solr2452/solr/site/skin/profile.css Mon May 30 14:51:25 2011
@@ -172,4 +172,4 @@ a:hover { color:#6587ff} 
     }
       
     
-  
+  
\ No newline at end of file

Modified: lucene/dev/branches/solr2452/solr/site/skin/screen.css
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/solr/site/skin/screen.css?rev=1129205&r1=1129204&r2=1129205&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/solr/site/skin/screen.css (original)
+++ lucene/dev/branches/solr2452/solr/site/skin/screen.css Mon May 30 14:51:25 2011
@@ -584,4 +584,4 @@ p.instruction {
   list-style-image: url('../images/instruction_arrow.png');
   list-style-position: outside;
   margin-left: 2em;
-} 
+} 
\ No newline at end of file

Modified: lucene/dev/branches/solr2452/solr/site/tutorial.html
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/solr/site/tutorial.html?rev=1129205&r1=1129204&r2=1129205&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/solr/site/tutorial.html (original)
+++ lucene/dev/branches/solr2452/solr/site/tutorial.html Mon May 30 14:51:25 2011
@@ -474,9 +474,9 @@ SimplePostTool: COMMITting Solr index ch
 <p>
 You may have noticed that even though the file <span class="codefrag">solr.xml</span> has now
 been POSTed to the server twice, you still only get 1 result when searching for
-"solr".  This is because the example schema.xml specifies a "uniqueKey" field
+"solr".  This is because the example <span class="codefrag">schema.xml</span> specifies a "<span class="codefrag">uniqueKey</span>" field
 called "<span class="codefrag">id</span>".  Whenever you POST instructions to Solr to add a
-document with the same value for the uniqueKey as an existing document, it
+document with the same value for the <span class="codefrag">uniqueKey</span> as an existing document, it
 automatically replaces it for you.  You can see that that has happened by
 looking at the values for <span class="codefrag">numDocs</span> and <span class="codefrag">maxDoc</span> in the
 "CORE"/searcher section of the statistics page...  </p>
@@ -487,20 +487,20 @@ looking at the values for <span class="c
 </p>
 <p>
   
-<strong>numDocs</strong> represents the number of searchable documents in the
+<strong><span class="codefrag">numDocs</span></strong> represents the number of searchable documents in the
   index (and will be larger than the number of XML files since some files
-  contained more than one <span class="codefrag">&lt;doc&gt;</span>). <strong>maxDoc</strong>
-  may be larger as the maxDoc count includes logically deleted documents that
+  contained more than one <span class="codefrag">&lt;doc&gt;</span>). <strong><span class="codefrag">maxDoc</span></strong>
+  may be larger as the <span class="codefrag">maxDoc</span> count includes logically deleted documents that
   have not yet been removed from the index. You can re-post the sample XML
-  files over and over again as much as you want and numDocs will never
-  increase,because the new documents will constantly be replacing the old.
+  files over and over again as much as you want and <span class="codefrag">numDocs</span> will never
+  increase, because the new documents will constantly be replacing the old.
 </p>
 <p>
 Go ahead and edit the existing XML files to change some of the data, and re-run
 the <span class="codefrag">java -jar post.jar</span> command, you'll see your changes reflected
 in subsequent searches.
 </p>
-<a name="N1011B"></a><a name="Deleting+Data"></a>
+<a name="N1012C"></a><a name="Deleting+Data"></a>
 <h3 class="boxed">Deleting Data</h3>
 <p>You can delete data by POSTing a delete command to the update URL and specifying the value
       of the document's unique key field, or a query that matches multiple documents (be careful with that one!).  Since these commands
@@ -511,7 +511,7 @@ in subsequent searches.
 <p>Now if you go to the <a href="http://localhost:8983/solr/admin/stats.jsp">statistics</a> page and scroll down
        to the UPDATE_HANDLERS section and verify that "<span class="codefrag">deletesById : 1</span>"</p>
 <p>If you search for <a href="http://localhost:8983/solr/select?q=id:SP2514N">id:SP2514N</a> it will still be found,
-       because index changes are not visible until, and a new searcher is opened.  To cause
+       because index changes are not visible until changes are committed and a new searcher is opened.  To cause
        this to happen, send a commit command to Solr (post.jar does this for you by default):</p>
 <pre class="code">java -jar post.jar</pre>
 <p>Now re-execute the previous search and verify that no matching documents are found.  Also revisit the
@@ -520,7 +520,7 @@ in subsequent searches.
       <a href="http://localhost:8983/solr/select?q=name:DDR&fl=name">DDR</a> in the name:</p>
 <pre class="code">java -Ddata=args -jar post.jar "&lt;delete&gt;&lt;query&gt;name:DDR&lt;/query&gt;&lt;/delete&gt;"</pre>
 <p>Commit can be an expensive operation so it's best to make many changes to an index in a batch and
-      then send the commit command at the end.  There is also an optimize command that does the same thing as commit,
+      then send the <span class="codefrag">commit</span> command at the end.  There is also an <span class="codefrag">optimize</span> command that does the same thing as <span class="codefrag">commit</span>,
       in addition to merging all index segments into a single segment, making it faster to search and causing any
       deleted documents to be removed.  All of the update commands are documented <a href="http://wiki.apache.org/solr/UpdateXmlMessages">here</a>.
     </p>
@@ -529,14 +529,14 @@ in subsequent searches.
 </div>
 
 
-<a name="N10161"></a><a name="Querying+Data"></a>
+<a name="N1017B"></a><a name="Querying+Data"></a>
 <h2 class="boxed">Querying Data</h2>
 <div class="section">
 <p>
-    Searches are done via HTTP GET on the select URL with the query string in the q parameter.
+    Searches are done via HTTP GET on the <span class="codefrag">select</span> URL with the query string in the <span class="codefrag">q</span> parameter.
     You can pass a number of optional <a href="http://wiki.apache.org/solr/StandardRequestHandler">request parameters</a>
-    to the request handler to control what information is returned.  For example, you can use the "fl" parameter
-    to control what stored fields are returned, and if the relevancy score is returned...
+    to the request handler to control what information is returned.  For example, you can use the "<span class="codefrag">fl</span>" parameter
+    to control what stored fields are returned, and if the relevancy score is returned:
   </p>
 <ul>
       
@@ -558,13 +558,13 @@ in subsequent searches.
 </ul>
 <p>
     Solr provides a <a href="http://localhost:8983/solr/admin/form.jsp">query form</a> within the web admin interface
-    that allows setting the various request parameters and is useful when trying out or debugging queries.
+    that allows setting the various request parameters and is useful when testing or debugging queries.
   </p>
-<a name="N10196"></a><a name="Sorting"></a>
+<a name="N101B9"></a><a name="Sorting"></a>
 <h3 class="boxed">Sorting</h3>
 <p>
       Solr provides a simple method to sort on one or more indexed fields.
-      Use the 'sort' parameter to specify "field direction" pairs...
+      Use the "<span class="codefrag">sort</span>" parameter to specify "field direction" pairs, separated by commas if there's more than one sort field:
     </p>
 <ul>
       
@@ -582,7 +582,7 @@ in subsequent searches.
     
 </ul>
 <p>
-      "score" can also be used as a field name when specifying a sort...
+      "<span class="codefrag">score</span>" can also be used as a field name when specifying a sort:
     </p>
 <ul>
       
@@ -596,7 +596,7 @@ in subsequent searches.
     
 </ul>
 <p>
-      Complex functions may also be used to sort results...
+      Complex functions may also be used to sort results:
     </p>
 <ul>
       
@@ -612,12 +612,12 @@ in subsequent searches.
 
 
 
-<a name="N101D4"></a><a name="Highlighting"></a>
+<a name="N101FD"></a><a name="Highlighting"></a>
 <h2 class="boxed">Highlighting</h2>
 <div class="section">
 <p>
     Hit highlighting returns relevant snippets of each returned document, and highlights
-    keywords from the query within those context snippets.
+    terms from the query within those context snippets.
   </p>
 <p>
     The following example searches for <span class="codefrag">video card</span> and requests
@@ -639,7 +639,7 @@ in subsequent searches.
 
 
 
-<a name="N101FD"></a><a name="Faceted+Search"></a>
+<a name="N10226"></a><a name="Faceted+Search"></a>
 <h2 class="boxed">Faceted Search</h2>
 <div class="section">
 <p>
@@ -698,7 +698,7 @@ in subsequent searches.
 
 
 
-<a name="N1024E"></a><a name="Search+UI"></a>
+<a name="N10277"></a><a name="Search+UI"></a>
 <h2 class="boxed">Search UI</h2>
 <div class="section">
 <p>
@@ -716,28 +716,44 @@ in subsequent searches.
 
 
 
-<a name="N10261"></a><a name="Text+Analysis"></a>
+<a name="N1028A"></a><a name="Text+Analysis"></a>
 <h2 class="boxed">Text Analysis</h2>
 <div class="section">
 <p>
-    Text fields are typically indexed by breaking the field into words and applying various transformations such as
+    Text fields are typically indexed by breaking the text into words and applying various transformations such as
     lowercasing, removing plurals, or stemming to increase relevancy.  The same text transformations are normally
     applied to any queries in order to match what is indexed.
   </p>
-<p>Example queries demonstrating relevancy improving transformations:</p>
+<p>
+    The <a href="http://wiki.apache.org/solr/SchemaXml">schema</a> defines
+    the fields in the index and what type of analysis is applied to them.  The current schema your server is using
+    may be accessed via the <span class="codefrag">[SCHEMA]</span> link on the <a href="http://localhost:8983/solr/admin/">admin</a> page.
+  </p>
+<p>
+    The best analysis components (tokenization and filtering) for your textual content depend heavily on language.
+    As you can see in the above <span class="codefrag">[SCHEMA]</span> link, the fields in the example schema are using a <span class="codefrag">fieldType</span>
+    named <span class="codefrag">text_general</span>, which has defaults appropriate for all languages.
+  </p>
+<p>
+    If you know your textual content is English, as is the case for the example documents in this tutorial,
+    and you'd like to apply English-specific stemming and stop word removal, as well as split compound words, you can use the <span class="codefrag">text_en_splitting</span> fieldType instead.
+    Go ahead and edit the <span class="codefrag">schema.xml</span> under the <span class="codefrag">solr/example/solr/conf</span> directory,
+    and change the <span class="codefrag">type</span> for fields <span class="codefrag">text</span> and <span class="codefrag">features</span> from <span class="codefrag">text_general</span> to <span class="codefrag">text_en_splitting</span>.
+    Restart the server and then re-post all of the documents, and then these queries will show the English-specific transformations:
+  </p>
 <ul>
     
 <li>A search for
        <a href="http://localhost:8983/solr/select/?indent=on&q=power-shot&fl=name">power-shot</a>
        matches <span class="codefrag">PowerShot</span>, and
       <a href="http://localhost:8983/solr/select/?indent=on&q=adata&fl=name">adata</a>
-      matches <span class="codefrag">A-DATA</span> due to the use of WordDelimiterFilter and LowerCaseFilter.
+      matches <span class="codefrag">A-DATA</span> due to the use of <span class="codefrag">WordDelimiterFilter</span> and <span class="codefrag">LowerCaseFilter</span>.
     </li>
 
     
 <li>A search for
       <a href="http://localhost:8983/solr/select/?indent=on&q=features:recharging&fl=name,features">features:recharging</a>
-       matches <span class="codefrag">Rechargeable</span> due to stemming with the EnglishPorterFilter.
+       matches <span class="codefrag">Rechargeable</span> due to stemming with the <span class="codefrag">EnglishPorterFilter</span>.
     </li>
 
     
@@ -745,20 +761,15 @@ in subsequent searches.
        <a href="http://localhost:8983/solr/select/?indent=on&q=%221 gigabyte%22&fl=name">"1 gigabyte"</a>
        matches things with <span class="codefrag">GB</span>, and the misspelled
       <a href="http://localhost:8983/solr/select/?indent=on&q=pixima&fl=name">pixima</a>
-       matches <span class="codefrag">Pixma</span> due to use of a SynonymFilter.
+       matches <span class="codefrag">Pixma</span> due to use of a <span class="codefrag">SynonymFilter</span>.
     </li>
 
   
 </ul>
-<p>
-    The <a href="http://wiki.apache.org/solr/SchemaXml">schema</a> defines
-    the fields in the index and what type of analysis is applied to them.  The current schema your server is using
-    may be accessed via the <span class="codefrag">[SCHEMA]</span> link on the <a href="http://localhost:8983/solr/admin/">admin</a> page.
-  </p>
 <p>A full description of the analysis components, Analyzers, Tokenizers, and TokenFilters
     available for use is <a href="http://wiki.apache.org/solr/AnalyzersTokenizersTokenFilters">here</a>.
   </p>
-<a name="N102B1"></a><a name="Analysis+Debugging"></a>
+<a name="N1030A"></a><a name="Analysis+Debugging"></a>
 <h3 class="boxed">Analysis Debugging</h3>
 <p>There is a handy <a href="http://localhost:8983/solr/admin/analysis.jsp">analysis</a>
       debugging page where you can see how a text value is broken down into words,
@@ -768,7 +779,7 @@ in subsequent searches.
       
 <a href="http://localhost:8983/solr/admin/analysis.jsp?name=name&val=Canon+Power-Shot+SD500">This</a>
       shows how "<span class="codefrag">Canon Power-Shot SD500</span>" would be indexed as a value in the name field.  Each row of
-      the table shows the resulting tokens after having passed through the next TokenFilter in the Analyzer for the <span class="codefrag">name</span> field.
+      the table shows the resulting tokens after having passed through the next <span class="codefrag">TokenFilter</span> in the analyzer for the <span class="codefrag">name</span> field.
       Notice how both <span class="codefrag">powershot</span> and <span class="codefrag">power</span>, <span class="codefrag">shot</span> are indexed.  Tokens generated at the same position
       are shown in the same column, in this case <span class="codefrag">shot</span> and <span class="codefrag">powershot</span>.
     </p>
@@ -787,12 +798,12 @@ in subsequent searches.
 </div>
 
 
-<a name="N102F0"></a><a name="Conclusion"></a>
+<a name="N1034C"></a><a name="Conclusion"></a>
 <h2 class="boxed">Conclusion</h2>
 <div class="section">
 <p>
   Congratulations!  You successfully ran a small Solr instance, added some
-  documents, and made changes to the index.  You learned about queries, text
+  documents, and made changes to the index and schema.  You learned about queries, text
   analysis, and the Solr admin interface.  You're ready to start using Solr on
   your own project!  Continue on with the following steps:
 </p>
@@ -800,22 +811,21 @@ in subsequent searches.
   
 <li>Subscribe to the Solr <a href="mailing_lists.html">mailing lists</a>!</li>
   
-<li>Make a copy of the Solr example directory as a template for your project.</li>
+<li>Make a copy of the Solr <span class="codefrag">example</span> directory as a template for your project.</li>
   
-<li>Customize the schema and other config in solr/conf/ to meet your needs.</li> 
+<li>Customize the schema and other config in <span class="codefrag">solr/conf/</span> to meet your needs.</li> 
 
 </ul>
 <p>
-  Solr as a ton of other features that we haven't touched on here, including
+  Solr has a ton of other features that we haven't touched on here, including
   <a href="http://wiki.apache.org/solr/DistributedSearch">distributed search</a>
   to handle huge document collections,
   <a href="http://wiki.apache.org/solr/FunctionQuery">function queries</a>,
   <a href="http://wiki.apache.org/solr/StatsComponent">numeric field statistics</a>,
   and
   <a href="http://wiki.apache.org/solr/ClusteringComponent">search results clustering</a>.
-  Explore the <a href="http://wiki.apache.org/solr/FrontPage">Solr Wiki</a> to find out
-  more details about Solr's many
-  <a href="features.html">features</a>.
+  Explore the <a href="http://wiki.apache.org/solr/FrontPage">Solr Wiki</a> to find
+  more details about Solr's many <a href="features.html">features</a>.
 </p>
 <p>
   Have Fun, and we'll see you on the Solr mailing lists!

Modified: lucene/dev/branches/solr2452/solr/site/tutorial.pdf
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/solr/site/tutorial.pdf?rev=1129205&r1=1129204&r2=1129205&view=diff
==============================================================================
Binary files - no diff available.

Modified: lucene/dev/branches/solr2452/solr/src/common/org/apache/solr/common/util/FileUtils.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/solr/src/common/org/apache/solr/common/util/FileUtils.java?rev=1129205&r1=1129204&r2=1129205&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/solr/src/common/org/apache/solr/common/util/FileUtils.java (original)
+++ lucene/dev/branches/solr2452/solr/src/common/org/apache/solr/common/util/FileUtils.java Mon May 30 14:51:25 2011
@@ -54,7 +54,7 @@ public class FileUtils {
   }
 
   /**
-   * Copied from Lucene's FSDirectory.sync(String) <!-- protected -->
+   * Copied from Lucene's FSDirectory.fsync(String) <!-- protected -->
    *
    * @param fullFile the File to be synced to disk
    * @throws IOException if the file could not be synced

Modified: lucene/dev/branches/solr2452/solr/src/java/org/apache/solr/core/IndexReaderFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/solr/src/java/org/apache/solr/core/IndexReaderFactory.java?rev=1129205&r1=1129204&r2=1129205&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/solr/src/java/org/apache/solr/core/IndexReaderFactory.java (original)
+++ lucene/dev/branches/solr2452/solr/src/java/org/apache/solr/core/IndexReaderFactory.java Mon May 30 14:51:25 2011
@@ -19,6 +19,7 @@ package org.apache.solr.core;
 import java.io.IOException;
 
 import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.codecs.CodecProvider;
 import org.apache.lucene.store.Directory;
 import org.apache.solr.common.util.NamedList;
 import org.apache.solr.util.plugin.NamedListInitializedPlugin;
@@ -28,6 +29,7 @@ import org.apache.solr.util.plugin.Named
  */
 public abstract class IndexReaderFactory implements NamedListInitializedPlugin {
   protected int termInfosIndexDivisor = 1;//IndexReader.DEFAULT_TERMS_INDEX_DIVISOR;  Set this once Lucene makes this public.
+  protected CodecProvider provider;
   /**
    * Potentially initializes {@link #termInfosIndexDivisor}.  Overriding classes should call super.init() in order
    * to make sure termInfosIndexDivisor is set.
@@ -63,4 +65,11 @@ public abstract class IndexReaderFactory
    */
   public abstract IndexReader newReader(Directory indexDir, boolean readOnly)
       throws IOException;
+  
+  /**
+   * Sets the codec provider for this IndexReaderFactory
+   */
+  public void setCodecProvider(CodecProvider provider) {
+    this.provider = provider;
+  }
 }

Modified: lucene/dev/branches/solr2452/solr/src/java/org/apache/solr/core/SolrConfig.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/solr/src/java/org/apache/solr/core/SolrConfig.java?rev=1129205&r1=1129204&r2=1129205&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/solr/src/java/org/apache/solr/core/SolrConfig.java (original)
+++ lucene/dev/branches/solr2452/solr/src/java/org/apache/solr/core/SolrConfig.java Mon May 30 14:51:25 2011
@@ -39,6 +39,8 @@ import org.apache.solr.spelling.QueryCon
 import org.apache.solr.highlight.SolrHighlighter;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.index.IndexDeletionPolicy;
+import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.CodecProvider;
 import org.apache.lucene.util.Version;
 
 import org.slf4j.Logger;
@@ -202,6 +204,7 @@ public class SolrConfig extends Config {
 
      loadPluginInfo(DirectoryFactory.class,"directoryFactory",false, true);
      loadPluginInfo(IndexDeletionPolicy.class,"mainIndex/deletionPolicy",false, true);
+     loadPluginInfo(CodecProviderFactory.class,"mainIndex/codecProviderFactory",false, false);
      loadPluginInfo(IndexReaderFactory.class,"indexReaderFactory",false, true);
      loadPluginInfo(UpdateRequestProcessorChain.class,"updateRequestProcessorChain",false, false);
 

Modified: lucene/dev/branches/solr2452/solr/src/java/org/apache/solr/core/SolrCore.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/solr/src/java/org/apache/solr/core/SolrCore.java?rev=1129205&r1=1129204&r2=1129205&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/solr/src/java/org/apache/solr/core/SolrCore.java (original)
+++ lucene/dev/branches/solr2452/solr/src/java/org/apache/solr/core/SolrCore.java Mon May 30 14:51:25 2011
@@ -20,6 +20,8 @@ package org.apache.solr.core;
 import org.apache.lucene.index.IndexDeletionPolicy;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.codecs.Codec;
+import org.apache.lucene.index.codecs.CodecProvider;
 import org.apache.lucene.search.BooleanQuery;
 import org.apache.lucene.store.Directory;
 import org.apache.solr.common.SolrException;
@@ -93,6 +95,7 @@ public final class SolrCore implements S
   private IndexDeletionPolicyWrapper solrDelPolicy;
   private DirectoryFactory directoryFactory;
   private IndexReaderFactory indexReaderFactory;
+  private final CodecProvider codecProvider;
 
   public long getStartTime() { return startTime; }
 
@@ -330,6 +333,7 @@ public final class SolrCore implements S
       indexReaderFactory = new StandardIndexReaderFactory();
     } 
     this.indexReaderFactory = indexReaderFactory;
+    this.indexReaderFactory.setCodecProvider(codecProvider);
   }
   
   // protect via synchronized(SolrCore.class)
@@ -366,7 +370,7 @@ public final class SolrCore implements S
         log.warn(logid+"Solr index directory '" + new File(indexDir) + "' doesn't exist."
                 + " Creating new index...");
 
-        SolrIndexWriter writer = new SolrIndexWriter("SolrCore.initIndex", indexDir, getDirectoryFactory(), true, schema, solrConfig.mainIndexConfig, solrDelPolicy);
+        SolrIndexWriter writer = new SolrIndexWriter("SolrCore.initIndex", indexDir, getDirectoryFactory(), true, schema, solrConfig.mainIndexConfig, solrDelPolicy, codecProvider);
         writer.close();
       }
 
@@ -493,6 +497,7 @@ public final class SolrCore implements S
 
     initDeletionPolicy();
 
+    this.codecProvider = initCodecProvider(solrConfig, schema);
     initIndex();
 
     initWriters();
@@ -555,6 +560,19 @@ public final class SolrCore implements S
     resourceLoader.inform(infoRegistry);
   }
 
+  private CodecProvider initCodecProvider(SolrConfig solrConfig, IndexSchema schema) {
+    final PluginInfo info = solrConfig.getPluginInfo(CodecProviderFactory.class.getName());
+    CodecProvider cp;
+    if (info != null) {
+      CodecProviderFactory factory = (CodecProviderFactory) schema.getResourceLoader().newInstance(info.className);
+      factory.init(info.initArgs);
+      cp = factory.create();
+    } else {
+      // make sure we use the default if nothing is configured
+      cp = CodecProvider.getDefault();
+    }
+    return new SchemaCodecProvider(schema, cp);
+  }
 
   /**
    * Load the request processors
@@ -1618,6 +1636,10 @@ public final class SolrCore implements S
     }
     return lst;
   }
+  
+  public CodecProvider getCodecProvider() {
+    return codecProvider;
+  }
 
 }
 

Modified: lucene/dev/branches/solr2452/solr/src/java/org/apache/solr/core/StandardIndexReaderFactory.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/solr/src/java/org/apache/solr/core/StandardIndexReaderFactory.java?rev=1129205&r1=1129204&r2=1129205&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/solr/src/java/org/apache/solr/core/StandardIndexReaderFactory.java (original)
+++ lucene/dev/branches/solr2452/solr/src/java/org/apache/solr/core/StandardIndexReaderFactory.java Mon May 30 14:51:25 2011
@@ -35,6 +35,6 @@ public class StandardIndexReaderFactory 
   @Override
   public IndexReader newReader(Directory indexDir, boolean readOnly)
       throws IOException {
-    return IndexReader.open(indexDir, null, readOnly, termInfosIndexDivisor);
+    return IndexReader.open(indexDir, null, readOnly, termInfosIndexDivisor, provider);
   }
 }

Modified: lucene/dev/branches/solr2452/solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java?rev=1129205&r1=1129204&r2=1129205&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java (original)
+++ lucene/dev/branches/solr2452/solr/src/java/org/apache/solr/handler/AnalysisRequestHandlerBase.java Mon May 30 14:51:25 2011
@@ -27,6 +27,7 @@ import org.apache.lucene.index.Payload;
 import org.apache.lucene.util.Attribute;
 import org.apache.lucene.util.AttributeSource;
 import org.apache.lucene.util.AttributeReflector;
+import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.SorterTemplate;
 import org.apache.solr.analysis.CharFilterFactory;
 import org.apache.solr.analysis.TokenFilterFactory;
@@ -39,8 +40,6 @@ import org.apache.solr.request.SolrQuery
 import org.apache.solr.response.SolrQueryResponse;
 import org.apache.solr.schema.FieldType;
 
-import org.apache.noggit.CharArr;
-
 import java.io.IOException;
 import java.io.StringReader;
 import java.util.*;
@@ -235,18 +234,13 @@ public abstract class AnalysisRequestHan
 
     FieldType fieldType = context.getFieldType();
 
-    final CharArr textBuf = new CharArr();
     for (int i = 0, c = tokens.size(); i < c; i++) {
       AttributeSource token = tokens.get(i);
       final NamedList<Object> tokenNamedList = new SimpleOrderedMap<Object>();
       final TermToBytesRefAttribute termAtt = token.getAttribute(TermToBytesRefAttribute.class);
       BytesRef rawBytes = termAtt.getBytesRef();
       termAtt.fillBytesRef();
-
-      textBuf.reset();
-      fieldType.indexedToReadable(rawBytes, textBuf);
-      final String text = textBuf.toString();
-
+      final String text = fieldType.indexedToReadable(rawBytes, new CharsRef(rawBytes.length)).toString();
       tokenNamedList.add("text", text);
       
       if (token.hasAttribute(CharTermAttribute.class)) {

Modified: lucene/dev/branches/solr2452/solr/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/solr/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java?rev=1129205&r1=1129204&r2=1129205&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/solr/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java (original)
+++ lucene/dev/branches/solr2452/solr/src/java/org/apache/solr/handler/admin/LukeRequestHandler.java Mon May 30 14:51:25 2011
@@ -46,6 +46,7 @@ import org.apache.lucene.search.Query;
 import org.apache.lucene.search.TermRangeQuery;
 import org.apache.lucene.search.TopDocs;
 import org.apache.lucene.store.Directory;
+import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.PriorityQueue;
 import org.apache.lucene.util.BytesRef;
 import org.apache.solr.analysis.CharFilterFactory;
@@ -232,6 +233,7 @@ public class LukeRequestHandler extends 
   
   private static SimpleOrderedMap<Object> getDocumentFieldsInfo( Document doc, int docId, IndexReader reader, IndexSchema schema ) throws IOException
   { 
+    final CharsRef spare = new CharsRef();
     SimpleOrderedMap<Object> finfo = new SimpleOrderedMap<Object>();
     for( Object o : doc.getFields() ) {
       Fieldable fieldable = (Fieldable)o;
@@ -265,7 +267,7 @@ public class LukeRequestHandler extends 
           if( v != null ) {
             SimpleOrderedMap<Integer> tfv = new SimpleOrderedMap<Integer>();
             for( int i=0; i<v.size(); i++ ) {
-              tfv.add( v.getTerms()[i].utf8ToString(), v.getTermFrequencies()[i] );
+              tfv.add( v.getTerms()[i].utf8ToChars(spare).toString(), v.getTermFrequencies()[i] );
             }
             f.add( "termVector", tfv );
           }
@@ -624,7 +626,7 @@ public class LukeRequestHandler extends 
   private static Map<String,TopTermQueue> getTopTerms( IndexReader reader, Set<String> fields, int numTerms, Set<String> junkWords ) throws Exception 
   {
     Map<String,TopTermQueue> info = new HashMap<String, TopTermQueue>();
-
+    final CharsRef spare = new CharsRef();
     Fields fieldsC = MultiFields.getFields(reader);
     if (fieldsC != null) {
       FieldsEnum fieldsEnum = fieldsC.iterator();
@@ -634,7 +636,7 @@ public class LukeRequestHandler extends 
         TermsEnum termsEnum = fieldsEnum.terms();
         BytesRef text;
         while((text = termsEnum.next()) != null) {
-          String t = text.utf8ToString();
+          String t = text.utf8ToChars(spare).toString();
   
           // Compute distinct terms for every field
           TopTermQueue tiq = info.get( field );

Modified: lucene/dev/branches/solr2452/solr/src/java/org/apache/solr/handler/component/QueryComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/solr/src/java/org/apache/solr/handler/component/QueryComponent.java?rev=1129205&r1=1129204&r2=1129205&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/solr/src/java/org/apache/solr/handler/component/QueryComponent.java (original)
+++ lucene/dev/branches/solr2452/solr/src/java/org/apache/solr/handler/component/QueryComponent.java Mon May 30 14:51:25 2011
@@ -24,6 +24,7 @@ import org.apache.lucene.index.Term;
 import org.apache.lucene.queryParser.ParseException;
 import org.apache.lucene.search.*;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CharsRef;
 import org.apache.lucene.util.ReaderUtil;
 import org.apache.solr.cloud.CloudDescriptor;
 import org.apache.solr.cloud.ZkController;
@@ -455,7 +456,7 @@ public class QueryComponent extends Sear
   {
     SolrQueryRequest req = rb.req;
     SolrQueryResponse rsp = rb.rsp;
-
+    final CharsRef spare = new CharsRef();
     // The query cache doesn't currently store sort field values, and SolrIndexSearcher doesn't
     // currently have an option to return sort field values.  Because of this, we
     // take the documents given and re-derive the sort values.
@@ -524,7 +525,7 @@ public class QueryComponent extends Sear
           // String field in Lucene, which returns the terms
           // data as BytesRef:
           if (val instanceof BytesRef) {
-            field.setValue(((BytesRef)val).utf8ToString());
+            field.setValue(((BytesRef)val).utf8ToChars(spare).toString());
             val = ft.toObject(field);
           }
 

Modified: lucene/dev/branches/solr2452/solr/src/java/org/apache/solr/handler/component/StatsComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/solr/src/java/org/apache/solr/handler/component/StatsComponent.java?rev=1129205&r1=1129204&r2=1129205&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/solr/src/java/org/apache/solr/handler/component/StatsComponent.java (original)
+++ lucene/dev/branches/solr2452/solr/src/java/org/apache/solr/handler/component/StatsComponent.java Mon May 30 14:51:25 2011
@@ -23,6 +23,7 @@ import java.util.Map;
 
 import org.apache.lucene.search.FieldCache;
 import org.apache.lucene.util.BytesRef;
+import org.apache.lucene.util.CharsRef;
 import org.apache.noggit.CharArr;
 import org.apache.solr.common.params.SolrParams;
 import org.apache.solr.common.params.StatsParams;
@@ -270,19 +271,15 @@ class SimpleStats {
       }
       finfo[i++] = new FieldFacetStats( f, si, ft, 0 );
     }
-
+    final CharsRef spare = new CharsRef();
     final BytesRef tempBR = new BytesRef();
-    final CharArr spare = new CharArr();
-
     DocIterator iter = docs.iterator();
     while (iter.hasNext()) {
       int docID = iter.nextDoc();
       BytesRef raw = all.getTermText(docID, tempBR);
       Double v = null;
       if( raw != null ) {
-        spare.reset();
-        all.ft.indexedToReadable(raw, spare);
-        v = Double.parseDouble(spare.toString());
+        v = Double.parseDouble(all.ft.indexedToReadable(raw, spare).toString());
         allstats.accumulate(v);
       }
       else {

Modified: lucene/dev/branches/solr2452/solr/src/java/org/apache/solr/handler/component/TermsComponent.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/solr/src/java/org/apache/solr/handler/component/TermsComponent.java?rev=1129205&r1=1129204&r2=1129205&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/solr/src/java/org/apache/solr/handler/component/TermsComponent.java (original)
+++ lucene/dev/branches/solr2452/solr/src/java/org/apache/solr/handler/component/TermsComponent.java Mon May 30 14:51:25 2011
@@ -18,7 +18,7 @@ package org.apache.solr.handler.componen
 
 import org.apache.lucene.index.*;
 import org.apache.lucene.util.BytesRef;
-import org.apache.noggit.CharArr;
+import org.apache.lucene.util.CharsRef;
 import org.apache.solr.common.SolrException;
 import org.apache.solr.common.params.*;
 import org.apache.solr.common.util.NamedList;
@@ -178,8 +178,7 @@ public class TermsComponent extends Sear
 
       int i = 0;
       BoundedTreeSet<CountPair<BytesRef, Integer>> queue = (sort ? new BoundedTreeSet<CountPair<BytesRef, Integer>>(limit) : null);
-      CharArr external = new CharArr();
-
+      CharsRef external = new CharsRef();
       while (term != null && (i<limit || sort)) {
         boolean externalized = false; // did we fill in "external" yet for this term?
 
@@ -189,8 +188,8 @@ public class TermsComponent extends Sear
         if (pattern != null) {
           // indexed text or external text?
           // TODO: support "raw" mode?
-          external.reset();
           ft.indexedToReadable(term, external);
+          externalized = true;
           if (!pattern.matcher(external).matches()) {
             term = termsEnum.next();
             continue;
@@ -213,13 +212,9 @@ public class TermsComponent extends Sear
 
             // TODO: handle raw somehow
             if (!externalized) {
-              external.reset();
               ft.indexedToReadable(term, external);
             }
-            String label = external.toString();
-            
-
-            fieldTerms.add(label, docFreq);
+            fieldTerms.add(external.toString(), docFreq);
             i++;
           }
         }
@@ -230,7 +225,6 @@ public class TermsComponent extends Sear
       if (sort) {
         for (CountPair<BytesRef, Integer> item : queue) {
           if (i >= limit) break;
-          external.reset();
           ft.indexedToReadable(item.key, external);          
           fieldTerms.add(external.toString(), item.val);
           i++;

Modified: lucene/dev/branches/solr2452/solr/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/solr2452/solr/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java?rev=1129205&r1=1129204&r2=1129205&view=diff
==============================================================================
--- lucene/dev/branches/solr2452/solr/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java (original)
+++ lucene/dev/branches/solr2452/solr/src/java/org/apache/solr/highlight/DefaultSolrHighlighter.java Mon May 30 14:51:25 2011
@@ -284,7 +284,7 @@ public class DefaultSolrHighlighter exte
   /**
    * Return a {@link org.apache.lucene.search.highlight.Fragmenter} appropriate for this field. If a fragmenter
    * has not been configured for this field, fall back to the configured
-   * default or the solr default ({@link org.apache.lucene.search.highlight.GapFragmenter}).
+   * default or the solr default ({@link GapFragmenter}).
    * 
    * @param fieldName The name of the field
    * @param params The params controlling Highlighting