You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by us...@apache.org on 2010/01/14 20:05:42 UTC

svn commit: r899359 [3/7] - in /lucene/java/branches/flex_1458: ./ contrib/ contrib/analyzers/common/src/java/org/apache/lucene/analysis/ar/ contrib/analyzers/common/src/java/org/apache/lucene/analysis/bg/ contrib/analyzers/common/src/java/org/apache/l...

Modified: lucene/java/branches/flex_1458/contrib/analyzers/smartcn/build.xml
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/analyzers/smartcn/build.xml?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/analyzers/smartcn/build.xml (original)
+++ lucene/java/branches/flex_1458/contrib/analyzers/smartcn/build.xml Thu Jan 14 19:05:12 2010
@@ -1,38 +1,38 @@
-<?xml version="1.0"?>
-
-<!--
-    Licensed to the Apache Software Foundation (ASF) under one or more
-    contributor license agreements.  See the NOTICE file distributed with
-    this work for additional information regarding copyright ownership.
-    The ASF licenses this file to You under the Apache License, Version 2.0
-    the "License"); you may not use this file except in compliance with
-    the License.  You may obtain a copy of the License at
- 
-        http://www.apache.org/licenses/LICENSE-2.0
- 
-    Unless required by applicable law or agreed to in writing, software
-    distributed under the License is distributed on an "AS IS" BASIS,
-    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-    See the License for the specific language governing permissions and
-    limitations under the License.
- -->
-
-<project name="smartcn" default="default">
-
-  <description>
-    Smart Chinese Analyzer
-  </description>
-	
-  <property name="build.dir" location="../../../build/contrib/analyzers/smartcn" />
-  <property name="dist.dir" location="../../../dist/contrib/analyzers/smartcn" />
-  <property name="maven.dist.dir" location="../../../dist/maven" />
-
-  <import file="../../contrib-build.xml"/>
-	
-  <path id="test.classpath">
-    <path refid="classpath"/>
-    <pathelement location="../../../build/classes/test/"/>
-    <path refid="junit-path"/>
-    <pathelement location="${build.dir}/classes/java"/>
-  </path>	
-</project>
+<?xml version="1.0"?>
+
+<!--
+    Licensed to the Apache Software Foundation (ASF) under one or more
+    contributor license agreements.  See the NOTICE file distributed with
+    this work for additional information regarding copyright ownership.
+    The ASF licenses this file to You under the Apache License, Version 2.0
+    the "License"); you may not use this file except in compliance with
+    the License.  You may obtain a copy of the License at
+ 
+        http://www.apache.org/licenses/LICENSE-2.0
+ 
+    Unless required by applicable law or agreed to in writing, software
+    distributed under the License is distributed on an "AS IS" BASIS,
+    WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+    See the License for the specific language governing permissions and
+    limitations under the License.
+ -->
+
+<project name="smartcn" default="default">
+
+  <description>
+    Smart Chinese Analyzer
+  </description>
+	
+  <property name="build.dir" location="../../../build/contrib/analyzers/smartcn" />
+  <property name="dist.dir" location="../../../dist/contrib/analyzers/smartcn" />
+  <property name="maven.dist.dir" location="../../../dist/maven" />
+
+  <import file="../../contrib-build.xml"/>
+	
+  <path id="test.classpath">
+    <path refid="classpath"/>
+    <pathelement location="../../../build/classes/test/"/>
+    <path refid="junit-path"/>
+    <pathelement location="${build.dir}/classes/java"/>
+  </path>	
+</project>

Propchange: lucene/java/branches/flex_1458/contrib/analyzers/smartcn/build.xml
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/java/branches/flex_1458/contrib/analyzers/smartcn/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/analyzers/smartcn/pom.xml.template?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/analyzers/smartcn/pom.xml.template (original)
+++ lucene/java/branches/flex_1458/contrib/analyzers/smartcn/pom.xml.template Thu Jan 14 19:05:12 2010
@@ -1,35 +1,35 @@
-<project xmlns="http://maven.apache.org/POM/4.0.0"
-  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
-  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
-
-  <!--
-    Licensed to the Apache Software Foundation (ASF) under one
-    or more contributor license agreements.  See the NOTICE file
-    distributed with this work for additional information
-    regarding copyright ownership.  The ASF licenses this file
-    to you under the Apache License, Version 2.0 (the
-    "License"); you may not use this file except in compliance
-    with the License.  You may obtain a copy of the License at
-    
-    http://www.apache.org/licenses/LICENSE-2.0
-    
-    Unless required by applicable law or agreed to in writing,
-    software distributed under the License is distributed on an
-    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-    KIND, either express or implied.  See the License for the
-    specific language governing permissions and limitations
-    under the License.
-  -->
-  <modelVersion>4.0.0</modelVersion>
-  <parent>
-    <groupId>org.apache.lucene</groupId>
-    <artifactId>lucene-contrib</artifactId>
-    <version>@version@</version>
-  </parent>
-  <groupId>org.apache.lucene</groupId>
-  <artifactId>lucene-smartcn</artifactId>
-  <name>Lucene Smart Chinese Analyzer</name>
-  <version>@version@</version>
-  <description>Smart Chinese Analyzer</description>
-  <packaging>jar</packaging>
-</project>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+  xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+  xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+  <!--
+    Licensed to the Apache Software Foundation (ASF) under one
+    or more contributor license agreements.  See the NOTICE file
+    distributed with this work for additional information
+    regarding copyright ownership.  The ASF licenses this file
+    to you under the Apache License, Version 2.0 (the
+    "License"); you may not use this file except in compliance
+    with the License.  You may obtain a copy of the License at
+    
+    http://www.apache.org/licenses/LICENSE-2.0
+    
+    Unless required by applicable law or agreed to in writing,
+    software distributed under the License is distributed on an
+    "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+    KIND, either express or implied.  See the License for the
+    specific language governing permissions and limitations
+    under the License.
+  -->
+  <modelVersion>4.0.0</modelVersion>
+  <parent>
+    <groupId>org.apache.lucene</groupId>
+    <artifactId>lucene-contrib</artifactId>
+    <version>@version@</version>
+  </parent>
+  <groupId>org.apache.lucene</groupId>
+  <artifactId>lucene-smartcn</artifactId>
+  <name>Lucene Smart Chinese Analyzer</name>
+  <version>@version@</version>
+  <description>Smart Chinese Analyzer</description>
+  <packaging>jar</packaging>
+</project>

Propchange: lucene/java/branches/flex_1458/contrib/analyzers/smartcn/pom.xml.template
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/java/branches/flex_1458/contrib/analyzers/smartcn/src/java/overview.html
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/java/branches/flex_1458/contrib/ant/src/java/overview.html
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/java/branches/flex_1458/contrib/benchmark/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/benchmark/CHANGES.txt?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/benchmark/CHANGES.txt (original)
+++ lucene/java/branches/flex_1458/contrib/benchmark/CHANGES.txt Thu Jan 14 19:05:12 2010
@@ -4,6 +4,25 @@
 
 $Id:$
 
+1/11/2010
+  LUCENE-2181: Add a benchmark for collation. This adds NewLocaleTask,
+  which sets a Locale in the run data for collation to use, and can be
+  used in the future for benchmarking localized range queries and sorts.
+  Also add NewCollationAnalyzerTask, which works with both JDK and ICU
+  Collator implementations. Fix ReadTokensTask to not tokenize fields
+  unless they should be tokenized according to DocMaker config. The 
+  easiest way to run the benchmark is to run 'ant collation'
+  (Steven Rowe via Robert Muir)
+
+12/22/2009
+  LUCENE-2178: Allow multiple locations to add to the class path with
+  -Dbenchmark.ext.classpath=... when running "ant run-task" (Steven
+  Rowe via Mike McCandless)
+
+12/17/2009
+  LUCENE-2168: Allow negative relative thread priority for BG tasks
+  (Mike McCandless)
+
 12/07/2009
   LUCENE-2106: ReadTask does not close its Reader when 
   OpenReader/CloseReader are not used. (Mark Miller)

Modified: lucene/java/branches/flex_1458/contrib/benchmark/build.xml
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/benchmark/build.xml?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/benchmark/build.xml (original)
+++ lucene/java/branches/flex_1458/contrib/benchmark/build.xml Thu Jan 14 19:05:12 2010
@@ -24,7 +24,10 @@
         <available file="temp/enwiki-20070527-pages-articles.xml.bz2" property="enwiki.exists"/>
         <available file="temp/enwiki-20070527-pages-articles.xml" property="enwiki.expanded"/>
         <available file="${working.dir}/enwiki.txt" property="enwiki.extracted"/>
-
+    	<available file="temp/${top.100k.words.archive.filename}"
+                   property="top.100k.words.archive.present"/>
+    	<available file="${working.dir}/top100k-out" 
+                   property="top.100k.word.files.expanded"/>
     </target>
 
     <target name="enwiki-files" depends="check-files">
@@ -94,6 +97,27 @@
         <untar src="temp/mini_newsgroups.tar" dest="${working.dir}"/>
     </target>
 
+	<property name="top.100k.words.archive.filename" 
+	          value="top.100k.words.de.en.fr.uk.wikipedia.2009-11.tar.bz2"/>
+	<property name="top.100k.words.archive.base.url"
+	          value="http://people.apache.org/~rmuir/wikipedia"/>
+	<target name="get-top-100k-words-archive" unless="top.100k.words.archive.present">
+		<mkdir dir="temp"/>
+	    <get src="${top.100k.words.archive.base.url}/${top.100k.words.archive.filename}"
+	         dest="temp/${top.100k.words.archive.filename}"/>
+	</target>
+	<target name="expand-top-100k-word-files" unless="top.100k.word.files.expanded">
+		<mkdir dir="${working.dir}/top100k-out"/>
+	    <untar src="temp/${top.100k.words.archive.filename}"
+	           overwrite="true" compression="bzip2" dest="${working.dir}/top100k-out"/>
+	</target>
+	
+	<target name="top-100k-wiki-word-files" depends="check-files">
+	  <mkdir dir="${working.dir}"/>
+	  <antcall target="get-top-100k-words-archive"/>
+	  <antcall target="expand-top-100k-word-files"/>
+	</target>
+	
     <target name="get-files" depends="check-files">
         <mkdir dir="temp"/>
         <antcall target="get-reuters"/>
@@ -104,6 +128,7 @@
     <path id="classpath">
         <pathelement path="${common.dir}/build/classes/java"/>
         <pathelement path="${common.dir}/build/classes/demo"/>
+      <pathelement path="${common.dir}/build/classes/test"/>
         <pathelement path="${common.dir}/build/contrib/highlighter/classes/java"/>
         <pathelement path="${common.dir}/build/contrib/memory/classes/java"/>
         <pathelement path="${common.dir}/build/contrib/fast-vector-highlighter/classes/java"/>
@@ -114,13 +139,13 @@
     <path id="run.classpath">
         <path refid="classpath"/>
         <pathelement location="${build.dir}/classes/java"/>
-        <pathelement location="${benchmark.ext.classpath}"/>
+        <pathelement path="${benchmark.ext.classpath}"/>
     </path>
 
     <property name="task.alg" location="conf/micro-standard.alg"/>
     <property name="task.mem" value="140M"/>
 
-    <target name="run-task" depends="compile,check-files,get-files" 
+    <target name="run-task" depends="compile-test,check-files,get-files" 
      description="Run compound penalty perf test (optional: -Dtask.alg=your-algorithm-file -Dtask.mem=java-max-mem)">
         <echo>Working Directory: ${working.dir}</echo>
         <java classname="org.apache.lucene.benchmark.byTask.Benchmark" maxmemory="${task.mem}" fork="true">
@@ -140,6 +165,34 @@
         </java>
     </target>
 
+	<property name="collation.alg.file" location="conf/collation.alg"/>
+	<property name="collation.output.file" 
+	          value="${working.dir}/collation.benchmark.output.txt"/>
+	<property name="collation.jira.output.file" 
+	          value="${working.dir}/collation.bm2jira.output.txt"/>
+	
+	<path id="collation.runtime.classpath">
+	  <path refid="run.classpath"/>
+	  <pathelement path="${common.dir}/build/contrib/icu/classes/java"/>
+      <fileset dir="${common.dir}/contrib/icu/lib" includes="icu4j*.jar"/>
+	</path>
+	
+	<target name="collation" depends="compile,compile-icu,top-100k-wiki-word-files">
+	    <echo>Running contrib/benchmark with alg file: ${collation.alg.file}</echo>
+	    <java fork="true" classname="org.apache.lucene.benchmark.byTask.Benchmark" 
+	          maxmemory="${task.mem}" output="${collation.output.file}">
+	      <classpath refid="collation.runtime.classpath"/>
+	      <arg file="${collation.alg.file}"/>
+	    </java>
+	    <echo>Benchmark output is in file: ${collation.output.file}</echo>
+	    <echo>Converting to JIRA table format...</echo>
+	    <exec executable="perl" output="${collation.jira.output.file}" failonerror="true">
+	      <arg value="scripts/collation.bm2jira.pl"/>
+	      <arg value="${collation.output.file}"/>
+	    </exec>
+	    <echo>Benchmark output in JIRA table format is in file: ${collation.jira.output.file}</echo>
+	</target>
+	
     <target name="compile-demo">
       <subant target="compile-demo">
          <fileset dir="${common.dir}" includes="build.xml"/>
@@ -150,6 +203,11 @@
          <fileset dir="${common.dir}/contrib/highlighter" includes="build.xml"/>
       </subant>
     </target>
+    <target name="compile-icu">
+      <subant target="compile">
+         <fileset dir="${common.dir}/contrib/icu" includes="build.xml"/>
+      </subant>
+    </target>
     <target name="compile-memory">
       <subant target="compile">
          <fileset dir="${common.dir}/contrib/memory" includes="build.xml"/>

Modified: lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java (original)
+++ lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java Thu Jan 14 19:05:12 2010
@@ -20,6 +20,7 @@
 import java.io.File;
 import java.io.IOException;
 import java.util.HashMap;
+import java.util.Locale;
 
 import org.apache.lucene.analysis.Analyzer;
 import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
@@ -61,6 +62,7 @@
   private Directory directory;
   private Analyzer analyzer;
   private DocMaker docMaker;
+  private Locale locale;
   
   // we use separate (identical) instances for each "read" task type, so each can iterate the queries separately.
   private HashMap<Class<? extends ReadTask>,QueryMaker> readTaskQueryMaker;
@@ -245,6 +247,20 @@
   }
 
   /**
+   * @return the locale
+   */
+  public Locale getLocale() {
+    return locale;
+  }
+
+  /**
+   * @param locale the locale to set
+   */
+  public void setLocale(Locale locale) {
+    this.locale = locale;
+  }
+
+  /**
    * @return Returns the config.
    */
   public Config getConfig() {

Propchange: lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiQueryMaker.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java (original)
+++ lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java Thu Jan 14 19:05:12 2010
@@ -22,6 +22,7 @@
 import org.apache.lucene.index.IndexDeletionPolicy;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.MergeScheduler;
+import org.apache.lucene.index.ConcurrentMergeScheduler;
 import org.apache.lucene.index.MergePolicy;
 
 import java.io.BufferedOutputStream;
@@ -33,9 +34,15 @@
 /**
  * Create an index. <br>
  * Other side effects: index writer object in perfRunData is set. <br>
- * Relevant properties: <code>merge.factor, max.buffered,
- *  max.field.length, ram.flush.mb [default 0],
- *  [default true]</code>.
+ * Relevant properties: <code>merge.factor (default 10),
+ * max.buffered (default no flush), max.field.length (default
+ * 10,000 tokens), compound (default true), ram.flush.mb [default 0],
+ * merge.policy (default org.apache.lucene.index.LogByteSizeMergePolicy),
+ * merge.scheduler (default
+ * org.apache.lucene.index.ConcurrentMergeScheduler),
+ * concurrent.merge.scheduler.max.thread.count and
+ * concurrent.merge.scheduler.max.merge.count (defaults per
+ * ConcurrentMergeScheduler) </code>.
  * <p>
  * This task also supports a "writer.info.stream" property with the following
  * values:
@@ -66,6 +73,18 @@
       throw new RuntimeException("unable to instantiate class '" + mergeScheduler + "' as merge scheduler", e);
     }
 
+    if (mergeScheduler.equals("org.apache.lucene.index.ConcurrentMergeScheduler")) {
+      ConcurrentMergeScheduler cms = (ConcurrentMergeScheduler) writer.getMergeScheduler();
+      int v = config.get("concurrent.merge.scheduler.max.thread.count", -1);
+      if (v != -1) {
+        cms.setMaxThreadCount(v);
+      }
+      v = config.get("concurrent.merge.scheduler.max.merge.count", -1);
+      if (v != -1) {
+        cms.setMaxMergeCount(v);
+      }
+    }
+
     final String mergePolicy = config.get("merge.policy",
                                           "org.apache.lucene.index.LogByteSizeMergePolicy");
     try {

Modified: lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java (original)
+++ lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java Thu Jan 14 19:05:12 2010
@@ -67,6 +67,8 @@
     Analyzer analyzer = getRunData().getAnalyzer();
     int tokenCount = 0;
     for(final Fieldable field : fields) {
+      if (!field.isTokenized()) continue;
+      
       final TokenStream stream;
       final TokenStream streamValue = field.tokenStreamValue();
 

Modified: lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/TaskSequence.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/TaskSequence.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/TaskSequence.java (original)
+++ lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/TaskSequence.java Thu Jan 14 19:05:12 2010
@@ -156,6 +156,7 @@
       return count;
     }
 
+    @Override
     public void run() {
       try {
         count = task.runAndMaybeStats(letChildReport);
@@ -188,7 +189,7 @@
             bgTasks = new ArrayList<RunBackgroundTask>();
           }
           RunBackgroundTask bgTask = new RunBackgroundTask(task, letChildReport);
-          bgTask.setPriority(getBackgroundDeltaPriority() + Thread.currentThread().getPriority());
+          bgTask.setPriority(task.getBackgroundDeltaPriority() + Thread.currentThread().getPriority());
           bgTask.start();
           bgTasks.add(bgTask);
         } else {

Modified: lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Algorithm.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Algorithm.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Algorithm.java (original)
+++ lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Algorithm.java Thu Jan 14 19:05:12 2010
@@ -51,7 +51,6 @@
     stok.ordinaryChar('/');
     stok.ordinaryChar('(');
     stok.ordinaryChar(')');
-    stok.ordinaryChar('-');
     boolean colonOk = false; 
     boolean isDisableCountNextTask = false; // only for primitive tasks
     currSequence.setDepth(0);

Propchange: lucene/java/branches/flex_1458/contrib/benchmark/src/java/overview.html
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/java/branches/flex_1458/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java (original)
+++ lucene/java/branches/flex_1458/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java Thu Jan 14 19:05:12 2010
@@ -21,13 +21,20 @@
 import java.io.File;
 import java.io.FileReader;
 import java.io.BufferedReader;
+import java.text.Collator;
 import java.util.List;
 import java.util.Iterator;
+import java.util.Locale;
 
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
 import org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker;
 import org.apache.lucene.benchmark.byTask.tasks.CountingSearchTestTask;
 import org.apache.lucene.benchmark.byTask.tasks.CountingHighlighterTestTask;
 import org.apache.lucene.benchmark.byTask.stats.TaskStats;
+import org.apache.lucene.collation.CollationKeyAnalyzer;
 import org.apache.lucene.index.IndexReader;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.TermsEnum;
@@ -120,6 +127,8 @@
     assertTrue("elapsed time was " + elapsed + " msec", elapsed <= 1500);
   }
 
+  // NOTE(review): marked as disabled until BG thread prio is fixed (it hung
+  // the build), yet the method below keeps its test* name -- confirm status
   public void testBGSearchTaskThreads() throws Exception {
     String algLines[] = {
         "log.time.step.msec = 100",
@@ -853,6 +862,119 @@
     };
   }
 
+  /**
+   * Test that we can change the Locale in the runData,
+   * that it is parsed as we expect.
+   */
+  public void testLocale() throws Exception {
+    // empty Locale: clear it (null)
+    Benchmark benchmark = execBenchmark(getLocaleConfig(""));
+    assertNull(benchmark.getRunData().getLocale());
+
+    // ROOT locale
+    benchmark = execBenchmark(getLocaleConfig("ROOT"));
+    assertEquals(new Locale(""), benchmark.getRunData().getLocale());
+    
+    // specify just a language 
+    benchmark = execBenchmark(getLocaleConfig("de"));
+    assertEquals(new Locale("de"), benchmark.getRunData().getLocale());
+    
+    // specify language + country
+    benchmark = execBenchmark(getLocaleConfig("en,US"));
+    assertEquals(new Locale("en", "US"), benchmark.getRunData().getLocale());
+    
+    // specify language + country + variant
+    benchmark = execBenchmark(getLocaleConfig("no,NO,NY"));
+    assertEquals(new Locale("no", "NO", "NY"), benchmark.getRunData().getLocale());
+  }
+   
+  private static String[] getLocaleConfig(String localeParam) {
+    String algLines[] = {
+        "# ----- properties ",
+        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
+        "docs.file=" + getReuters20LinesFile(),
+        "content.source.log.step=3",
+        "content.source.forever=false",
+        "directory=RAMDirectory",
+        "# ----- alg ",
+        "{ \"Rounds\"",
+        "  ResetSystemErase",
+        "  NewLocale(" + localeParam + ")",
+        "  CreateIndex",
+        "  { \"AddDocs\"  AddDoc > : * ",
+        "  NewRound",
+        "} : 1",
+    };
+    return algLines;
+  }
+  
+  /**
+   * Test that we can create CollationAnalyzers.
+   */
+  public void testCollator() throws Exception {
+    // ROOT locale
+    Benchmark benchmark = execBenchmark(getCollatorConfig("ROOT", "impl:jdk"));
+    CollationKeyAnalyzer expected = new CollationKeyAnalyzer(Collator
+        .getInstance(new Locale("")));
+    assertEqualCollation(expected, benchmark.getRunData().getAnalyzer(), "foobar");
+    
+    // specify just a language
+    benchmark = execBenchmark(getCollatorConfig("de", "impl:jdk"));
+    expected = new CollationKeyAnalyzer(Collator.getInstance(new Locale("de")));
+    assertEqualCollation(expected, benchmark.getRunData().getAnalyzer(), "foobar");
+    
+    // specify language + country
+    benchmark = execBenchmark(getCollatorConfig("en,US", "impl:jdk"));
+    expected = new CollationKeyAnalyzer(Collator.getInstance(new Locale("en",
+        "US")));
+    assertEqualCollation(expected, benchmark.getRunData().getAnalyzer(), "foobar");
+    
+    // specify language + country + variant
+    benchmark = execBenchmark(getCollatorConfig("no,NO,NY", "impl:jdk"));
+    expected = new CollationKeyAnalyzer(Collator.getInstance(new Locale("no",
+        "NO", "NY")));
+    assertEqualCollation(expected, benchmark.getRunData().getAnalyzer(), "foobar");
+  }
+  
+  private void assertEqualCollation(Analyzer a1, Analyzer a2, String text)
+      throws Exception {
+    TokenStream ts1 = a1.tokenStream("bogus", new StringReader(text));
+    TokenStream ts2 = a2.tokenStream("bogus", new StringReader(text));
+    ts1.reset();
+    ts2.reset();
+    TermAttribute termAtt1 = ts1.addAttribute(TermAttribute.class);
+    TermAttribute termAtt2 = ts2.addAttribute(TermAttribute.class);
+    assertTrue(ts1.incrementToken());
+    assertTrue(ts2.incrementToken());
+    assertEquals(termAtt1.term(), termAtt2.term());
+    assertFalse(ts1.incrementToken());
+    assertFalse(ts2.incrementToken());
+    ts1.close();
+    ts2.close();
+  }
+  
+  private static String[] getCollatorConfig(String localeParam, 
+      String collationParam) {
+    String algLines[] = {
+        "# ----- properties ",
+        "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
+        "docs.file=" + getReuters20LinesFile(),
+        "content.source.log.step=3",
+        "content.source.forever=false",
+        "directory=RAMDirectory",
+        "# ----- alg ",
+        "{ \"Rounds\"",
+        "  ResetSystemErase",
+        "  NewLocale(" + localeParam + ")",
+        "  NewCollationAnalyzer(" + collationParam + ")",
+        "  CreateIndex",
+        "  { \"AddDocs\"  AddDoc > : * ",
+        "  NewRound",
+        "} : 1",
+    };
+    return algLines;
+  }
+  
   private static String getReuters20LinesFile() {
     return System.getProperty("lucene.common.dir").replace('\\','/') +
       "/contrib/benchmark/src/test/org/apache/lucene/benchmark/reuters.first20.lines.txt";

Modified: lucene/java/branches/flex_1458/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/PerfTaskTest.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/PerfTaskTest.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/PerfTaskTest.java (original)
+++ lucene/java/branches/flex_1458/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/PerfTaskTest.java Thu Jan 14 19:05:12 2010
@@ -1,73 +1,73 @@
-package org.apache.lucene.benchmark.byTask.tasks;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *     http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.util.Properties;
-
-import org.apache.lucene.benchmark.BenchmarkTestCase;
-import org.apache.lucene.benchmark.byTask.PerfRunData;
-import org.apache.lucene.benchmark.byTask.utils.Config;
-
-/** Tests the functionality of the abstract {@link PerfTask}. */
-public class PerfTaskTest extends BenchmarkTestCase {
-
-  private static final class MyPerfTask extends PerfTask {
-
-    public MyPerfTask(PerfRunData runData) {
-      super(runData);
-    }
-
-    @Override
-    public int doLogic() throws Exception {
-      return 0;
-    }
-
-    public int getLogStep() { return logStep; }
-    
-  }
-  
-  private PerfRunData createPerfRunData(boolean setLogStep, int logStepVal,
-      boolean setTaskLogStep, int taskLogStepVal) throws Exception {
-    Properties props = new Properties();
-    if (setLogStep) {
-      props.setProperty("log.step", Integer.toString(logStepVal));
-    }
-    if (setTaskLogStep) {
-      props.setProperty("log.step.MyPerf", Integer.toString(taskLogStepVal));
-    }
-    props.setProperty("directory", "RAMDirectory"); // no accidental FS dir.
-    Config config = new Config(props);
-    return new PerfRunData(config);
-  }
-  
-  private void doLogStepTest(boolean setLogStep, int logStepVal,
-      boolean setTaskLogStep, int taskLogStepVal, int expLogStepValue) throws Exception {
-    PerfRunData runData = createPerfRunData(setLogStep, logStepVal, setTaskLogStep, taskLogStepVal);
-    MyPerfTask mpt = new MyPerfTask(runData);
-    assertEquals(expLogStepValue, mpt.getLogStep());
-  }
-  
-  public void testLogStep() throws Exception {
-    doLogStepTest(false, -1, false, -1, PerfTask.DEFAULT_LOG_STEP);
-    doLogStepTest(true, -1, false, -1, Integer.MAX_VALUE);
-    doLogStepTest(true, 100, false, -1, 100);
-    doLogStepTest(false, -1, true, -1, Integer.MAX_VALUE);
-    doLogStepTest(false, -1, true, 100, 100);
-  }
-  
-}
+package org.apache.lucene.benchmark.byTask.tasks;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Properties;
+
+import org.apache.lucene.benchmark.BenchmarkTestCase;
+import org.apache.lucene.benchmark.byTask.PerfRunData;
+import org.apache.lucene.benchmark.byTask.utils.Config;
+
+/** Tests the functionality of the abstract {@link PerfTask}. */
+public class PerfTaskTest extends BenchmarkTestCase {
+
+  private static final class MyPerfTask extends PerfTask {
+
+    public MyPerfTask(PerfRunData runData) {
+      super(runData);
+    }
+
+    @Override
+    public int doLogic() throws Exception {
+      return 0;
+    }
+
+    public int getLogStep() { return logStep; }
+    
+  }
+  
+  private PerfRunData createPerfRunData(boolean setLogStep, int logStepVal,
+      boolean setTaskLogStep, int taskLogStepVal) throws Exception {
+    Properties props = new Properties();
+    if (setLogStep) {
+      props.setProperty("log.step", Integer.toString(logStepVal));
+    }
+    if (setTaskLogStep) {
+      props.setProperty("log.step.MyPerf", Integer.toString(taskLogStepVal));
+    }
+    props.setProperty("directory", "RAMDirectory"); // no accidental FS dir.
+    Config config = new Config(props);
+    return new PerfRunData(config);
+  }
+  
+  private void doLogStepTest(boolean setLogStep, int logStepVal,
+      boolean setTaskLogStep, int taskLogStepVal, int expLogStepValue) throws Exception {
+    PerfRunData runData = createPerfRunData(setLogStep, logStepVal, setTaskLogStep, taskLogStepVal);
+    MyPerfTask mpt = new MyPerfTask(runData);
+    assertEquals(expLogStepValue, mpt.getLogStep());
+  }
+  
+  public void testLogStep() throws Exception {
+    doLogStepTest(false, -1, false, -1, PerfTask.DEFAULT_LOG_STEP);
+    doLogStepTest(true, -1, false, -1, Integer.MAX_VALUE);
+    doLogStepTest(true, 100, false, -1, 100);
+    doLogStepTest(false, -1, true, -1, Integer.MAX_VALUE);
+    doLogStepTest(false, -1, true, 100, 100);
+  }
+  
+}

Propchange: lucene/java/branches/flex_1458/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/PerfTaskTest.java
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/java/branches/flex_1458/contrib/db/bdb-je/src/java/overview.html
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/java/branches/flex_1458/contrib/db/bdb/src/java/overview.html
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/java/branches/flex_1458/contrib/fast-vector-highlighter/pom.xml.template
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/java/branches/flex_1458/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldFragList.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldFragList.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldFragList.java (original)
+++ lucene/java/branches/flex_1458/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldFragList.java Thu Jan 14 19:05:12 2010
@@ -70,6 +70,22 @@
       }
     }
     
+    public List<SubInfo> getSubInfos(){
+      return subInfos;
+    }
+    
+    public float getTotalBoost(){
+      return totalBoost;
+    }
+    
+    public int getStartOffset(){
+      return startOffset;
+    }
+    
+    public int getEndOffset(){
+      return endOffset;
+    }
+    
     @Override
     public String toString(){
       StringBuilder sb = new StringBuilder();
@@ -80,17 +96,26 @@
       return sb.toString();
     }
     
-    static class SubInfo {
+    public static class SubInfo {
       final String text;  // unnecessary member, just exists for debugging purpose
       final List<Toffs> termsOffsets;   // usually termsOffsets.size() == 1,
                               // but if position-gap > 1 and slop > 0 then size() could be greater than 1
       int seqnum;
+
       SubInfo( String text, List<Toffs> termsOffsets, int seqnum ){
         this.text = text;
         this.termsOffsets = termsOffsets;
         this.seqnum = seqnum;
       }
       
+      public List<Toffs> getTermsOffsets(){
+        return termsOffsets;
+      }
+      
+      public int getSeqnum(){
+        return seqnum;
+      }
+      
       @Override
       public String toString(){
         StringBuilder sb = new StringBuilder();

Modified: lucene/java/branches/flex_1458/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java (original)
+++ lucene/java/branches/flex_1458/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java Thu Jan 14 19:05:12 2010
@@ -171,9 +171,15 @@
         this.startOffset = startOffset;
         this.endOffset = endOffset;
       }
-      void setEndOffset( int endOffset ){
+      public void setEndOffset( int endOffset ){
         this.endOffset = endOffset;
       }
+      public int getStartOffset(){
+        return startOffset;
+      }
+      public int getEndOffset(){
+        return endOffset;
+      }
       @Override
       public String toString(){
         StringBuilder sb = new StringBuilder();

Propchange: lucene/java/branches/flex_1458/contrib/fast-vector-highlighter/src/java/overview.html
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/java/branches/flex_1458/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java (original)
+++ lucene/java/branches/flex_1458/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java Thu Jan 14 19:05:12 2010
@@ -268,6 +268,21 @@
     protected boolean isDelimiter( int c ){
       return delimiters.indexOf( c ) >= 0;
     }
+    
+    public void reset( Reader input ) throws IOException {
+      super.reset( input );
+      reset();
+    }
+    
+    public void reset() throws IOException {
+      startTerm = 0;
+      nextStartOffset = 0;
+      snippet = null;
+      snippetBuffer.setLength( 0 );
+      charBufferIndex = BUFFER_SIZE;
+      charBufferLen = 0;
+      ch = 0;
+    }
   }
 
   protected void make1d1fIndex( String value ) throws Exception {

Propchange: lucene/java/branches/flex_1458/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermScorer.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/java/branches/flex_1458/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TextFragment.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TextFragment.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TextFragment.java (original)
+++ lucene/java/branches/flex_1458/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TextFragment.java Thu Jan 14 19:05:12 2010
@@ -41,6 +41,7 @@
    * @deprecated Use {@link #TextFragment(CharSequence, int, int)} instead.
    * This constructor will be removed in Lucene 4.0
    */
+	@Deprecated
 	public TextFragment(StringBuffer markedUpText,int textStartPos, int fragNum)
 	{
 		this.markedUpText=markedUpText;

Modified: lucene/java/branches/flex_1458/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java (original)
+++ lucene/java/branches/flex_1458/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java Thu Jan 14 19:05:12 2010
@@ -38,234 +38,248 @@
 import org.apache.lucene.index.TermVectorOffsetInfo;
 
 /**
- * Hides implementation issues associated with obtaining a TokenStream for use with
- * the higlighter - can obtain from TermFreqVectors with offsets and (optionally) positions or
- * from Analyzer class reparsing the stored content.
+ * Hides implementation issues associated with obtaining a TokenStream for use
+   * with the highlighter - can obtain from TermFreqVectors with offsets and
+ * (optionally) positions or from Analyzer class reparsing the stored content.
  */
-public class TokenSources
-{
+public class TokenSources {
   /**
-   * A convenience method that tries to first get a TermPositionVector for the specified docId, then, falls back to
-   * using the passed in {@link org.apache.lucene.document.Document} to retrieve the TokenStream.  This is useful when
-   * you already have the document, but would prefer to use the vector first.
-   * @param reader The {@link org.apache.lucene.index.IndexReader} to use to try and get the vector from
+   * A convenience method that tries to first get a TermPositionVector for the
+   * specified docId, then, falls back to using the passed in
+   * {@link org.apache.lucene.document.Document} to retrieve the TokenStream.
+   * This is useful when you already have the document, but would prefer to use
+   * the vector first.
+   * 
+   * @param reader The {@link org.apache.lucene.index.IndexReader} to use to try
+   *        and get the vector from
    * @param docId The docId to retrieve.
    * @param field The field to retrieve on the document
    * @param doc The document to fall back on
-   * @param analyzer The analyzer to use for creating the TokenStream if the vector doesn't exist
-   * @return The {@link org.apache.lucene.analysis.TokenStream} for the {@link org.apache.lucene.document.Fieldable} on the {@link org.apache.lucene.document.Document}
+   * @param analyzer The analyzer to use for creating the TokenStream if the
+   *        vector doesn't exist
+   * @return The {@link org.apache.lucene.analysis.TokenStream} for the
+   *         {@link org.apache.lucene.document.Fieldable} on the
+   *         {@link org.apache.lucene.document.Document}
    * @throws IOException if there was an error loading
    */
-  public static TokenStream getAnyTokenStream(IndexReader reader, int docId, String field, Document doc, Analyzer analyzer) throws IOException{
-    TokenStream ts=null;
+  public static TokenStream getAnyTokenStream(IndexReader reader, int docId,
+      String field, Document doc, Analyzer analyzer) throws IOException {
+    TokenStream ts = null;
+
+    TermFreqVector tfv = reader.getTermFreqVector(docId, field);
+    if (tfv != null) {
+      if (tfv instanceof TermPositionVector) {
+        ts = getTokenStream((TermPositionVector) tfv);
+      }
+    }
+    // No token info stored so fall back to analyzing raw content
+    if (ts == null) {
+      ts = getTokenStream(doc, field, analyzer);
+    }
+    return ts;
+  }
+
+  /**
+   * A convenience method that tries a number of approaches to getting a token
+   * stream. The cost of finding there are no termVectors in the index is
+   * minimal (1000 invocations still registers 0 ms). So this "lazy" (flexible?)
+   * approach to coding is probably acceptable
+   * 
+   * @param reader
+   * @param docId
+   * @param field
+   * @param analyzer
+   * @return null if field not stored correctly
+   * @throws IOException
+   */
+  public static TokenStream getAnyTokenStream(IndexReader reader, int docId,
+      String field, Analyzer analyzer) throws IOException {
+    TokenStream ts = null;
+
+    TermFreqVector tfv = reader.getTermFreqVector(docId, field);
+    if (tfv != null) {
+      if (tfv instanceof TermPositionVector) {
+        ts = getTokenStream((TermPositionVector) tfv);
+      }
+    }
+    // No token info stored so fall back to analyzing raw content
+    if (ts == null) {
+      ts = getTokenStream(reader, docId, field, analyzer);
+    }
+    return ts;
+  }
+
+  public static TokenStream getTokenStream(TermPositionVector tpv) {
+    // assumes the worst and makes no assumptions about token position
+    // sequences.
+    return getTokenStream(tpv, false);
+  }
+
+  /**
+   * Low level api. Returns a token stream or null if no offset info available
+   * in index. This can be used to feed the highlighter with a pre-parsed token
+   * stream
+   * 
+   * In my tests the times to recreate 1000 token streams using this method
+   * were: 420 milliseconds with only TermVector offset data stored, and 271
+   * milliseconds with TermVector offset AND position data stored (n.b. the
+   * timings with position data are based on a tokenizer with contiguous
+   * positions - no overlaps or gaps). The cost of not using TermPositionVector
+   * to store pre-parsed content, and instead using an analyzer to re-parse the
+   * original content, was 980 milliseconds.
+   * 
+   * The re-analyze timings will typically vary depending on: 1) the complexity
+   * of the analyzer code (the timings above were using a
+   * stemmer/lowercaser/stopword combo); 2) the number of other fields (Lucene
+   * reads ALL fields off the disk when accessing just one document field - this
+   * can cost dear!); 3) use of compression on field storage - this could be
+   * faster (less disk IO) or slower (more CPU burn) depending on the
+   * content.
+   * 
+   * @param tpv
+   * @param tokenPositionsGuaranteedContiguous true if the token position
+   *        numbers have no overlaps or gaps. If looking to eke out the last
+   *        drops of performance, set to true. If in doubt, set to false.
+   */
+  public static TokenStream getTokenStream(TermPositionVector tpv,
+      boolean tokenPositionsGuaranteedContiguous) {
+    if (!tokenPositionsGuaranteedContiguous && tpv.getTermPositions(0) != null) {
+      return new TokenStreamFromTermPositionVector(tpv);
+    }
 
-		TermFreqVector tfv = reader.getTermFreqVector(docId,field);
-		if(tfv!=null)
-		{
-		    if(tfv instanceof TermPositionVector)
-		    {
-		        ts=getTokenStream((TermPositionVector) tfv);
-		    }
-		}
-		//No token info stored so fall back to analyzing raw content
-		if(ts==null)
-		{
-		    ts=getTokenStream(doc,field,analyzer);
-		}
-		return ts;
-  }
-    /**
-     * A convenience method that tries a number of approaches to getting a token stream.
-     * The cost of finding there are no termVectors in the index is minimal (1000 invocations still 
-     * registers 0 ms). So this "lazy" (flexible?) approach to coding is probably acceptable
-     * @param reader
-     * @param docId
-     * @param field
-     * @param analyzer
-     * @return null if field not stored correctly 
-     * @throws IOException
-     */
-    public static TokenStream getAnyTokenStream(IndexReader reader,int docId, String field,Analyzer analyzer) throws IOException
-    {
-		TokenStream ts=null;
-
-		TermFreqVector tfv = reader.getTermFreqVector(docId,field);
-		if(tfv!=null)
-		{
-		    if(tfv instanceof TermPositionVector)
-		    {
-		        ts=getTokenStream((TermPositionVector) tfv);
-		    }
-		}
-		//No token info stored so fall back to analyzing raw content
-		if(ts==null)
-		{
-		    ts=getTokenStream(reader,docId,field,analyzer);
-		}
-		return ts;
-    }
-    
-    
-    public static TokenStream getTokenStream(TermPositionVector tpv)
-    {
-        //assumes the worst and makes no assumptions about token position sequences.
-         return getTokenStream(tpv,false);   
-    }
-    /**
-     * Low level api.
-     * Returns a token stream or null if no offset info available in index.
-     * This can be used to feed the highlighter with a pre-parsed token stream 
-     * 
-     * In my tests the speeds to recreate 1000 token streams using this method are:
-     * - with TermVector offset only data stored - 420  milliseconds 
-     * - with TermVector offset AND position data stored - 271 milliseconds
-     *  (nb timings for TermVector with position data are based on a tokenizer with contiguous
-     *  positions - no overlaps or gaps)
-     * The cost of not using TermPositionVector to store
-     * pre-parsed content and using an analyzer to re-parse the original content: 
-     * - reanalyzing the original content - 980 milliseconds
-     * 
-     * The re-analyze timings will typically vary depending on -
-     * 	1) The complexity of the analyzer code (timings above were using a 
-     * 	   stemmer/lowercaser/stopword combo)
-     *  2) The  number of other fields (Lucene reads ALL fields off the disk 
-     *     when accessing just one document field - can cost dear!)
-     *  3) Use of compression on field storage - could be faster due to compression (less disk IO)
-     *     or slower (more CPU burn) depending on the content.
-     *
-     * @param tpv
-     * @param tokenPositionsGuaranteedContiguous true if the token position numbers have no overlaps or gaps. If looking
-     * to eek out the last drops of performance, set to true. If in doubt, set to false.
-     */
-    public static TokenStream getTokenStream(TermPositionVector tpv, boolean tokenPositionsGuaranteedContiguous) {
-        //an object used to iterate across an array of tokens
-        class StoredTokenStream extends TokenStream {
-          Token tokens[];
-          int currentToken = 0;
-          TermAttribute termAtt;
-          OffsetAttribute offsetAtt;
-    
-          StoredTokenStream(Token tokens[]) {
-            this.tokens = tokens;
-            termAtt = addAttribute(TermAttribute.class);
-            offsetAtt = addAttribute(OffsetAttribute.class);
-          }
-    
-          @Override
-          public boolean incrementToken() throws IOException {
-            if (currentToken >= tokens.length) {
-              return false;
-            }
-            Token token = tokens[currentToken++];
-            termAtt.setTermBuffer(token.term());
-            offsetAtt.setOffset(token.startOffset(), token.endOffset());
-            return true;
-          }
-        }      
-        //code to reconstruct the original sequence of Tokens
-        String[] terms=tpv.getTerms();          
-        int[] freq=tpv.getTermFrequencies();
-        int totalTokens=0;
-
-        for (int t = 0; t < freq.length; t++)
-        {
-            totalTokens+=freq[t];
+    // an object used to iterate across an array of tokens
+    class StoredTokenStream extends TokenStream {
+      Token tokens[];
+
+      int currentToken = 0;
+
+      TermAttribute termAtt;
+
+      OffsetAttribute offsetAtt;
+
+      StoredTokenStream(Token tokens[]) {
+        this.tokens = tokens;
+        termAtt = addAttribute(TermAttribute.class);
+        offsetAtt = addAttribute(OffsetAttribute.class);
+      }
+
+      @Override
+      public boolean incrementToken() throws IOException {
+        if (currentToken >= tokens.length) {
+          return false;
         }
-        Token tokensInOriginalOrder[]=new Token[totalTokens];
-        ArrayList<Token> unsortedTokens = null;
-        for (int t = 0; t < freq.length; t++)
-        {
-            TermVectorOffsetInfo[] offsets=tpv.getOffsets(t);
-            if(offsets==null)
-            {
-                return null;
-            }
-            
-            int[] pos=null;
-            if(tokenPositionsGuaranteedContiguous)
-            {
-                //try get the token position info to speed up assembly of tokens into sorted sequence
-                pos=tpv.getTermPositions(t);
-            }
-            if(pos==null)
-            {	
-                //tokens NOT stored with positions or not guaranteed contiguous - must add to list and sort later
-                if(unsortedTokens==null)
-                {
-                    unsortedTokens=new ArrayList<Token>();
-                }
-                for (int tp = 0; tp < offsets.length; tp++)
-                {
-                  Token token = new Token(offsets[tp].getStartOffset(), offsets[tp].getEndOffset());
-                  token.setTermBuffer(terms[t]);
-                  unsortedTokens.add(token);
-                }
-            }
-            else
-            {
-                //We have positions stored and a guarantee that the token position information is contiguous
-                
-                // This may be fast BUT wont work if Tokenizers used which create >1 token in same position or
-                // creates jumps in position numbers - this code would fail under those circumstances
-                
-                //tokens stored with positions - can use this to index straight into sorted array
-                for (int tp = 0; tp < pos.length; tp++)
-                {
-                  Token token = new Token(terms[t], offsets[tp].getStartOffset(), offsets[tp].getEndOffset());
-                  tokensInOriginalOrder[pos[tp]] = token;
-                }                
-            }
+        Token token = tokens[currentToken++];
+        termAtt.setTermBuffer(token.term());
+        offsetAtt.setOffset(token.startOffset(), token.endOffset());
+        return true;
+      }
+    }
+    // code to reconstruct the original sequence of Tokens
+    String[] terms = tpv.getTerms();
+    int[] freq = tpv.getTermFrequencies();
+    int totalTokens = 0;
+
+    for (int t = 0; t < freq.length; t++) {
+      totalTokens += freq[t];
+    }
+    Token tokensInOriginalOrder[] = new Token[totalTokens];
+    ArrayList<Token> unsortedTokens = null;
+    for (int t = 0; t < freq.length; t++) {
+      TermVectorOffsetInfo[] offsets = tpv.getOffsets(t);
+      if (offsets == null) {
+        throw new IllegalArgumentException("Required TermVector Offset information was not found");
+      }
+
+      int[] pos = null;
+      if (tokenPositionsGuaranteedContiguous) {
+        // try get the token position info to speed up assembly of tokens into
+        // sorted sequence
+        pos = tpv.getTermPositions(t);
+      }
+      if (pos == null) {
+        // tokens NOT stored with positions or not guaranteed contiguous - must
+        // add to list and sort later
+        if (unsortedTokens == null) {
+          unsortedTokens = new ArrayList<Token>();
+        }
+        for (int tp = 0; tp < offsets.length; tp++) {
+          Token token = new Token(offsets[tp].getStartOffset(), offsets[tp]
+              .getEndOffset());
+          token.setTermBuffer(terms[t]);
+          unsortedTokens.add(token);
+        }
+      } else {
+        // We have positions stored and a guarantee that the token position
+        // information is contiguous
+
+        // This may be fast BUT won't work if Tokenizers are used which create
+        // >1 token in the same position or
+        // create jumps in position numbers - this code would fail under those
+        // circumstances
+
+        // tokens stored with positions - can use this to index straight into
+        // sorted array
+        for (int tp = 0; tp < pos.length; tp++) {
+          Token token = new Token(terms[t], offsets[tp].getStartOffset(),
+              offsets[tp].getEndOffset());
+          tokensInOriginalOrder[pos[tp]] = token;
         }
-        //If the field has been stored without position data we must perform a sort        
-        if(unsortedTokens!=null) {
-            tokensInOriginalOrder= unsortedTokens.toArray(new Token[unsortedTokens.size()]);
-            Arrays.sort(tokensInOriginalOrder, new Comparator<Token>(){
-                public int compare(Token t1, Token t2) {
-                    if(t1.startOffset()>t2.endOffset())
-                        return 1;
-                    if(t1.startOffset()<t2.startOffset())
-                        return -1;
-                    return 0;
-                }});
+      }
+    }
+    // If the field has been stored without position data we must perform a sort
+    if (unsortedTokens != null) {
+      tokensInOriginalOrder = unsortedTokens.toArray(new Token[unsortedTokens
+          .size()]);
+      Arrays.sort(tokensInOriginalOrder, new Comparator<Token>() {
+        public int compare(Token t1, Token t2) {
+          if (t1.startOffset() > t2.endOffset())
+            return 1;
+          if (t1.startOffset() < t2.startOffset())
+            return -1;
+          return 0;
         }
-        return new StoredTokenStream(tokensInOriginalOrder);
+      });
+    }
+    return new StoredTokenStream(tokensInOriginalOrder);
+  }
+
+  // Rebuilds the stream from the stored TermPositionVector, or throws if absent.
+  public static TokenStream getTokenStream(IndexReader reader, int docId,
+      String field) throws IOException {
+    TermFreqVector tfv = reader.getTermFreqVector(docId, field);
+    if (tfv == null) {
+      throw new IllegalArgumentException(field + " in doc #" + docId
+          + " does not have any term position data stored");
     }
+    if (tfv instanceof TermPositionVector) {
+      // Reuse the vector fetched above rather than reading it a second time.
+      return getTokenStream((TermPositionVector) tfv);
+    }
+    throw new IllegalArgumentException(field + " in doc #" + docId
+        + " does not have any term position data stored");
+  }
+
+  // convenience method
+  public static TokenStream getTokenStream(IndexReader reader, int docId,
+      String field, Analyzer analyzer) throws IOException {
+    Document doc = reader.document(docId);
+    return getTokenStream(doc, field, analyzer);
+  }
+
+  public static TokenStream getTokenStream(Document doc, String field,
+      Analyzer analyzer) {
+    String contents = doc.get(field);
+    if (contents == null) {
+      throw new IllegalArgumentException("Field " + field
+          + " in document is not stored and cannot be analyzed");
+    }
+    return getTokenStream(field, contents, analyzer);
+  }
 
-    public static TokenStream getTokenStream(IndexReader reader,int docId, String field) throws IOException
-    {
-		TermFreqVector tfv = reader.getTermFreqVector(docId,field);
-		if(tfv==null)
-		{
-		    throw new IllegalArgumentException(field+" in doc #"+docId
-		            	+"does not have any term position data stored");
-		}
-	    if(tfv instanceof TermPositionVector)
-	    {
-			TermPositionVector tpv=(TermPositionVector) reader.getTermFreqVector(docId,field);
-	        return getTokenStream(tpv);	        
-	    }
-	    throw new IllegalArgumentException(field+" in doc #"+docId
-            	+"does not have any term position data stored");
-    }
-
-    //convenience method
-    public static TokenStream getTokenStream(IndexReader reader,int docId, String field,Analyzer analyzer) throws IOException
-    {
-		  Document doc=reader.document(docId);
-		  return getTokenStream(doc, field, analyzer);
-    }
-    
-  public static TokenStream getTokenStream(Document doc, String field, Analyzer analyzer){
-    String contents=doc.get(field);
-		if(contents==null)
-		{
-		    throw new IllegalArgumentException("Field "+field +" in document is not stored and cannot be analyzed");
-		}
-        return getTokenStream(field, contents, analyzer);
-  }
-  //convenience method
-  public static TokenStream getTokenStream(String field, String contents, Analyzer analyzer){
-    return analyzer.tokenStream(field,new StringReader(contents));
+  // convenience method
+  public static TokenStream getTokenStream(String field, String contents,
+      Analyzer analyzer) {
+    return analyzer.tokenStream(field, new StringReader(contents));
   }
 
 }

Modified: lucene/java/branches/flex_1458/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTerm.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTerm.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTerm.java (original)
+++ lucene/java/branches/flex_1458/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTerm.java Thu Jan 14 19:05:12 2010
@@ -53,7 +53,7 @@
    * Checks to see if this term is valid at <code>position</code>.
    *
    * @param position
-   *            to check against valid term postions
+   *            to check against valid term positions
    * @return true iff this term is a hit at this position
    */
   public boolean checkPosition(int position) {

Propchange: lucene/java/branches/flex_1458/contrib/highlighter/src/java/overview.html
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/java/branches/flex_1458/contrib/highlighter/src/test/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Thu Jan 14 19:05:12 2010
@@ -1,5 +1,5 @@
 /lucene/java/branches/lucene_2_4/contrib/highlighter/src/test:748824
-/lucene/java/branches/lucene_2_9/contrib/highlighter/src/test:817269-818600,825998,826775,829134,829816,829881,831036
+/lucene/java/branches/lucene_2_9/contrib/highlighter/src/test:817269-818600,825998,826775,829134,829816,829881,831036,896850
 /lucene/java/branches/lucene_2_9_back_compat_tests/contrib/highlighter/src/test:818601-821336
-/lucene/java/branches/lucene_3_0/contrib/highlighter/src/test:880793
-/lucene/java/trunk/contrib/highlighter/src/test:829439-833960,880727-886190,889185,889622,889667
+/lucene/java/branches/lucene_3_0/contrib/highlighter/src/test:880793,896906
+/lucene/java/trunk/contrib/highlighter/src/test:829439-833960,880727-886190,889185,889622,889667,889866-899001

Modified: lucene/java/branches/flex_1458/contrib/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java (original)
+++ lucene/java/branches/flex_1458/contrib/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java Thu Jan 14 19:05:12 2010
@@ -23,13 +23,10 @@
 
 import org.apache.lucene.analysis.TokenFilter;
 import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Token;
 import org.apache.lucene.analysis.tokenattributes.TermAttribute;
 import org.apache.lucene.util.IndexableBinaryStringTools;
 
 import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.nio.CharBuffer;
 
 
 /**
@@ -92,15 +89,14 @@
       char[] termBuffer = termAtt.termBuffer();
       String termText = new String(termBuffer, 0, termAtt.termLength());
       collator.getRawCollationKey(termText, reusableKey);
-      ByteBuffer collationKeyBuf = ByteBuffer.wrap(reusableKey.bytes, 0, reusableKey.size);
-      int encodedLength
-        = IndexableBinaryStringTools.getEncodedLength(collationKeyBuf);
+      int encodedLength = IndexableBinaryStringTools.getEncodedLength(
+          reusableKey.bytes, 0, reusableKey.size);
       if (encodedLength > termBuffer.length) {
         termAtt.resizeTermBuffer(encodedLength);
       }
       termAtt.setTermLength(encodedLength);
-      CharBuffer wrappedTermBuffer = CharBuffer.wrap(termAtt.termBuffer());
-      IndexableBinaryStringTools.encode(collationKeyBuf, wrappedTermBuffer);
+      IndexableBinaryStringTools.encode(reusableKey.bytes, 0, reusableKey.size,
+          termAtt.termBuffer(), 0, encodedLength);
       return true;
     } else {
       return false;

Modified: lucene/java/branches/flex_1458/contrib/icu/src/java/overview.html
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/icu/src/java/overview.html?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/icu/src/java/overview.html (original)
+++ lucene/java/branches/flex_1458/contrib/icu/src/java/overview.html Thu Jan 14 19:05:12 2010
@@ -34,7 +34,7 @@
   <code>CollationKey</code>s.  <code>icu4j-collation-4.0.jar</code>, 
   a trimmed-down version of <code>icu4j-4.0.jar</code> that contains only the 
   code and data needed to support collation, is included in Lucene's Subversion 
-  repository at <code>contrib/collation/lib/</code>.
+  repository at <code>contrib/icu/lib/</code>.
 </p>
 
 <h2>Use Cases</h2>

Modified: lucene/java/branches/flex_1458/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedAllTermDocs.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedAllTermDocs.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedAllTermDocs.java (original)
+++ lucene/java/branches/flex_1458/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedAllTermDocs.java Thu Jan 14 19:05:12 2010
@@ -28,6 +28,7 @@
     this.reader = reader;
   }
 
+  @Override
   public boolean isDeleted(int doc) {
     return reader.isDeleted(doc);
   }

Propchange: lucene/java/branches/flex_1458/contrib/instantiated/src/java/overview.html
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/java/branches/flex_1458/contrib/lucli/src/java/overview.html
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/java/branches/flex_1458/contrib/memory/src/java/overview.html
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/java/branches/flex_1458/contrib/misc/src/java/org/apache/lucene/misc/ChainedFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/misc/src/java/org/apache/lucene/misc/ChainedFilter.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/misc/src/java/org/apache/lucene/misc/ChainedFilter.java (original)
+++ lucene/java/branches/flex_1458/contrib/misc/src/java/org/apache/lucene/misc/ChainedFilter.java Thu Jan 14 19:05:12 2010
@@ -156,6 +156,7 @@
      *  switch to a different DocIdSet implementation yourself.
      *  This method will be removed in Lucene 4.0 
      **/
+    @Deprecated
     protected final DocIdSet finalResult(OpenBitSetDISI result, int maxDocs) {
         return result;
     }

Propchange: lucene/java/branches/flex_1458/contrib/misc/src/java/org/apache/lucene/queryParser/complexPhrase/ComplexPhraseQueryParser.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/java/branches/flex_1458/contrib/misc/src/java/org/apache/lucene/queryParser/ext/ExtendableQueryParser.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/misc/src/java/org/apache/lucene/queryParser/ext/ExtendableQueryParser.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/misc/src/java/org/apache/lucene/queryParser/ext/ExtendableQueryParser.java (original)
+++ lucene/java/branches/flex_1458/contrib/misc/src/java/org/apache/lucene/queryParser/ext/ExtendableQueryParser.java Thu Jan 14 19:05:12 2010
@@ -133,7 +133,7 @@
     final ParserExtension extension = this.extensions
         .getExtension(splitExtensionField.cud);
     if (extension != null) {
-      return extension.parse(new ExtensionQuery(splitExtensionField.cur,
+      return extension.parse(new ExtensionQuery(this, splitExtensionField.cur,
           queryText));
     }
     return super.getFieldQuery(field, queryText);

Modified: lucene/java/branches/flex_1458/contrib/misc/src/java/org/apache/lucene/queryParser/ext/ExtensionQuery.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/misc/src/java/org/apache/lucene/queryParser/ext/ExtensionQuery.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/misc/src/java/org/apache/lucene/queryParser/ext/ExtensionQuery.java (original)
+++ lucene/java/branches/flex_1458/contrib/misc/src/java/org/apache/lucene/queryParser/ext/ExtensionQuery.java Thu Jan 14 19:05:12 2010
@@ -1,5 +1,7 @@
 package org.apache.lucene.queryParser.ext;
 
+import org.apache.lucene.queryParser.QueryParser;
+
 /**
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -29,6 +31,7 @@
 
   private final String field;
   private final String rawQueryString;
+  private final QueryParser topLevelParser;
 
   /**
    * Creates a new {@link ExtensionQuery}
@@ -38,9 +41,10 @@
    * @param rawQueryString
    *          the raw extension query string
    */
-  public ExtensionQuery(String field, String rawQueryString) {
+  public ExtensionQuery(QueryParser topLevelParser, String field, String rawQueryString) {
     this.field = field;
     this.rawQueryString = rawQueryString;
+    this.topLevelParser = topLevelParser;
   }
 
   /**
@@ -60,4 +64,12 @@
   public String getRawQueryString() {
     return rawQueryString;
   }
+  
+  /**
+   * Returns the top-level parser that created this {@link ExtensionQuery}.
+   * @return the top-level parser that created this {@link ExtensionQuery}
+   */
+  public QueryParser getTopLevelParser() {
+    return topLevelParser;
+  }
 }

Propchange: lucene/java/branches/flex_1458/contrib/misc/src/java/overview.html
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/java/branches/flex_1458/contrib/misc/src/test/org/apache/lucene/misc/ChainedFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/misc/src/test/org/apache/lucene/misc/ChainedFilterTest.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/misc/src/test/org/apache/lucene/misc/ChainedFilterTest.java (original)
+++ lucene/java/branches/flex_1458/contrib/misc/src/test/org/apache/lucene/misc/ChainedFilterTest.java Thu Jan 14 19:05:12 2010
@@ -18,6 +18,7 @@
  */
 
 import java.util.Calendar;
+import java.util.GregorianCalendar;
 
 import junit.framework.TestCase;
 
@@ -60,7 +61,8 @@
     IndexWriter writer =
        new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
 
-    Calendar cal = Calendar.getInstance();
+    Calendar cal = new GregorianCalendar();
+    cal.clear();
     cal.setTimeInMillis(1041397200000L); // 2003 January 01
 
     for (int i = 0; i < MAX; i++) {

Propchange: lucene/java/branches/flex_1458/contrib/misc/src/test/org/apache/lucene/queryParser/complexPhrase/TestComplexPhraseQuery.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/java/branches/flex_1458/contrib/misc/src/test/org/apache/lucene/queryParser/ext/TestExtensions.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/misc/src/test/org/apache/lucene/queryParser/ext/TestExtensions.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/misc/src/test/org/apache/lucene/queryParser/ext/TestExtensions.java (original)
+++ lucene/java/branches/flex_1458/contrib/misc/src/test/org/apache/lucene/queryParser/ext/TestExtensions.java Thu Jan 14 19:05:12 2010
@@ -26,6 +26,7 @@
 
   private Extensions ext;
 
+  @Override
   protected void setUp() throws Exception {
     super.setUp();
     this.ext = new Extensions();

Modified: lucene/java/branches/flex_1458/contrib/misc/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/misc/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/misc/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java (original)
+++ lucene/java/branches/flex_1458/contrib/misc/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java Thu Jan 14 19:05:12 2010
@@ -392,7 +392,12 @@
   public String getLocalizedDate(int year, int month, int day) {
     DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT);
     Calendar calendar = new GregorianCalendar();
+    calendar.clear();
     calendar.set(year, month, day);
+    calendar.set(Calendar.HOUR_OF_DAY, 23);
+    calendar.set(Calendar.MINUTE, 59);
+    calendar.set(Calendar.SECOND, 59);
+    calendar.set(Calendar.MILLISECOND, 999);
     return df.format(calendar.getTime());
   }
 

Modified: lucene/java/branches/flex_1458/contrib/queries/src/java/org/apache/lucene/search/BooleanFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/queries/src/java/org/apache/lucene/search/BooleanFilter.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/queries/src/java/org/apache/lucene/search/BooleanFilter.java (original)
+++ lucene/java/branches/flex_1458/contrib/queries/src/java/org/apache/lucene/search/BooleanFilter.java Thu Jan 14 19:05:12 2010
@@ -117,6 +117,7 @@
    * switch to a different DocIdSet implementation yourself.
    * This method will be removed in Lucene 4.0 
    */
+  @Deprecated
   protected final DocIdSet finalResult(OpenBitSetDISI result, int maxDocs) {
     return result;
   }

Propchange: lucene/java/branches/flex_1458/contrib/queries/src/java/overview.html
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/java/branches/flex_1458/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/MultiFieldQueryParserWrapper.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/MultiFieldQueryParserWrapper.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/MultiFieldQueryParserWrapper.java (original)
+++ lucene/java/branches/flex_1458/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/MultiFieldQueryParserWrapper.java Thu Jan 14 19:05:12 2010
@@ -36,6 +36,7 @@
  *             used along the transition from the old query parser to the new
  *             one
  */
+@Deprecated
 public class MultiFieldQueryParserWrapper extends QueryParserWrapper {
 
   /**

Modified: lucene/java/branches/flex_1458/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/QueryParserWrapper.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/QueryParserWrapper.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/QueryParserWrapper.java (original)
+++ lucene/java/branches/flex_1458/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/QueryParserWrapper.java Thu Jan 14 19:05:12 2010
@@ -64,6 +64,7 @@
  *             used along the transition from the old query parser to the new
  *             one
  */
+@Deprecated
 public class QueryParserWrapper {
 
   /**

Modified: lucene/java/branches/flex_1458/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java (original)
+++ lucene/java/branches/flex_1458/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java Thu Jan 14 19:05:12 2010
@@ -650,25 +650,24 @@
     }
   }
 
-  private String getLocalizedDate(int year, int month, int day,
-      boolean extendLastDate) {
+  private String getLocalizedDate(int year, int month, int day) {
     DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT);
     Calendar calendar = new GregorianCalendar();
+    calendar.clear();
     calendar.set(year, month, day);
-    if (extendLastDate) {
-      calendar.set(Calendar.HOUR_OF_DAY, 23);
-      calendar.set(Calendar.MINUTE, 59);
-      calendar.set(Calendar.SECOND, 59);
-      calendar.set(Calendar.MILLISECOND, 999);
-    }
+    calendar.set(Calendar.HOUR_OF_DAY, 23);
+    calendar.set(Calendar.MINUTE, 59);
+    calendar.set(Calendar.SECOND, 59);
+    calendar.set(Calendar.MILLISECOND, 999);
     return df.format(calendar.getTime());
   }
 
   /** for testing legacy DateField support */
   public void testLegacyDateRange() throws Exception {
-    String startDate = getLocalizedDate(2002, 1, 1, false);
-    String endDate = getLocalizedDate(2002, 1, 4, false);
+    String startDate = getLocalizedDate(2002, 1, 1);
+    String endDate = getLocalizedDate(2002, 1, 4);
     Calendar endDateExpected = new GregorianCalendar();
+    endDateExpected.clear();
     endDateExpected.set(2002, 1, 4, 23, 59, 59);
     endDateExpected.set(Calendar.MILLISECOND, 999);
     assertQueryEquals("[ " + escapeDateString(startDate) + " TO " + escapeDateString(endDate) + "]", null, "["
@@ -679,9 +678,10 @@
   }
 
   public void testDateRange() throws Exception {
-    String startDate = getLocalizedDate(2002, 1, 1, false);
-    String endDate = getLocalizedDate(2002, 1, 4, false);
+    String startDate = getLocalizedDate(2002, 1, 1);
+    String endDate = getLocalizedDate(2002, 1, 4);
     Calendar endDateExpected = new GregorianCalendar();
+    endDateExpected.clear();
     endDateExpected.set(2002, 1, 4, 23, 59, 59);
     endDateExpected.set(Calendar.MILLISECOND, 999);
     final String defaultField = "default";

Modified: lucene/java/branches/flex_1458/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java (original)
+++ lucene/java/branches/flex_1458/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java Thu Jan 14 19:05:12 2010
@@ -644,25 +644,24 @@
     }
   }
 
-  private String getLocalizedDate(int year, int month, int day,
-      boolean extendLastDate) {
+  private String getLocalizedDate(int year, int month, int day) {
     DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT);
     Calendar calendar = new GregorianCalendar();
+    calendar.clear();
     calendar.set(year, month, day);
-    if (extendLastDate) {
-      calendar.set(Calendar.HOUR_OF_DAY, 23);
-      calendar.set(Calendar.MINUTE, 59);
-      calendar.set(Calendar.SECOND, 59);
-      calendar.set(Calendar.MILLISECOND, 999);
-    }
+    calendar.set(Calendar.HOUR_OF_DAY, 23);
+    calendar.set(Calendar.MINUTE, 59);
+    calendar.set(Calendar.SECOND, 59);
+    calendar.set(Calendar.MILLISECOND, 999);
     return df.format(calendar.getTime());
   }
 
   /** for testing legacy DateField support */
   public void testLegacyDateRange() throws Exception {
-    String startDate = getLocalizedDate(2002, 1, 1, false);
-    String endDate = getLocalizedDate(2002, 1, 4, false);
+    String startDate = getLocalizedDate(2002, 1, 1);
+    String endDate = getLocalizedDate(2002, 1, 4);
     Calendar endDateExpected = new GregorianCalendar();
+    endDateExpected.clear();
     endDateExpected.set(2002, 1, 4, 23, 59, 59);
     endDateExpected.set(Calendar.MILLISECOND, 999);
     assertQueryEquals("[ " + escapeDateString(startDate) + " TO " + escapeDateString(endDate) + "]", null, "["
@@ -673,9 +672,10 @@
   }
 
   public void testDateRange() throws Exception {
-    String startDate = getLocalizedDate(2002, 1, 1, false);
-    String endDate = getLocalizedDate(2002, 1, 4, false);
+    String startDate = getLocalizedDate(2002, 1, 1);
+    String endDate = getLocalizedDate(2002, 1, 4);
     Calendar endDateExpected = new GregorianCalendar();
+    endDateExpected.clear();
     endDateExpected.set(2002, 1, 4, 23, 59, 59);
     endDateExpected.set(Calendar.MILLISECOND, 999);
     final String defaultField = "default";

Propchange: lucene/java/branches/flex_1458/contrib/regex/src/java/overview.html
------------------------------------------------------------------------------
    svn:eol-style = native

Propchange: lucene/java/branches/flex_1458/contrib/remote/src/java/overview.html
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: lucene/java/branches/flex_1458/contrib/snowball/build.xml
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/snowball/build.xml?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/snowball/build.xml (original)
+++ lucene/java/branches/flex_1458/contrib/snowball/build.xml Thu Jan 14 19:05:12 2010
@@ -131,10 +131,26 @@
   </target>
 
   <target name="compile-core" depends="build-analyzers, common.compile-core" />
+  <target name="compile-test" depends="download-vocab-tests, common.compile-test" />
   
   <target name="build-analyzers" unless="analyzers.jar.present">
     <echo>Snowball building dependency ${analyzers.jar}</echo>
     <ant antfile="../analyzers/build.xml" target="default" inheritall="false" dir="../analyzers" />
   </target>
 
+  <property name="snowball.vocab.rev" value="500"/>
+  <property name="snowball.vocab.url" 
+  	value="svn://svn.tartarus.org/snowball/trunk/data"/>
+  <property name="vocab.dir" value="src/test/org/apache/lucene/analysis/snowball"/>
+	
+  <target name="download-vocab-tests" depends="compile-core"
+    description="Downloads Snowball vocabulary tests">
+    <sequential>
+      <mkdir dir="${vocab.dir}"/>
+      <exec dir="${vocab.dir}" executable="${svn.exe}"
+	            failifexecutionfails="false">
+        <arg line="checkout -r ${snowball.vocab.rev} ${snowball.vocab.url}"/>
+	  </exec>
+	</sequential>
+  </target>
 </project>

Modified: lucene/java/branches/flex_1458/contrib/snowball/src/java/org/apache/lucene/analysis/snowball/SnowballAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/snowball/src/java/org/apache/lucene/analysis/snowball/SnowballAnalyzer.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/snowball/src/java/org/apache/lucene/analysis/snowball/SnowballAnalyzer.java (original)
+++ lucene/java/branches/flex_1458/contrib/snowball/src/java/org/apache/lucene/analysis/snowball/SnowballAnalyzer.java Thu Jan 14 19:05:12 2010
@@ -51,11 +51,22 @@
     this.matchVersion = matchVersion;
   }
 
-  /** Builds the named analyzer with the given stop words. */
+  /** 
+   * Builds the named analyzer with the given stop words.
+   * @deprecated Use {@link #SnowballAnalyzer(Version, String, Set)} instead.  
+   */
+  @Deprecated
   public SnowballAnalyzer(Version matchVersion, String name, String[] stopWords) {
     this(matchVersion, name);
     stopSet = StopFilter.makeStopSet(matchVersion, stopWords);
   }
+  
+  /** Builds the named analyzer with the given stop words. */
+  public SnowballAnalyzer(Version matchVersion, String name, Set<?> stopWords) {
+    this(matchVersion, name);
+    stopSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion,
+        stopWords));
+  }
 
   /** Constructs a {@link StandardTokenizer} filtered by a {@link
       StandardFilter}, a {@link LowerCaseFilter}, a {@link StopFilter},

Modified: lucene/java/branches/flex_1458/contrib/snowball/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/snowball/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/snowball/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java (original)
+++ lucene/java/branches/flex_1458/contrib/snowball/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java Thu Jan 14 19:05:12 2010
@@ -74,13 +74,16 @@
   @Override
   public final boolean incrementToken() throws IOException {
     if (input.incrementToken()) {
-      String originalTerm = termAtt.term();
-      stemmer.setCurrent(originalTerm);
+      char termBuffer[] = termAtt.termBuffer();
+      final int length = termAtt.termLength();
+      stemmer.setCurrent(termBuffer, length);
       stemmer.stem();
-      String finalTerm = stemmer.getCurrent();
-      // Don't bother updating, if it is unchanged.
-      if (!originalTerm.equals(finalTerm))
-        termAtt.setTermBuffer(finalTerm);
+      final char finalTerm[] = stemmer.getCurrentBuffer();
+      final int newLength = stemmer.getCurrentBufferLength();
+      if (finalTerm != termBuffer)
+        termAtt.setTermBuffer(finalTerm, 0, newLength);
+      else
+        termAtt.setTermLength(newLength); 
       return true;
     } else {
       return false;