You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by us...@apache.org on 2010/01/14 20:05:42 UTC
svn commit: r899359 [3/7] - in /lucene/java/branches/flex_1458: ./ contrib/
contrib/analyzers/common/src/java/org/apache/lucene/analysis/ar/
contrib/analyzers/common/src/java/org/apache/lucene/analysis/bg/
contrib/analyzers/common/src/java/org/apache/l...
Modified: lucene/java/branches/flex_1458/contrib/analyzers/smartcn/build.xml
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/analyzers/smartcn/build.xml?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/analyzers/smartcn/build.xml (original)
+++ lucene/java/branches/flex_1458/contrib/analyzers/smartcn/build.xml Thu Jan 14 19:05:12 2010
@@ -1,38 +1,38 @@
-<?xml version="1.0"?>
-
-<!--
- Licensed to the Apache Software Foundation (ASF) under one or more
- contributor license agreements. See the NOTICE file distributed with
- this work for additional information regarding copyright ownership.
- The ASF licenses this file to You under the Apache License, Version 2.0
- the "License"); you may not use this file except in compliance with
- the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License.
- -->
-
-<project name="smartcn" default="default">
-
- <description>
- Smart Chinese Analyzer
- </description>
-
- <property name="build.dir" location="../../../build/contrib/analyzers/smartcn" />
- <property name="dist.dir" location="../../../dist/contrib/analyzers/smartcn" />
- <property name="maven.dist.dir" location="../../../dist/maven" />
-
- <import file="../../contrib-build.xml"/>
-
- <path id="test.classpath">
- <path refid="classpath"/>
- <pathelement location="../../../build/classes/test/"/>
- <path refid="junit-path"/>
- <pathelement location="${build.dir}/classes/java"/>
- </path>
-</project>
+<?xml version="1.0"?>
+
+<!--
+ Licensed to the Apache Software Foundation (ASF) under one or more
+ contributor license agreements. See the NOTICE file distributed with
+ this work for additional information regarding copyright ownership.
+ The ASF licenses this file to You under the Apache License, Version 2.0
+ the "License"); you may not use this file except in compliance with
+ the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing, software
+ distributed under the License is distributed on an "AS IS" BASIS,
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ See the License for the specific language governing permissions and
+ limitations under the License.
+ -->
+
+<project name="smartcn" default="default">
+
+ <description>
+ Smart Chinese Analyzer
+ </description>
+
+ <property name="build.dir" location="../../../build/contrib/analyzers/smartcn" />
+ <property name="dist.dir" location="../../../dist/contrib/analyzers/smartcn" />
+ <property name="maven.dist.dir" location="../../../dist/maven" />
+
+ <import file="../../contrib-build.xml"/>
+
+ <path id="test.classpath">
+ <path refid="classpath"/>
+ <pathelement location="../../../build/classes/test/"/>
+ <path refid="junit-path"/>
+ <pathelement location="${build.dir}/classes/java"/>
+ </path>
+</project>
Propchange: lucene/java/branches/flex_1458/contrib/analyzers/smartcn/build.xml
------------------------------------------------------------------------------
svn:eol-style = native
Modified: lucene/java/branches/flex_1458/contrib/analyzers/smartcn/pom.xml.template
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/analyzers/smartcn/pom.xml.template?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/analyzers/smartcn/pom.xml.template (original)
+++ lucene/java/branches/flex_1458/contrib/analyzers/smartcn/pom.xml.template Thu Jan 14 19:05:12 2010
@@ -1,35 +1,35 @@
-<project xmlns="http://maven.apache.org/POM/4.0.0"
- xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
- xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
-
- <!--
- Licensed to the Apache Software Foundation (ASF) under one
- or more contributor license agreements. See the NOTICE file
- distributed with this work for additional information
- regarding copyright ownership. The ASF licenses this file
- to you under the Apache License, Version 2.0 (the
- "License"); you may not use this file except in compliance
- with the License. You may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing,
- software distributed under the License is distributed on an
- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- KIND, either express or implied. See the License for the
- specific language governing permissions and limitations
- under the License.
- -->
- <modelVersion>4.0.0</modelVersion>
- <parent>
- <groupId>org.apache.lucene</groupId>
- <artifactId>lucene-contrib</artifactId>
- <version>@version@</version>
- </parent>
- <groupId>org.apache.lucene</groupId>
- <artifactId>lucene-smartcn</artifactId>
- <name>Lucene Smart Chinese Analyzer</name>
- <version>@version@</version>
- <description>Smart Chinese Analyzer</description>
- <packaging>jar</packaging>
-</project>
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
+
+ <!--
+ Licensed to the Apache Software Foundation (ASF) under one
+ or more contributor license agreements. See the NOTICE file
+ distributed with this work for additional information
+ regarding copyright ownership. The ASF licenses this file
+ to you under the Apache License, Version 2.0 (the
+ "License"); you may not use this file except in compliance
+ with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+ Unless required by applicable law or agreed to in writing,
+ software distributed under the License is distributed on an
+ "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ KIND, either express or implied. See the License for the
+ specific language governing permissions and limitations
+ under the License.
+ -->
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>org.apache.lucene</groupId>
+ <artifactId>lucene-contrib</artifactId>
+ <version>@version@</version>
+ </parent>
+ <groupId>org.apache.lucene</groupId>
+ <artifactId>lucene-smartcn</artifactId>
+ <name>Lucene Smart Chinese Analyzer</name>
+ <version>@version@</version>
+ <description>Smart Chinese Analyzer</description>
+ <packaging>jar</packaging>
+</project>
Propchange: lucene/java/branches/flex_1458/contrib/analyzers/smartcn/pom.xml.template
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/java/branches/flex_1458/contrib/analyzers/smartcn/src/java/overview.html
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/java/branches/flex_1458/contrib/ant/src/java/overview.html
------------------------------------------------------------------------------
svn:eol-style = native
Modified: lucene/java/branches/flex_1458/contrib/benchmark/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/benchmark/CHANGES.txt?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/benchmark/CHANGES.txt (original)
+++ lucene/java/branches/flex_1458/contrib/benchmark/CHANGES.txt Thu Jan 14 19:05:12 2010
@@ -4,6 +4,25 @@
$Id:$
+1/11/2010
+ LUCENE-2181: Add a benchmark for collation. This adds NewLocaleTask,
+ which sets a Locale in the run data for collation to use, and can be
+ used in the future for benchmarking localized range queries and sorts.
+ Also add NewCollationAnalyzerTask, which works with both JDK and ICU
+ Collator implementations. Fix ReadTokensTask to not tokenize fields
+ unless they should be tokenized according to DocMaker config. The
+ easiest way to run the benchmark is to run 'ant collation'
+ (Steven Rowe via Robert Muir)
+
+12/22/2009
+ LUCENE-2178: Allow multiple locations to add to the class path with
+ -Dbenchmark.ext.classpath=... when running "ant run-task" (Steven
+ Rowe via Mike McCandless)
+
+12/17/2009
+ LUCENE-2168: Allow negative relative thread priority for BG tasks
+ (Mike McCandless)
+
12/07/2009
LUCENE-2106: ReadTask does not close its Reader when
OpenReader/CloseReader are not used. (Mark Miller)
Modified: lucene/java/branches/flex_1458/contrib/benchmark/build.xml
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/benchmark/build.xml?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/benchmark/build.xml (original)
+++ lucene/java/branches/flex_1458/contrib/benchmark/build.xml Thu Jan 14 19:05:12 2010
@@ -24,7 +24,10 @@
<available file="temp/enwiki-20070527-pages-articles.xml.bz2" property="enwiki.exists"/>
<available file="temp/enwiki-20070527-pages-articles.xml" property="enwiki.expanded"/>
<available file="${working.dir}/enwiki.txt" property="enwiki.extracted"/>
-
+ <available file="temp/${top.100k.words.archive.filename}"
+ property="top.100k.words.archive.present"/>
+ <available file="${working.dir}/top100k-out"
+ property="top.100k.word.files.expanded"/>
</target>
<target name="enwiki-files" depends="check-files">
@@ -94,6 +97,27 @@
<untar src="temp/mini_newsgroups.tar" dest="${working.dir}"/>
</target>
+ <property name="top.100k.words.archive.filename"
+ value="top.100k.words.de.en.fr.uk.wikipedia.2009-11.tar.bz2"/>
+ <property name="top.100k.words.archive.base.url"
+ value="http://people.apache.org/~rmuir/wikipedia"/>
+ <target name="get-top-100k-words-archive" unless="top.100k.words.archive.present">
+ <mkdir dir="temp"/>
+ <get src="${top.100k.words.archive.base.url}/${top.100k.words.archive.filename}"
+ dest="temp/${top.100k.words.archive.filename}"/>
+ </target>
+ <target name="expand-top-100k-word-files" unless="top.100k.word.files.expanded">
+ <mkdir dir="${working.dir}/top100k-out"/>
+ <untar src="temp/${top.100k.words.archive.filename}"
+ overwrite="true" compression="bzip2" dest="${working.dir}/top100k-out"/>
+ </target>
+
+ <target name="top-100k-wiki-word-files" depends="check-files">
+ <mkdir dir="${working.dir}"/>
+ <antcall target="get-top-100k-words-archive"/>
+ <antcall target="expand-top-100k-word-files"/>
+ </target>
+
<target name="get-files" depends="check-files">
<mkdir dir="temp"/>
<antcall target="get-reuters"/>
@@ -104,6 +128,7 @@
<path id="classpath">
<pathelement path="${common.dir}/build/classes/java"/>
<pathelement path="${common.dir}/build/classes/demo"/>
+ <pathelement path="${common.dir}/build/classes/test"/>
<pathelement path="${common.dir}/build/contrib/highlighter/classes/java"/>
<pathelement path="${common.dir}/build/contrib/memory/classes/java"/>
<pathelement path="${common.dir}/build/contrib/fast-vector-highlighter/classes/java"/>
@@ -114,13 +139,13 @@
<path id="run.classpath">
<path refid="classpath"/>
<pathelement location="${build.dir}/classes/java"/>
- <pathelement location="${benchmark.ext.classpath}"/>
+ <pathelement path="${benchmark.ext.classpath}"/>
</path>
<property name="task.alg" location="conf/micro-standard.alg"/>
<property name="task.mem" value="140M"/>
- <target name="run-task" depends="compile,check-files,get-files"
+ <target name="run-task" depends="compile-test,check-files,get-files"
description="Run compound penalty perf test (optional: -Dtask.alg=your-algorithm-file -Dtask.mem=java-max-mem)">
<echo>Working Directory: ${working.dir}</echo>
<java classname="org.apache.lucene.benchmark.byTask.Benchmark" maxmemory="${task.mem}" fork="true">
@@ -140,6 +165,34 @@
</java>
</target>
+ <property name="collation.alg.file" location="conf/collation.alg"/>
+ <property name="collation.output.file"
+ value="${working.dir}/collation.benchmark.output.txt"/>
+ <property name="collation.jira.output.file"
+ value="${working.dir}/collation.bm2jira.output.txt"/>
+
+ <path id="collation.runtime.classpath">
+ <path refid="run.classpath"/>
+ <pathelement path="${common.dir}/build/contrib/icu/classes/java"/>
+ <fileset dir="${common.dir}/contrib/icu/lib" includes="icu4j*.jar"/>
+ </path>
+
+ <target name="collation" depends="compile,compile-icu,top-100k-wiki-word-files">
+ <echo>Running contrib/benchmark with alg file: ${collation.alg.file}</echo>
+ <java fork="true" classname="org.apache.lucene.benchmark.byTask.Benchmark"
+ maxmemory="${task.mem}" output="${collation.output.file}">
+ <classpath refid="collation.runtime.classpath"/>
+ <arg file="${collation.alg.file}"/>
+ </java>
+ <echo>Benchmark output is in file: ${collation.output.file}</echo>
+ <echo>Converting to JIRA table format...</echo>
+ <exec executable="perl" output="${collation.jira.output.file}" failonerror="true">
+ <arg value="scripts/collation.bm2jira.pl"/>
+ <arg value="${collation.output.file}"/>
+ </exec>
+ <echo>Benchmark output in JIRA table format is in file: ${collation.jira.output.file}</echo>
+ </target>
+
<target name="compile-demo">
<subant target="compile-demo">
<fileset dir="${common.dir}" includes="build.xml"/>
@@ -150,6 +203,11 @@
<fileset dir="${common.dir}/contrib/highlighter" includes="build.xml"/>
</subant>
</target>
+ <target name="compile-icu">
+ <subant target="compile">
+ <fileset dir="${common.dir}/contrib/icu" includes="build.xml"/>
+ </subant>
+ </target>
<target name="compile-memory">
<subant target="compile">
<fileset dir="${common.dir}/contrib/memory" includes="build.xml"/>
Modified: lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java (original)
+++ lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/PerfRunData.java Thu Jan 14 19:05:12 2010
@@ -20,6 +20,7 @@
import java.io.File;
import java.io.IOException;
import java.util.HashMap;
+import java.util.Locale;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
@@ -61,6 +62,7 @@
private Directory directory;
private Analyzer analyzer;
private DocMaker docMaker;
+ private Locale locale;
// we use separate (identical) instances for each "read" task type, so each can iterate the quries separately.
private HashMap<Class<? extends ReadTask>,QueryMaker> readTaskQueryMaker;
@@ -245,6 +247,20 @@
}
/**
+ * @return the locale
+ */
+ public Locale getLocale() {
+ return locale;
+ }
+
+ /**
+ * @param locale the locale to set
+ */
+ public void setLocale(Locale locale) {
+ this.locale = locale;
+ }
+
+ /**
* @return Returns the config.
*/
public Config getConfig() {
Propchange: lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/feeds/EnwikiQueryMaker.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java (original)
+++ lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/CreateIndexTask.java Thu Jan 14 19:05:12 2010
@@ -22,6 +22,7 @@
import org.apache.lucene.index.IndexDeletionPolicy;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.MergeScheduler;
+import org.apache.lucene.index.ConcurrentMergeScheduler;
import org.apache.lucene.index.MergePolicy;
import java.io.BufferedOutputStream;
@@ -33,9 +34,15 @@
/**
* Create an index. <br>
* Other side effects: index writer object in perfRunData is set. <br>
- * Relevant properties: <code>merge.factor, max.buffered,
- * max.field.length, ram.flush.mb [default 0],
- * [default true]</code>.
+ * Relevant properties: <code>merge.factor (default 10),
+ * max.buffered (default no flush), max.field.length (default
+ * 10,000 tokens), max.field.length, compound (default true), ram.flush.mb [default 0],
+ * merge.policy (default org.apache.lucene.index.LogByteSizeMergePolicy),
+ * merge.scheduler (default
+ * org.apache.lucene.index.ConcurrentMergeScheduler),
+ * concurrent.merge.scheduler.max.thread.count and
+ * concurrent.merge.scheduler.max.merge.count (defaults per
+ * ConcurrentMergeScheduler) </code>.
* <p>
* This task also supports a "writer.info.stream" property with the following
* values:
@@ -66,6 +73,18 @@
throw new RuntimeException("unable to instantiate class '" + mergeScheduler + "' as merge scheduler", e);
}
+ if (mergeScheduler.equals("org.apache.lucene.index.ConcurrentMergeScheduler")) {
+ ConcurrentMergeScheduler cms = (ConcurrentMergeScheduler) writer.getMergeScheduler();
+ int v = config.get("concurrent.merge.scheduler.max.thread.count", -1);
+ if (v != -1) {
+ cms.setMaxThreadCount(v);
+ }
+ v = config.get("concurrent.merge.scheduler.max.merge.count", -1);
+ if (v != -1) {
+ cms.setMaxMergeCount(v);
+ }
+ }
+
final String mergePolicy = config.get("merge.policy",
"org.apache.lucene.index.LogByteSizeMergePolicy");
try {
Modified: lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java (original)
+++ lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/ReadTokensTask.java Thu Jan 14 19:05:12 2010
@@ -67,6 +67,8 @@
Analyzer analyzer = getRunData().getAnalyzer();
int tokenCount = 0;
for(final Fieldable field : fields) {
+ if (!field.isTokenized()) continue;
+
final TokenStream stream;
final TokenStream streamValue = field.tokenStreamValue();
Modified: lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/TaskSequence.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/TaskSequence.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/TaskSequence.java (original)
+++ lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/tasks/TaskSequence.java Thu Jan 14 19:05:12 2010
@@ -156,6 +156,7 @@
return count;
}
+ @Override
public void run() {
try {
count = task.runAndMaybeStats(letChildReport);
@@ -188,7 +189,7 @@
bgTasks = new ArrayList<RunBackgroundTask>();
}
RunBackgroundTask bgTask = new RunBackgroundTask(task, letChildReport);
- bgTask.setPriority(getBackgroundDeltaPriority() + Thread.currentThread().getPriority());
+ bgTask.setPriority(task.getBackgroundDeltaPriority() + Thread.currentThread().getPriority());
bgTask.start();
bgTasks.add(bgTask);
} else {
Modified: lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Algorithm.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Algorithm.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Algorithm.java (original)
+++ lucene/java/branches/flex_1458/contrib/benchmark/src/java/org/apache/lucene/benchmark/byTask/utils/Algorithm.java Thu Jan 14 19:05:12 2010
@@ -51,7 +51,6 @@
stok.ordinaryChar('/');
stok.ordinaryChar('(');
stok.ordinaryChar(')');
- stok.ordinaryChar('-');
boolean colonOk = false;
boolean isDisableCountNextTask = false; // only for primitive tasks
currSequence.setDepth(0);
Propchange: lucene/java/branches/flex_1458/contrib/benchmark/src/java/overview.html
------------------------------------------------------------------------------
svn:eol-style = native
Modified: lucene/java/branches/flex_1458/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java (original)
+++ lucene/java/branches/flex_1458/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/TestPerfTasksLogic.java Thu Jan 14 19:05:12 2010
@@ -21,13 +21,20 @@
import java.io.File;
import java.io.FileReader;
import java.io.BufferedReader;
+import java.text.Collator;
import java.util.List;
import java.util.Iterator;
+import java.util.Locale;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.tokenattributes.TermAttribute;
+import org.apache.lucene.benchmark.byTask.feeds.DocMaker;
import org.apache.lucene.benchmark.byTask.feeds.ReutersQueryMaker;
import org.apache.lucene.benchmark.byTask.tasks.CountingSearchTestTask;
import org.apache.lucene.benchmark.byTask.tasks.CountingHighlighterTestTask;
import org.apache.lucene.benchmark.byTask.stats.TaskStats;
+import org.apache.lucene.collation.CollationKeyAnalyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.TermsEnum;
@@ -120,6 +127,8 @@
assertTrue("elapsed time was " + elapsed + " msec", elapsed <= 1500);
}
+ // disabled until we fix BG thread prio -- this test
+ // causes build to hang
public void testBGSearchTaskThreads() throws Exception {
String algLines[] = {
"log.time.step.msec = 100",
@@ -853,6 +862,119 @@
};
}
+ /**
+ * Test that we can change the Locale in the runData,
+ * that it is parsed as we expect.
+ */
+ public void testLocale() throws Exception {
+ // empty Locale: clear it (null)
+ Benchmark benchmark = execBenchmark(getLocaleConfig(""));
+ assertNull(benchmark.getRunData().getLocale());
+
+ // ROOT locale
+ benchmark = execBenchmark(getLocaleConfig("ROOT"));
+ assertEquals(new Locale(""), benchmark.getRunData().getLocale());
+
+ // specify just a language
+ benchmark = execBenchmark(getLocaleConfig("de"));
+ assertEquals(new Locale("de"), benchmark.getRunData().getLocale());
+
+ // specify language + country
+ benchmark = execBenchmark(getLocaleConfig("en,US"));
+ assertEquals(new Locale("en", "US"), benchmark.getRunData().getLocale());
+
+ // specify language + country + variant
+ benchmark = execBenchmark(getLocaleConfig("no,NO,NY"));
+ assertEquals(new Locale("no", "NO", "NY"), benchmark.getRunData().getLocale());
+ }
+
+ private static String[] getLocaleConfig(String localeParam) {
+ String algLines[] = {
+ "# ----- properties ",
+ "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
+ "docs.file=" + getReuters20LinesFile(),
+ "content.source.log.step=3",
+ "content.source.forever=false",
+ "directory=RAMDirectory",
+ "# ----- alg ",
+ "{ \"Rounds\"",
+ " ResetSystemErase",
+ " NewLocale(" + localeParam + ")",
+ " CreateIndex",
+ " { \"AddDocs\" AddDoc > : * ",
+ " NewRound",
+ "} : 1",
+ };
+ return algLines;
+ }
+
+ /**
+ * Test that we can create CollationAnalyzers.
+ */
+ public void testCollator() throws Exception {
+ // ROOT locale
+ Benchmark benchmark = execBenchmark(getCollatorConfig("ROOT", "impl:jdk"));
+ CollationKeyAnalyzer expected = new CollationKeyAnalyzer(Collator
+ .getInstance(new Locale("")));
+ assertEqualCollation(expected, benchmark.getRunData().getAnalyzer(), "foobar");
+
+ // specify just a language
+ benchmark = execBenchmark(getCollatorConfig("de", "impl:jdk"));
+ expected = new CollationKeyAnalyzer(Collator.getInstance(new Locale("de")));
+ assertEqualCollation(expected, benchmark.getRunData().getAnalyzer(), "foobar");
+
+ // specify language + country
+ benchmark = execBenchmark(getCollatorConfig("en,US", "impl:jdk"));
+ expected = new CollationKeyAnalyzer(Collator.getInstance(new Locale("en",
+ "US")));
+ assertEqualCollation(expected, benchmark.getRunData().getAnalyzer(), "foobar");
+
+ // specify language + country + variant
+ benchmark = execBenchmark(getCollatorConfig("no,NO,NY", "impl:jdk"));
+ expected = new CollationKeyAnalyzer(Collator.getInstance(new Locale("no",
+ "NO", "NY")));
+ assertEqualCollation(expected, benchmark.getRunData().getAnalyzer(), "foobar");
+ }
+
+ private void assertEqualCollation(Analyzer a1, Analyzer a2, String text)
+ throws Exception {
+ TokenStream ts1 = a1.tokenStream("bogus", new StringReader(text));
+ TokenStream ts2 = a2.tokenStream("bogus", new StringReader(text));
+ ts1.reset();
+ ts2.reset();
+ TermAttribute termAtt1 = ts1.addAttribute(TermAttribute.class);
+ TermAttribute termAtt2 = ts2.addAttribute(TermAttribute.class);
+ assertTrue(ts1.incrementToken());
+ assertTrue(ts2.incrementToken());
+ assertEquals(termAtt1.term(), termAtt2.term());
+ assertFalse(ts1.incrementToken());
+ assertFalse(ts2.incrementToken());
+ ts1.close();
+ ts2.close();
+ }
+
+ private static String[] getCollatorConfig(String localeParam,
+ String collationParam) {
+ String algLines[] = {
+ "# ----- properties ",
+ "content.source=org.apache.lucene.benchmark.byTask.feeds.LineDocSource",
+ "docs.file=" + getReuters20LinesFile(),
+ "content.source.log.step=3",
+ "content.source.forever=false",
+ "directory=RAMDirectory",
+ "# ----- alg ",
+ "{ \"Rounds\"",
+ " ResetSystemErase",
+ " NewLocale(" + localeParam + ")",
+ " NewCollationAnalyzer(" + collationParam + ")",
+ " CreateIndex",
+ " { \"AddDocs\" AddDoc > : * ",
+ " NewRound",
+ "} : 1",
+ };
+ return algLines;
+ }
+
private static String getReuters20LinesFile() {
return System.getProperty("lucene.common.dir").replace('\\','/') +
"/contrib/benchmark/src/test/org/apache/lucene/benchmark/reuters.first20.lines.txt";
Modified: lucene/java/branches/flex_1458/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/PerfTaskTest.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/PerfTaskTest.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/PerfTaskTest.java (original)
+++ lucene/java/branches/flex_1458/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/PerfTaskTest.java Thu Jan 14 19:05:12 2010
@@ -1,73 +1,73 @@
-package org.apache.lucene.benchmark.byTask.tasks;
-
-/**
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements. See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-import java.util.Properties;
-
-import org.apache.lucene.benchmark.BenchmarkTestCase;
-import org.apache.lucene.benchmark.byTask.PerfRunData;
-import org.apache.lucene.benchmark.byTask.utils.Config;
-
-/** Tests the functionality of the abstract {@link PerfTask}. */
-public class PerfTaskTest extends BenchmarkTestCase {
-
- private static final class MyPerfTask extends PerfTask {
-
- public MyPerfTask(PerfRunData runData) {
- super(runData);
- }
-
- @Override
- public int doLogic() throws Exception {
- return 0;
- }
-
- public int getLogStep() { return logStep; }
-
- }
-
- private PerfRunData createPerfRunData(boolean setLogStep, int logStepVal,
- boolean setTaskLogStep, int taskLogStepVal) throws Exception {
- Properties props = new Properties();
- if (setLogStep) {
- props.setProperty("log.step", Integer.toString(logStepVal));
- }
- if (setTaskLogStep) {
- props.setProperty("log.step.MyPerf", Integer.toString(taskLogStepVal));
- }
- props.setProperty("directory", "RAMDirectory"); // no accidental FS dir.
- Config config = new Config(props);
- return new PerfRunData(config);
- }
-
- private void doLogStepTest(boolean setLogStep, int logStepVal,
- boolean setTaskLogStep, int taskLogStepVal, int expLogStepValue) throws Exception {
- PerfRunData runData = createPerfRunData(setLogStep, logStepVal, setTaskLogStep, taskLogStepVal);
- MyPerfTask mpt = new MyPerfTask(runData);
- assertEquals(expLogStepValue, mpt.getLogStep());
- }
-
- public void testLogStep() throws Exception {
- doLogStepTest(false, -1, false, -1, PerfTask.DEFAULT_LOG_STEP);
- doLogStepTest(true, -1, false, -1, Integer.MAX_VALUE);
- doLogStepTest(true, 100, false, -1, 100);
- doLogStepTest(false, -1, true, -1, Integer.MAX_VALUE);
- doLogStepTest(false, -1, true, 100, 100);
- }
-
-}
+package org.apache.lucene.benchmark.byTask.tasks;
+
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+import java.util.Properties;
+
+import org.apache.lucene.benchmark.BenchmarkTestCase;
+import org.apache.lucene.benchmark.byTask.PerfRunData;
+import org.apache.lucene.benchmark.byTask.utils.Config;
+
+/** Tests the functionality of the abstract {@link PerfTask}. */
+public class PerfTaskTest extends BenchmarkTestCase {
+
+ private static final class MyPerfTask extends PerfTask {
+
+ public MyPerfTask(PerfRunData runData) {
+ super(runData);
+ }
+
+ @Override
+ public int doLogic() throws Exception {
+ return 0;
+ }
+
+ public int getLogStep() { return logStep; }
+
+ }
+
+ private PerfRunData createPerfRunData(boolean setLogStep, int logStepVal,
+ boolean setTaskLogStep, int taskLogStepVal) throws Exception {
+ Properties props = new Properties();
+ if (setLogStep) {
+ props.setProperty("log.step", Integer.toString(logStepVal));
+ }
+ if (setTaskLogStep) {
+ props.setProperty("log.step.MyPerf", Integer.toString(taskLogStepVal));
+ }
+ props.setProperty("directory", "RAMDirectory"); // no accidental FS dir.
+ Config config = new Config(props);
+ return new PerfRunData(config);
+ }
+
+ private void doLogStepTest(boolean setLogStep, int logStepVal,
+ boolean setTaskLogStep, int taskLogStepVal, int expLogStepValue) throws Exception {
+ PerfRunData runData = createPerfRunData(setLogStep, logStepVal, setTaskLogStep, taskLogStepVal);
+ MyPerfTask mpt = new MyPerfTask(runData);
+ assertEquals(expLogStepValue, mpt.getLogStep());
+ }
+
+ public void testLogStep() throws Exception {
+ doLogStepTest(false, -1, false, -1, PerfTask.DEFAULT_LOG_STEP);
+ doLogStepTest(true, -1, false, -1, Integer.MAX_VALUE);
+ doLogStepTest(true, 100, false, -1, 100);
+ doLogStepTest(false, -1, true, -1, Integer.MAX_VALUE);
+ doLogStepTest(false, -1, true, 100, 100);
+ }
+
+}
Propchange: lucene/java/branches/flex_1458/contrib/benchmark/src/test/org/apache/lucene/benchmark/byTask/tasks/PerfTaskTest.java
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/java/branches/flex_1458/contrib/db/bdb-je/src/java/overview.html
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/java/branches/flex_1458/contrib/db/bdb/src/java/overview.html
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/java/branches/flex_1458/contrib/fast-vector-highlighter/pom.xml.template
------------------------------------------------------------------------------
svn:eol-style = native
Modified: lucene/java/branches/flex_1458/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldFragList.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldFragList.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldFragList.java (original)
+++ lucene/java/branches/flex_1458/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldFragList.java Thu Jan 14 19:05:12 2010
@@ -70,6 +70,22 @@
}
}
+ public List<SubInfo> getSubInfos(){
+ return subInfos;
+ }
+
+ public float getTotalBoost(){
+ return totalBoost;
+ }
+
+ public int getStartOffset(){
+ return startOffset;
+ }
+
+ public int getEndOffset(){
+ return endOffset;
+ }
+
@Override
public String toString(){
StringBuilder sb = new StringBuilder();
@@ -80,17 +96,26 @@
return sb.toString();
}
- static class SubInfo {
+ public static class SubInfo {
final String text; // unnecessary member, just exists for debugging purpose
final List<Toffs> termsOffsets; // usually termsOffsets.size() == 1,
// but if position-gap > 1 and slop > 0 then size() could be greater than 1
int seqnum;
+
SubInfo( String text, List<Toffs> termsOffsets, int seqnum ){
this.text = text;
this.termsOffsets = termsOffsets;
this.seqnum = seqnum;
}
+ public List<Toffs> getTermsOffsets(){
+ return termsOffsets;
+ }
+
+ public int getSeqnum(){
+ return seqnum;
+ }
+
@Override
public String toString(){
StringBuilder sb = new StringBuilder();
Modified: lucene/java/branches/flex_1458/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java (original)
+++ lucene/java/branches/flex_1458/contrib/fast-vector-highlighter/src/java/org/apache/lucene/search/vectorhighlight/FieldPhraseList.java Thu Jan 14 19:05:12 2010
@@ -171,9 +171,15 @@
this.startOffset = startOffset;
this.endOffset = endOffset;
}
- void setEndOffset( int endOffset ){
+ public void setEndOffset( int endOffset ){
this.endOffset = endOffset;
}
+ public int getStartOffset(){
+ return startOffset;
+ }
+ public int getEndOffset(){
+ return endOffset;
+ }
@Override
public String toString(){
StringBuilder sb = new StringBuilder();
Propchange: lucene/java/branches/flex_1458/contrib/fast-vector-highlighter/src/java/overview.html
------------------------------------------------------------------------------
svn:eol-style = native
Modified: lucene/java/branches/flex_1458/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java (original)
+++ lucene/java/branches/flex_1458/contrib/fast-vector-highlighter/src/test/org/apache/lucene/search/vectorhighlight/AbstractTestCase.java Thu Jan 14 19:05:12 2010
@@ -268,6 +268,21 @@
protected boolean isDelimiter( int c ){
return delimiters.indexOf( c ) >= 0;
}
+
+ public void reset( Reader input ) throws IOException {
+ super.reset( input );
+ reset();
+ }
+
+ public void reset() throws IOException {
+ startTerm = 0;
+ nextStartOffset = 0;
+ snippet = null;
+ snippetBuffer.setLength( 0 );
+ charBufferIndex = BUFFER_SIZE;
+ charBufferLen = 0;
+ ch = 0;
+ }
}
protected void make1d1fIndex( String value ) throws Exception {
Propchange: lucene/java/branches/flex_1458/contrib/highlighter/src/java/org/apache/lucene/search/highlight/QueryTermScorer.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: lucene/java/branches/flex_1458/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TextFragment.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TextFragment.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TextFragment.java (original)
+++ lucene/java/branches/flex_1458/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TextFragment.java Thu Jan 14 19:05:12 2010
@@ -41,6 +41,7 @@
* @deprecated Use {@link #TextFragment(CharSequence, int, int)} instead.
* This constructor will be removed in Lucene 4.0
*/
+ @Deprecated
public TextFragment(StringBuffer markedUpText,int textStartPos, int fragNum)
{
this.markedUpText=markedUpText;
Modified: lucene/java/branches/flex_1458/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java (original)
+++ lucene/java/branches/flex_1458/contrib/highlighter/src/java/org/apache/lucene/search/highlight/TokenSources.java Thu Jan 14 19:05:12 2010
@@ -38,234 +38,248 @@
import org.apache.lucene.index.TermVectorOffsetInfo;
/**
- * Hides implementation issues associated with obtaining a TokenStream for use with
- * the higlighter - can obtain from TermFreqVectors with offsets and (optionally) positions or
- * from Analyzer class reparsing the stored content.
+ * Hides implementation issues associated with obtaining a TokenStream for use
+ * with the highlighter - can obtain from TermFreqVectors with offsets and
+ * (optionally) positions or from Analyzer class reparsing the stored content.
*/
-public class TokenSources
-{
+public class TokenSources {
/**
- * A convenience method that tries to first get a TermPositionVector for the specified docId, then, falls back to
- * using the passed in {@link org.apache.lucene.document.Document} to retrieve the TokenStream. This is useful when
- * you already have the document, but would prefer to use the vector first.
- * @param reader The {@link org.apache.lucene.index.IndexReader} to use to try and get the vector from
+ * A convenience method that tries to first get a TermPositionVector for the
+ * specified docId, then, falls back to using the passed in
+ * {@link org.apache.lucene.document.Document} to retrieve the TokenStream.
+ * This is useful when you already have the document, but would prefer to use
+ * the vector first.
+ *
+ * @param reader The {@link org.apache.lucene.index.IndexReader} to use to try
+ * and get the vector from
* @param docId The docId to retrieve.
* @param field The field to retrieve on the document
* @param doc The document to fall back on
- * @param analyzer The analyzer to use for creating the TokenStream if the vector doesn't exist
- * @return The {@link org.apache.lucene.analysis.TokenStream} for the {@link org.apache.lucene.document.Fieldable} on the {@link org.apache.lucene.document.Document}
+ * @param analyzer The analyzer to use for creating the TokenStream if the
+ * vector doesn't exist
+ * @return The {@link org.apache.lucene.analysis.TokenStream} for the
+ * {@link org.apache.lucene.document.Fieldable} on the
+ * {@link org.apache.lucene.document.Document}
* @throws IOException if there was an error loading
*/
- public static TokenStream getAnyTokenStream(IndexReader reader, int docId, String field, Document doc, Analyzer analyzer) throws IOException{
- TokenStream ts=null;
+ public static TokenStream getAnyTokenStream(IndexReader reader, int docId,
+ String field, Document doc, Analyzer analyzer) throws IOException {
+ TokenStream ts = null;
+
+ TermFreqVector tfv = reader.getTermFreqVector(docId, field);
+ if (tfv != null) {
+ if (tfv instanceof TermPositionVector) {
+ ts = getTokenStream((TermPositionVector) tfv);
+ }
+ }
+ // No token info stored so fall back to analyzing raw content
+ if (ts == null) {
+ ts = getTokenStream(doc, field, analyzer);
+ }
+ return ts;
+ }
+
+ /**
+ * A convenience method that tries a number of approaches to getting a token
+ * stream. The cost of finding there are no termVectors in the index is
+ * minimal (1000 invocations still registers 0 ms). So this "lazy" (flexible?)
+ * approach to coding is probably acceptable
+ *
+ * @param reader
+ * @param docId
+ * @param field
+ * @param analyzer
+ * @return null if field not stored correctly
+ * @throws IOException
+ */
+ public static TokenStream getAnyTokenStream(IndexReader reader, int docId,
+ String field, Analyzer analyzer) throws IOException {
+ TokenStream ts = null;
+
+ TermFreqVector tfv = reader.getTermFreqVector(docId, field);
+ if (tfv != null) {
+ if (tfv instanceof TermPositionVector) {
+ ts = getTokenStream((TermPositionVector) tfv);
+ }
+ }
+ // No token info stored so fall back to analyzing raw content
+ if (ts == null) {
+ ts = getTokenStream(reader, docId, field, analyzer);
+ }
+ return ts;
+ }
+
+ public static TokenStream getTokenStream(TermPositionVector tpv) {
+ // assumes the worst and makes no assumptions about token position
+ // sequences.
+ return getTokenStream(tpv, false);
+ }
+
+ /**
+ * Low level api. Returns a token stream or null if no offset info available
+ * in index. This can be used to feed the highlighter with a pre-parsed token
+ * stream
+ *
+ * In my tests the speeds to recreate 1000 token streams using this method
+ * are: - with TermVector offset only data stored - 420 milliseconds - with
+ * TermVector offset AND position data stored - 271 milliseconds (nb timings
+ * for TermVector with position data are based on a tokenizer with contiguous
+ * positions - no overlaps or gaps) The cost of not using TermPositionVector
+ * to store pre-parsed content and using an analyzer to re-parse the original
+ * content: - reanalyzing the original content - 980 milliseconds
+ *
+ * The re-analyze timings will typically vary depending on - 1) The complexity
+ * of the analyzer code (timings above were using a
+ * stemmer/lowercaser/stopword combo) 2) The number of other fields (Lucene
+ * reads ALL fields off the disk when accessing just one document field - can
+ * cost dear!) 3) Use of compression on field storage - could be faster due to
+ * compression (less disk IO) or slower (more CPU burn) depending on the
+ * content.
+ *
+ * @param tpv
+ * @param tokenPositionsGuaranteedContiguous true if the token position
+ * numbers have no overlaps or gaps. If looking to eke out the last
+ * drops of performance, set to true. If in doubt, set to false.
+ */
+ public static TokenStream getTokenStream(TermPositionVector tpv,
+ boolean tokenPositionsGuaranteedContiguous) {
+ if (!tokenPositionsGuaranteedContiguous && tpv.getTermPositions(0) != null) {
+ return new TokenStreamFromTermPositionVector(tpv);
+ }
- TermFreqVector tfv = reader.getTermFreqVector(docId,field);
- if(tfv!=null)
- {
- if(tfv instanceof TermPositionVector)
- {
- ts=getTokenStream((TermPositionVector) tfv);
- }
- }
- //No token info stored so fall back to analyzing raw content
- if(ts==null)
- {
- ts=getTokenStream(doc,field,analyzer);
- }
- return ts;
- }
- /**
- * A convenience method that tries a number of approaches to getting a token stream.
- * The cost of finding there are no termVectors in the index is minimal (1000 invocations still
- * registers 0 ms). So this "lazy" (flexible?) approach to coding is probably acceptable
- * @param reader
- * @param docId
- * @param field
- * @param analyzer
- * @return null if field not stored correctly
- * @throws IOException
- */
- public static TokenStream getAnyTokenStream(IndexReader reader,int docId, String field,Analyzer analyzer) throws IOException
- {
- TokenStream ts=null;
-
- TermFreqVector tfv = reader.getTermFreqVector(docId,field);
- if(tfv!=null)
- {
- if(tfv instanceof TermPositionVector)
- {
- ts=getTokenStream((TermPositionVector) tfv);
- }
- }
- //No token info stored so fall back to analyzing raw content
- if(ts==null)
- {
- ts=getTokenStream(reader,docId,field,analyzer);
- }
- return ts;
- }
-
-
- public static TokenStream getTokenStream(TermPositionVector tpv)
- {
- //assumes the worst and makes no assumptions about token position sequences.
- return getTokenStream(tpv,false);
- }
- /**
- * Low level api.
- * Returns a token stream or null if no offset info available in index.
- * This can be used to feed the highlighter with a pre-parsed token stream
- *
- * In my tests the speeds to recreate 1000 token streams using this method are:
- * - with TermVector offset only data stored - 420 milliseconds
- * - with TermVector offset AND position data stored - 271 milliseconds
- * (nb timings for TermVector with position data are based on a tokenizer with contiguous
- * positions - no overlaps or gaps)
- * The cost of not using TermPositionVector to store
- * pre-parsed content and using an analyzer to re-parse the original content:
- * - reanalyzing the original content - 980 milliseconds
- *
- * The re-analyze timings will typically vary depending on -
- * 1) The complexity of the analyzer code (timings above were using a
- * stemmer/lowercaser/stopword combo)
- * 2) The number of other fields (Lucene reads ALL fields off the disk
- * when accessing just one document field - can cost dear!)
- * 3) Use of compression on field storage - could be faster due to compression (less disk IO)
- * or slower (more CPU burn) depending on the content.
- *
- * @param tpv
- * @param tokenPositionsGuaranteedContiguous true if the token position numbers have no overlaps or gaps. If looking
- * to eek out the last drops of performance, set to true. If in doubt, set to false.
- */
- public static TokenStream getTokenStream(TermPositionVector tpv, boolean tokenPositionsGuaranteedContiguous) {
- //an object used to iterate across an array of tokens
- class StoredTokenStream extends TokenStream {
- Token tokens[];
- int currentToken = 0;
- TermAttribute termAtt;
- OffsetAttribute offsetAtt;
-
- StoredTokenStream(Token tokens[]) {
- this.tokens = tokens;
- termAtt = addAttribute(TermAttribute.class);
- offsetAtt = addAttribute(OffsetAttribute.class);
- }
-
- @Override
- public boolean incrementToken() throws IOException {
- if (currentToken >= tokens.length) {
- return false;
- }
- Token token = tokens[currentToken++];
- termAtt.setTermBuffer(token.term());
- offsetAtt.setOffset(token.startOffset(), token.endOffset());
- return true;
- }
- }
- //code to reconstruct the original sequence of Tokens
- String[] terms=tpv.getTerms();
- int[] freq=tpv.getTermFrequencies();
- int totalTokens=0;
-
- for (int t = 0; t < freq.length; t++)
- {
- totalTokens+=freq[t];
+ // an object used to iterate across an array of tokens
+ class StoredTokenStream extends TokenStream {
+ Token tokens[];
+
+ int currentToken = 0;
+
+ TermAttribute termAtt;
+
+ OffsetAttribute offsetAtt;
+
+ StoredTokenStream(Token tokens[]) {
+ this.tokens = tokens;
+ termAtt = addAttribute(TermAttribute.class);
+ offsetAtt = addAttribute(OffsetAttribute.class);
+ }
+
+ @Override
+ public boolean incrementToken() throws IOException {
+ if (currentToken >= tokens.length) {
+ return false;
}
- Token tokensInOriginalOrder[]=new Token[totalTokens];
- ArrayList<Token> unsortedTokens = null;
- for (int t = 0; t < freq.length; t++)
- {
- TermVectorOffsetInfo[] offsets=tpv.getOffsets(t);
- if(offsets==null)
- {
- return null;
- }
-
- int[] pos=null;
- if(tokenPositionsGuaranteedContiguous)
- {
- //try get the token position info to speed up assembly of tokens into sorted sequence
- pos=tpv.getTermPositions(t);
- }
- if(pos==null)
- {
- //tokens NOT stored with positions or not guaranteed contiguous - must add to list and sort later
- if(unsortedTokens==null)
- {
- unsortedTokens=new ArrayList<Token>();
- }
- for (int tp = 0; tp < offsets.length; tp++)
- {
- Token token = new Token(offsets[tp].getStartOffset(), offsets[tp].getEndOffset());
- token.setTermBuffer(terms[t]);
- unsortedTokens.add(token);
- }
- }
- else
- {
- //We have positions stored and a guarantee that the token position information is contiguous
-
- // This may be fast BUT wont work if Tokenizers used which create >1 token in same position or
- // creates jumps in position numbers - this code would fail under those circumstances
-
- //tokens stored with positions - can use this to index straight into sorted array
- for (int tp = 0; tp < pos.length; tp++)
- {
- Token token = new Token(terms[t], offsets[tp].getStartOffset(), offsets[tp].getEndOffset());
- tokensInOriginalOrder[pos[tp]] = token;
- }
- }
+ Token token = tokens[currentToken++];
+ termAtt.setTermBuffer(token.term());
+ offsetAtt.setOffset(token.startOffset(), token.endOffset());
+ return true;
+ }
+ }
+ // code to reconstruct the original sequence of Tokens
+ String[] terms = tpv.getTerms();
+ int[] freq = tpv.getTermFrequencies();
+ int totalTokens = 0;
+
+ for (int t = 0; t < freq.length; t++) {
+ totalTokens += freq[t];
+ }
+ Token tokensInOriginalOrder[] = new Token[totalTokens];
+ ArrayList<Token> unsortedTokens = null;
+ for (int t = 0; t < freq.length; t++) {
+ TermVectorOffsetInfo[] offsets = tpv.getOffsets(t);
+ if (offsets == null) {
+ throw new IllegalArgumentException("Required TermVector Offset information was not found");
+ }
+
+ int[] pos = null;
+ if (tokenPositionsGuaranteedContiguous) {
+ // try get the token position info to speed up assembly of tokens into
+ // sorted sequence
+ pos = tpv.getTermPositions(t);
+ }
+ if (pos == null) {
+ // tokens NOT stored with positions or not guaranteed contiguous - must
+ // add to list and sort later
+ if (unsortedTokens == null) {
+ unsortedTokens = new ArrayList<Token>();
+ }
+ for (int tp = 0; tp < offsets.length; tp++) {
+ Token token = new Token(offsets[tp].getStartOffset(), offsets[tp]
+ .getEndOffset());
+ token.setTermBuffer(terms[t]);
+ unsortedTokens.add(token);
+ }
+ } else {
+ // We have positions stored and a guarantee that the token position
+ // information is contiguous
+
+ // This may be fast BUT won't work if Tokenizers used which create >1
+ // token in same position or
+ // creates jumps in position numbers - this code would fail under those
+ // circumstances
+
+ // tokens stored with positions - can use this to index straight into
+ // sorted array
+ for (int tp = 0; tp < pos.length; tp++) {
+ Token token = new Token(terms[t], offsets[tp].getStartOffset(),
+ offsets[tp].getEndOffset());
+ tokensInOriginalOrder[pos[tp]] = token;
}
- //If the field has been stored without position data we must perform a sort
- if(unsortedTokens!=null) {
- tokensInOriginalOrder= unsortedTokens.toArray(new Token[unsortedTokens.size()]);
- Arrays.sort(tokensInOriginalOrder, new Comparator<Token>(){
- public int compare(Token t1, Token t2) {
- if(t1.startOffset()>t2.endOffset())
- return 1;
- if(t1.startOffset()<t2.startOffset())
- return -1;
- return 0;
- }});
+ }
+ }
+ // If the field has been stored without position data we must perform a sort
+ if (unsortedTokens != null) {
+ tokensInOriginalOrder = unsortedTokens.toArray(new Token[unsortedTokens
+ .size()]);
+ Arrays.sort(tokensInOriginalOrder, new Comparator<Token>() {
+ public int compare(Token t1, Token t2) {
+ if (t1.startOffset() > t2.endOffset())
+ return 1;
+ if (t1.startOffset() < t2.startOffset())
+ return -1;
+ return 0;
}
- return new StoredTokenStream(tokensInOriginalOrder);
+ });
+ }
+ return new StoredTokenStream(tokensInOriginalOrder);
+ }
+
+ public static TokenStream getTokenStream(IndexReader reader, int docId,
+ String field) throws IOException {
+ TermFreqVector tfv = reader.getTermFreqVector(docId, field);
+ if (tfv == null) {
+ throw new IllegalArgumentException(field + " in doc #" + docId
+ + " does not have any term position data stored");
}
+ if (tfv instanceof TermPositionVector) {
+ TermPositionVector tpv = (TermPositionVector) reader.getTermFreqVector(
+ docId, field);
+ return getTokenStream(tpv);
+ }
+ throw new IllegalArgumentException(field + " in doc #" + docId
+ + " does not have any term position data stored");
+ }
+
+ // convenience method
+ public static TokenStream getTokenStream(IndexReader reader, int docId,
+ String field, Analyzer analyzer) throws IOException {
+ Document doc = reader.document(docId);
+ return getTokenStream(doc, field, analyzer);
+ }
+
+ public static TokenStream getTokenStream(Document doc, String field,
+ Analyzer analyzer) {
+ String contents = doc.get(field);
+ if (contents == null) {
+ throw new IllegalArgumentException("Field " + field
+ + " in document is not stored and cannot be analyzed");
+ }
+ return getTokenStream(field, contents, analyzer);
+ }
- public static TokenStream getTokenStream(IndexReader reader,int docId, String field) throws IOException
- {
- TermFreqVector tfv = reader.getTermFreqVector(docId,field);
- if(tfv==null)
- {
- throw new IllegalArgumentException(field+" in doc #"+docId
- +"does not have any term position data stored");
- }
- if(tfv instanceof TermPositionVector)
- {
- TermPositionVector tpv=(TermPositionVector) reader.getTermFreqVector(docId,field);
- return getTokenStream(tpv);
- }
- throw new IllegalArgumentException(field+" in doc #"+docId
- +"does not have any term position data stored");
- }
-
- //convenience method
- public static TokenStream getTokenStream(IndexReader reader,int docId, String field,Analyzer analyzer) throws IOException
- {
- Document doc=reader.document(docId);
- return getTokenStream(doc, field, analyzer);
- }
-
- public static TokenStream getTokenStream(Document doc, String field, Analyzer analyzer){
- String contents=doc.get(field);
- if(contents==null)
- {
- throw new IllegalArgumentException("Field "+field +" in document is not stored and cannot be analyzed");
- }
- return getTokenStream(field, contents, analyzer);
- }
- //convenience method
- public static TokenStream getTokenStream(String field, String contents, Analyzer analyzer){
- return analyzer.tokenStream(field,new StringReader(contents));
+ // convenience method
+ public static TokenStream getTokenStream(String field, String contents,
+ Analyzer analyzer) {
+ return analyzer.tokenStream(field, new StringReader(contents));
}
}
Modified: lucene/java/branches/flex_1458/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTerm.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTerm.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTerm.java (original)
+++ lucene/java/branches/flex_1458/contrib/highlighter/src/java/org/apache/lucene/search/highlight/WeightedSpanTerm.java Thu Jan 14 19:05:12 2010
@@ -53,7 +53,7 @@
* Checks to see if this term is valid at <code>position</code>.
*
* @param position
- * to check against valid term postions
+ * to check against valid term positions
* @return true iff this term is a hit at this position
*/
public boolean checkPosition(int position) {
Propchange: lucene/java/branches/flex_1458/contrib/highlighter/src/java/overview.html
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/java/branches/flex_1458/contrib/highlighter/src/test/
------------------------------------------------------------------------------
--- svn:mergeinfo (original)
+++ svn:mergeinfo Thu Jan 14 19:05:12 2010
@@ -1,5 +1,5 @@
/lucene/java/branches/lucene_2_4/contrib/highlighter/src/test:748824
-/lucene/java/branches/lucene_2_9/contrib/highlighter/src/test:817269-818600,825998,826775,829134,829816,829881,831036
+/lucene/java/branches/lucene_2_9/contrib/highlighter/src/test:817269-818600,825998,826775,829134,829816,829881,831036,896850
/lucene/java/branches/lucene_2_9_back_compat_tests/contrib/highlighter/src/test:818601-821336
-/lucene/java/branches/lucene_3_0/contrib/highlighter/src/test:880793
-/lucene/java/trunk/contrib/highlighter/src/test:829439-833960,880727-886190,889185,889622,889667
+/lucene/java/branches/lucene_3_0/contrib/highlighter/src/test:880793,896906
+/lucene/java/trunk/contrib/highlighter/src/test:829439-833960,880727-886190,889185,889622,889667,889866-899001
Modified: lucene/java/branches/flex_1458/contrib/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java (original)
+++ lucene/java/branches/flex_1458/contrib/icu/src/java/org/apache/lucene/collation/ICUCollationKeyFilter.java Thu Jan 14 19:05:12 2010
@@ -23,13 +23,10 @@
import org.apache.lucene.analysis.TokenFilter;
import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.Token;
import org.apache.lucene.analysis.tokenattributes.TermAttribute;
import org.apache.lucene.util.IndexableBinaryStringTools;
import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.nio.CharBuffer;
/**
@@ -92,15 +89,14 @@
char[] termBuffer = termAtt.termBuffer();
String termText = new String(termBuffer, 0, termAtt.termLength());
collator.getRawCollationKey(termText, reusableKey);
- ByteBuffer collationKeyBuf = ByteBuffer.wrap(reusableKey.bytes, 0, reusableKey.size);
- int encodedLength
- = IndexableBinaryStringTools.getEncodedLength(collationKeyBuf);
+ int encodedLength = IndexableBinaryStringTools.getEncodedLength(
+ reusableKey.bytes, 0, reusableKey.size);
if (encodedLength > termBuffer.length) {
termAtt.resizeTermBuffer(encodedLength);
}
termAtt.setTermLength(encodedLength);
- CharBuffer wrappedTermBuffer = CharBuffer.wrap(termAtt.termBuffer());
- IndexableBinaryStringTools.encode(collationKeyBuf, wrappedTermBuffer);
+ IndexableBinaryStringTools.encode(reusableKey.bytes, 0, reusableKey.size,
+ termAtt.termBuffer(), 0, encodedLength);
return true;
} else {
return false;
Modified: lucene/java/branches/flex_1458/contrib/icu/src/java/overview.html
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/icu/src/java/overview.html?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/icu/src/java/overview.html (original)
+++ lucene/java/branches/flex_1458/contrib/icu/src/java/overview.html Thu Jan 14 19:05:12 2010
@@ -34,7 +34,7 @@
<code>CollationKey</code>s. <code>icu4j-collation-4.0.jar</code>,
a trimmed-down version of <code>icu4j-4.0.jar</code> that contains only the
code and data needed to support collation, is included in Lucene's Subversion
- repository at <code>contrib/collation/lib/</code>.
+ repository at <code>contrib/icu/lib/</code>.
</p>
<h2>Use Cases</h2>
Modified: lucene/java/branches/flex_1458/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedAllTermDocs.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedAllTermDocs.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedAllTermDocs.java (original)
+++ lucene/java/branches/flex_1458/contrib/instantiated/src/java/org/apache/lucene/store/instantiated/InstantiatedAllTermDocs.java Thu Jan 14 19:05:12 2010
@@ -28,6 +28,7 @@
this.reader = reader;
}
+ @Override
public boolean isDeleted(int doc) {
return reader.isDeleted(doc);
}
Propchange: lucene/java/branches/flex_1458/contrib/instantiated/src/java/overview.html
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/java/branches/flex_1458/contrib/lucli/src/java/overview.html
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/java/branches/flex_1458/contrib/memory/src/java/overview.html
------------------------------------------------------------------------------
svn:eol-style = native
Modified: lucene/java/branches/flex_1458/contrib/misc/src/java/org/apache/lucene/misc/ChainedFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/misc/src/java/org/apache/lucene/misc/ChainedFilter.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/misc/src/java/org/apache/lucene/misc/ChainedFilter.java (original)
+++ lucene/java/branches/flex_1458/contrib/misc/src/java/org/apache/lucene/misc/ChainedFilter.java Thu Jan 14 19:05:12 2010
@@ -156,6 +156,7 @@
* switch to a different DocIdSet implementation yourself.
* This method will be removed in Lucene 4.0
**/
+ @Deprecated
protected final DocIdSet finalResult(OpenBitSetDISI result, int maxDocs) {
return result;
}
Propchange: lucene/java/branches/flex_1458/contrib/misc/src/java/org/apache/lucene/queryParser/complexPhrase/ComplexPhraseQueryParser.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: lucene/java/branches/flex_1458/contrib/misc/src/java/org/apache/lucene/queryParser/ext/ExtendableQueryParser.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/misc/src/java/org/apache/lucene/queryParser/ext/ExtendableQueryParser.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/misc/src/java/org/apache/lucene/queryParser/ext/ExtendableQueryParser.java (original)
+++ lucene/java/branches/flex_1458/contrib/misc/src/java/org/apache/lucene/queryParser/ext/ExtendableQueryParser.java Thu Jan 14 19:05:12 2010
@@ -133,7 +133,7 @@
final ParserExtension extension = this.extensions
.getExtension(splitExtensionField.cud);
if (extension != null) {
- return extension.parse(new ExtensionQuery(splitExtensionField.cur,
+ return extension.parse(new ExtensionQuery(this, splitExtensionField.cur,
queryText));
}
return super.getFieldQuery(field, queryText);
Modified: lucene/java/branches/flex_1458/contrib/misc/src/java/org/apache/lucene/queryParser/ext/ExtensionQuery.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/misc/src/java/org/apache/lucene/queryParser/ext/ExtensionQuery.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/misc/src/java/org/apache/lucene/queryParser/ext/ExtensionQuery.java (original)
+++ lucene/java/branches/flex_1458/contrib/misc/src/java/org/apache/lucene/queryParser/ext/ExtensionQuery.java Thu Jan 14 19:05:12 2010
@@ -1,5 +1,7 @@
package org.apache.lucene.queryParser.ext;
+import org.apache.lucene.queryParser.QueryParser;
+
/**
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
@@ -29,6 +31,7 @@
private final String field;
private final String rawQueryString;
+ private final QueryParser topLevelParser;
/**
* Creates a new {@link ExtensionQuery}
@@ -38,9 +41,10 @@
* @param rawQueryString
* the raw extension query string
*/
- public ExtensionQuery(String field, String rawQueryString) {
+ public ExtensionQuery(QueryParser topLevelParser, String field, String rawQueryString) {
this.field = field;
this.rawQueryString = rawQueryString;
+ this.topLevelParser = topLevelParser;
}
/**
@@ -60,4 +64,12 @@
public String getRawQueryString() {
return rawQueryString;
}
+
+ /**
+ * Returns the top level parser which created this {@link ExtensionQuery}
+ * @return the top level parser which created this {@link ExtensionQuery}
+ */
+ public QueryParser getTopLevelParser() {
+ return topLevelParser;
+ }
}
Propchange: lucene/java/branches/flex_1458/contrib/misc/src/java/overview.html
------------------------------------------------------------------------------
svn:eol-style = native
Modified: lucene/java/branches/flex_1458/contrib/misc/src/test/org/apache/lucene/misc/ChainedFilterTest.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/misc/src/test/org/apache/lucene/misc/ChainedFilterTest.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/misc/src/test/org/apache/lucene/misc/ChainedFilterTest.java (original)
+++ lucene/java/branches/flex_1458/contrib/misc/src/test/org/apache/lucene/misc/ChainedFilterTest.java Thu Jan 14 19:05:12 2010
@@ -18,6 +18,7 @@
*/
import java.util.Calendar;
+import java.util.GregorianCalendar;
import junit.framework.TestCase;
@@ -60,7 +61,8 @@
IndexWriter writer =
new IndexWriter(directory, new WhitespaceAnalyzer(), true, IndexWriter.MaxFieldLength.UNLIMITED);
- Calendar cal = Calendar.getInstance();
+ Calendar cal = new GregorianCalendar();
+ cal.clear();
cal.setTimeInMillis(1041397200000L); // 2003 January 01
for (int i = 0; i < MAX; i++) {
Propchange: lucene/java/branches/flex_1458/contrib/misc/src/test/org/apache/lucene/queryParser/complexPhrase/TestComplexPhraseQuery.java
------------------------------------------------------------------------------
svn:eol-style = native
Modified: lucene/java/branches/flex_1458/contrib/misc/src/test/org/apache/lucene/queryParser/ext/TestExtensions.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/misc/src/test/org/apache/lucene/queryParser/ext/TestExtensions.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/misc/src/test/org/apache/lucene/queryParser/ext/TestExtensions.java (original)
+++ lucene/java/branches/flex_1458/contrib/misc/src/test/org/apache/lucene/queryParser/ext/TestExtensions.java Thu Jan 14 19:05:12 2010
@@ -26,6 +26,7 @@
private Extensions ext;
+ @Override
protected void setUp() throws Exception {
super.setUp();
this.ext = new Extensions();
Modified: lucene/java/branches/flex_1458/contrib/misc/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/misc/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/misc/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java (original)
+++ lucene/java/branches/flex_1458/contrib/misc/src/test/org/apache/lucene/queryParser/precedence/TestPrecedenceQueryParser.java Thu Jan 14 19:05:12 2010
@@ -392,7 +392,12 @@
public String getLocalizedDate(int year, int month, int day) {
DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT);
Calendar calendar = new GregorianCalendar();
+ calendar.clear();
calendar.set(year, month, day);
+ calendar.set(Calendar.HOUR_OF_DAY, 23);
+ calendar.set(Calendar.MINUTE, 59);
+ calendar.set(Calendar.SECOND, 59);
+ calendar.set(Calendar.MILLISECOND, 999);
return df.format(calendar.getTime());
}
Modified: lucene/java/branches/flex_1458/contrib/queries/src/java/org/apache/lucene/search/BooleanFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/queries/src/java/org/apache/lucene/search/BooleanFilter.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/queries/src/java/org/apache/lucene/search/BooleanFilter.java (original)
+++ lucene/java/branches/flex_1458/contrib/queries/src/java/org/apache/lucene/search/BooleanFilter.java Thu Jan 14 19:05:12 2010
@@ -117,6 +117,7 @@
* switch to a different DocIdSet implementation yourself.
* This method will be removed in Lucene 4.0
*/
+ @Deprecated
protected final DocIdSet finalResult(OpenBitSetDISI result, int maxDocs) {
return result;
}
Propchange: lucene/java/branches/flex_1458/contrib/queries/src/java/overview.html
------------------------------------------------------------------------------
svn:eol-style = native
Modified: lucene/java/branches/flex_1458/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/MultiFieldQueryParserWrapper.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/MultiFieldQueryParserWrapper.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/MultiFieldQueryParserWrapper.java (original)
+++ lucene/java/branches/flex_1458/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/MultiFieldQueryParserWrapper.java Thu Jan 14 19:05:12 2010
@@ -36,6 +36,7 @@
* used along the transition from the old query parser to the new
* one
*/
+@Deprecated
public class MultiFieldQueryParserWrapper extends QueryParserWrapper {
/**
Modified: lucene/java/branches/flex_1458/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/QueryParserWrapper.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/QueryParserWrapper.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/QueryParserWrapper.java (original)
+++ lucene/java/branches/flex_1458/contrib/queryparser/src/java/org/apache/lucene/queryParser/standard/QueryParserWrapper.java Thu Jan 14 19:05:12 2010
@@ -64,6 +64,7 @@
* used along the transition from the old query parser to the new
* one
*/
+@Deprecated
public class QueryParserWrapper {
/**
Modified: lucene/java/branches/flex_1458/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java (original)
+++ lucene/java/branches/flex_1458/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQPHelper.java Thu Jan 14 19:05:12 2010
@@ -650,25 +650,24 @@
}
}
- private String getLocalizedDate(int year, int month, int day,
- boolean extendLastDate) {
+ private String getLocalizedDate(int year, int month, int day) {
DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT);
Calendar calendar = new GregorianCalendar();
+ calendar.clear();
calendar.set(year, month, day);
- if (extendLastDate) {
- calendar.set(Calendar.HOUR_OF_DAY, 23);
- calendar.set(Calendar.MINUTE, 59);
- calendar.set(Calendar.SECOND, 59);
- calendar.set(Calendar.MILLISECOND, 999);
- }
+ calendar.set(Calendar.HOUR_OF_DAY, 23);
+ calendar.set(Calendar.MINUTE, 59);
+ calendar.set(Calendar.SECOND, 59);
+ calendar.set(Calendar.MILLISECOND, 999);
return df.format(calendar.getTime());
}
/** for testing legacy DateField support */
public void testLegacyDateRange() throws Exception {
- String startDate = getLocalizedDate(2002, 1, 1, false);
- String endDate = getLocalizedDate(2002, 1, 4, false);
+ String startDate = getLocalizedDate(2002, 1, 1);
+ String endDate = getLocalizedDate(2002, 1, 4);
Calendar endDateExpected = new GregorianCalendar();
+ endDateExpected.clear();
endDateExpected.set(2002, 1, 4, 23, 59, 59);
endDateExpected.set(Calendar.MILLISECOND, 999);
assertQueryEquals("[ " + escapeDateString(startDate) + " TO " + escapeDateString(endDate) + "]", null, "["
@@ -679,9 +678,10 @@
}
public void testDateRange() throws Exception {
- String startDate = getLocalizedDate(2002, 1, 1, false);
- String endDate = getLocalizedDate(2002, 1, 4, false);
+ String startDate = getLocalizedDate(2002, 1, 1);
+ String endDate = getLocalizedDate(2002, 1, 4);
Calendar endDateExpected = new GregorianCalendar();
+ endDateExpected.clear();
endDateExpected.set(2002, 1, 4, 23, 59, 59);
endDateExpected.set(Calendar.MILLISECOND, 999);
final String defaultField = "default";
Modified: lucene/java/branches/flex_1458/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java (original)
+++ lucene/java/branches/flex_1458/contrib/queryparser/src/test/org/apache/lucene/queryParser/standard/TestQueryParserWrapper.java Thu Jan 14 19:05:12 2010
@@ -644,25 +644,24 @@
}
}
- private String getLocalizedDate(int year, int month, int day,
- boolean extendLastDate) {
+ private String getLocalizedDate(int year, int month, int day) {
DateFormat df = DateFormat.getDateInstance(DateFormat.SHORT);
Calendar calendar = new GregorianCalendar();
+ calendar.clear();
calendar.set(year, month, day);
- if (extendLastDate) {
- calendar.set(Calendar.HOUR_OF_DAY, 23);
- calendar.set(Calendar.MINUTE, 59);
- calendar.set(Calendar.SECOND, 59);
- calendar.set(Calendar.MILLISECOND, 999);
- }
+ calendar.set(Calendar.HOUR_OF_DAY, 23);
+ calendar.set(Calendar.MINUTE, 59);
+ calendar.set(Calendar.SECOND, 59);
+ calendar.set(Calendar.MILLISECOND, 999);
return df.format(calendar.getTime());
}
/** for testing legacy DateField support */
public void testLegacyDateRange() throws Exception {
- String startDate = getLocalizedDate(2002, 1, 1, false);
- String endDate = getLocalizedDate(2002, 1, 4, false);
+ String startDate = getLocalizedDate(2002, 1, 1);
+ String endDate = getLocalizedDate(2002, 1, 4);
Calendar endDateExpected = new GregorianCalendar();
+ endDateExpected.clear();
endDateExpected.set(2002, 1, 4, 23, 59, 59);
endDateExpected.set(Calendar.MILLISECOND, 999);
assertQueryEquals("[ " + escapeDateString(startDate) + " TO " + escapeDateString(endDate) + "]", null, "["
@@ -673,9 +672,10 @@
}
public void testDateRange() throws Exception {
- String startDate = getLocalizedDate(2002, 1, 1, false);
- String endDate = getLocalizedDate(2002, 1, 4, false);
+ String startDate = getLocalizedDate(2002, 1, 1);
+ String endDate = getLocalizedDate(2002, 1, 4);
Calendar endDateExpected = new GregorianCalendar();
+ endDateExpected.clear();
endDateExpected.set(2002, 1, 4, 23, 59, 59);
endDateExpected.set(Calendar.MILLISECOND, 999);
final String defaultField = "default";
Propchange: lucene/java/branches/flex_1458/contrib/regex/src/java/overview.html
------------------------------------------------------------------------------
svn:eol-style = native
Propchange: lucene/java/branches/flex_1458/contrib/remote/src/java/overview.html
------------------------------------------------------------------------------
svn:eol-style = native
Modified: lucene/java/branches/flex_1458/contrib/snowball/build.xml
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/snowball/build.xml?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/snowball/build.xml (original)
+++ lucene/java/branches/flex_1458/contrib/snowball/build.xml Thu Jan 14 19:05:12 2010
@@ -131,10 +131,26 @@
</target>
<target name="compile-core" depends="build-analyzers, common.compile-core" />
+ <target name="compile-test" depends="download-vocab-tests, common.compile-test" />
<target name="build-analyzers" unless="analyzers.jar.present">
<echo>Snowball building dependency ${analyzers.jar}</echo>
<ant antfile="../analyzers/build.xml" target="default" inheritall="false" dir="../analyzers" />
</target>
+ <property name="snowball.vocab.rev" value="500"/>
+ <property name="snowball.vocab.url"
+ value="svn://svn.tartarus.org/snowball/trunk/data"/>
+ <property name="vocab.dir" value="src/test/org/apache/lucene/analysis/snowball"/>
+
+ <target name="download-vocab-tests" depends="compile-core"
+ description="Downloads Snowball vocabulary tests">
+ <sequential>
+ <mkdir dir="${vocab.dir}"/>
+ <exec dir="${vocab.dir}" executable="${svn.exe}"
+ failifexecutionfails="false">
+ <arg line="checkout -r ${snowball.vocab.rev} ${snowball.vocab.url}"/>
+ </exec>
+ </sequential>
+ </target>
</project>
Modified: lucene/java/branches/flex_1458/contrib/snowball/src/java/org/apache/lucene/analysis/snowball/SnowballAnalyzer.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/snowball/src/java/org/apache/lucene/analysis/snowball/SnowballAnalyzer.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/snowball/src/java/org/apache/lucene/analysis/snowball/SnowballAnalyzer.java (original)
+++ lucene/java/branches/flex_1458/contrib/snowball/src/java/org/apache/lucene/analysis/snowball/SnowballAnalyzer.java Thu Jan 14 19:05:12 2010
@@ -51,11 +51,22 @@
this.matchVersion = matchVersion;
}
- /** Builds the named analyzer with the given stop words. */
+ /**
+ * Builds the named analyzer with the given stop words.
+ * @deprecated Use {@link #SnowballAnalyzer(Version, String, Set)} instead.
+ */
+ @Deprecated
public SnowballAnalyzer(Version matchVersion, String name, String[] stopWords) {
this(matchVersion, name);
stopSet = StopFilter.makeStopSet(matchVersion, stopWords);
}
+
+ /** Builds the named analyzer with the given stop words. */
+ public SnowballAnalyzer(Version matchVersion, String name, Set<?> stopWords) {
+ this(matchVersion, name);
+ stopSet = CharArraySet.unmodifiableSet(CharArraySet.copy(matchVersion,
+ stopWords));
+ }
/** Constructs a {@link StandardTokenizer} filtered by a {@link
StandardFilter}, a {@link LowerCaseFilter}, a {@link StopFilter},
Modified: lucene/java/branches/flex_1458/contrib/snowball/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java
URL: http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/contrib/snowball/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java?rev=899359&r1=899358&r2=899359&view=diff
==============================================================================
--- lucene/java/branches/flex_1458/contrib/snowball/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java (original)
+++ lucene/java/branches/flex_1458/contrib/snowball/src/java/org/apache/lucene/analysis/snowball/SnowballFilter.java Thu Jan 14 19:05:12 2010
@@ -74,13 +74,16 @@
@Override
public final boolean incrementToken() throws IOException {
if (input.incrementToken()) {
- String originalTerm = termAtt.term();
- stemmer.setCurrent(originalTerm);
+ char termBuffer[] = termAtt.termBuffer();
+ final int length = termAtt.termLength();
+ stemmer.setCurrent(termBuffer, length);
stemmer.stem();
- String finalTerm = stemmer.getCurrent();
- // Don't bother updating, if it is unchanged.
- if (!originalTerm.equals(finalTerm))
- termAtt.setTermBuffer(finalTerm);
+ final char finalTerm[] = stemmer.getCurrentBuffer();
+ final int newLength = stemmer.getCurrentBufferLength();
+ if (finalTerm != termBuffer)
+ termAtt.setTermBuffer(finalTerm, 0, newLength);
+ else
+ termAtt.setTermLength(newLength);
return true;
} else {
return false;