You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by eh...@apache.org on 2005/11/22 02:46:32 UTC
svn commit: r348060 - in /lucene/java/trunk/src: java/org/apache/lucene/analysis/ java/org/apache/lucene/index/ test/org/apache/lucene/index/ test/org/apache/lucene/search/
Author: ehatcher
Date: Mon Nov 21 17:46:24 2005
New Revision: 348060
URL: http://svn.apache.org/viewcvs?rev=348060&view=rev
Log:
added getPositionIncrementGap(String fieldName) to Analyzer
Modified:
lucene/java/trunk/src/java/org/apache/lucene/analysis/Analyzer.java
lucene/java/trunk/src/java/org/apache/lucene/index/DocumentWriter.java
lucene/java/trunk/src/test/org/apache/lucene/index/DocHelper.java
lucene/java/trunk/src/test/org/apache/lucene/index/TestDocumentWriter.java
lucene/java/trunk/src/test/org/apache/lucene/search/TestPhraseQuery.java
Modified: lucene/java/trunk/src/java/org/apache/lucene/analysis/Analyzer.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/analysis/Analyzer.java?rev=348060&r1=348059&r2=348060&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/analysis/Analyzer.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/analysis/Analyzer.java Mon Nov 21 17:46:24 2005
@@ -49,5 +49,23 @@
{
return tokenStream(null, reader);
}
+
+ /**
+ * Invoked, by DocumentWriter, before indexing a Field instance if
+ * terms have already been added to that field. This allows custom
+ * analyzers to place an automatic position increment gap between
+ * Field instances using the same field name. The default value
+ * for the position increment gap is 0. With a 0 position increment gap and
+ * the typical default token position increment of 1, all terms in a field,
+ * including across Field instances, are in successive positions, allowing
+ * exact PhraseQuery matches, for instance, across Field instance boundaries.
+ *
+ * @param fieldName Field name being indexed.
+ * @return position increment gap, added to the next token emitted from {@link #tokenStream(String,Reader)}
+ */
+ public int getPositionIncrementGap(String fieldName)
+ {
+ return 0;
+ }
}
Modified: lucene/java/trunk/src/java/org/apache/lucene/index/DocumentWriter.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/java/org/apache/lucene/index/DocumentWriter.java?rev=348060&r1=348059&r2=348060&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/DocumentWriter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/DocumentWriter.java Mon Nov 21 17:46:24 2005
@@ -135,6 +135,7 @@
int length = fieldLengths[fieldNumber]; // length of field
int position = fieldPositions[fieldNumber]; // position in field
+ if (length>0) position+=analyzer.getPositionIncrementGap(fieldName);
int offset = fieldOffsets[fieldNumber]; // offset field
if (field.isIndexed()) {
Modified: lucene/java/trunk/src/test/org/apache/lucene/index/DocHelper.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/test/org/apache/lucene/index/DocHelper.java?rev=348060&r1=348059&r2=348060&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/DocHelper.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/DocHelper.java Mon Nov 21 17:46:24 2005
@@ -1,12 +1,19 @@
package org.apache.lucene.index;
/**
- * Created by IntelliJ IDEA.
- * User: Grant Ingersoll
- * Date: Feb 2, 2004
- * Time: 6:16:12 PM
- * $Id$
- * Copyright 2004. Center For Natural Language Processing
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
*/
import org.apache.lucene.analysis.Analyzer;
@@ -21,10 +28,6 @@
import java.util.Map;
import java.util.Enumeration;
-/**
- *
- *
- **/
class DocHelper {
public static final String FIELD_1_TEXT = "field one text";
public static final String TEXT_FIELD_1_KEY = "textField1";
@@ -68,6 +71,14 @@
public static Field unStoredField2 = new Field(UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT,
Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.YES);
+ public static final String REPEATED_1_TEXT = "repeated one";
+ public static final String REPEATED_KEY = "repeated";
+ public static Field repeatedField1 = new Field(REPEATED_KEY, REPEATED_1_TEXT,
+ Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO);
+ public static final String REPEATED_2_TEXT = "repeated two";
+ public static Field repeatedField2 = new Field(REPEATED_KEY, REPEATED_2_TEXT,
+ Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO);
+
public static Map nameValues = null;
// ordered list of all the fields...
@@ -81,6 +92,8 @@
unIndField,
unStoredField1,
unStoredField2,
+ repeatedField1,
+ repeatedField2
};
// Map<String fieldName, Field field>
@@ -156,9 +169,8 @@
*/
public static void writeDoc(Directory dir, String segment, Document doc) throws IOException
{
- Analyzer analyzer = new WhitespaceAnalyzer();
Similarity similarity = Similarity.getDefault();
- writeDoc(dir, analyzer, similarity, segment, doc);
+ writeDoc(dir, new WhitespaceAnalyzer(), similarity, segment, doc);
}
/**
Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestDocumentWriter.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/test/org/apache/lucene/index/TestDocumentWriter.java?rev=348060&r1=348059&r2=348060&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestDocumentWriter.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestDocumentWriter.java Mon Nov 21 17:46:24 2005
@@ -17,14 +17,16 @@
*/
import junit.framework.TestCase;
-import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
-import org.apache.lucene.search.Similarity;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
+import org.apache.lucene.search.Similarity;
+import org.apache.lucene.store.RAMDirectory;
-import java.io.IOException;
+import java.io.Reader;
public class TestDocumentWriter extends TestCase {
private RAMDirectory dir = new RAMDirectory();
@@ -48,54 +50,62 @@
}
- public void testAddDocument() {
- Analyzer analyzer = new WhitespaceAnalyzer();
+ public void testAddDocument() throws Exception {
+ Analyzer analyzer = new Analyzer() {
+ public TokenStream tokenStream(String fieldName, Reader reader) {
+ return new WhitespaceTokenizer(reader);
+ }
+
+ public int getPositionIncrementGap(String fieldName) {
+ return 500;
+ }
+ };
Similarity similarity = Similarity.getDefault();
DocumentWriter writer = new DocumentWriter(dir, analyzer, similarity, 50);
- assertTrue(writer != null);
- try {
- String segName="test";
- writer.addDocument(segName, testDoc);
- //After adding the document, we should be able to read it back in
- SegmentReader reader = SegmentReader.get(new SegmentInfo(segName, 1, dir));
- assertTrue(reader != null);
- Document doc = reader.document(0);
- assertTrue(doc != null);
-
- //System.out.println("Document: " + doc);
- Field [] fields = doc.getFields("textField2");
- assertTrue(fields != null && fields.length == 1);
- assertTrue(fields[0].stringValue().equals(DocHelper.FIELD_2_TEXT));
- assertTrue(fields[0].isTermVectorStored() == true);
-
- fields = doc.getFields("textField1");
- assertTrue(fields != null && fields.length == 1);
- assertTrue(fields[0].stringValue().equals(DocHelper.FIELD_1_TEXT));
- assertTrue(fields[0].isTermVectorStored() == false);
-
- fields = doc.getFields("keyField");
- assertTrue(fields != null && fields.length == 1);
- assertTrue(fields[0].stringValue().equals(DocHelper.KEYWORD_TEXT));
-
- fields = doc.getFields(DocHelper.NO_NORMS_KEY);
- assertTrue(fields != null && fields.length == 1);
- assertTrue(fields[0].stringValue().equals(DocHelper.NO_NORMS_TEXT));
-
- fields = doc.getFields(DocHelper.TEXT_FIELD_3_KEY);
- assertTrue(fields != null && fields.length == 1);
- assertTrue(fields[0].stringValue().equals(DocHelper.FIELD_3_TEXT));
-
- // test that the norm file is not present if omitNorms is true
- for (int i=0; i<reader.fieldInfos.size(); i++) {
- FieldInfo fi = reader.fieldInfos.fieldInfo(i);
- if (fi.isIndexed) {
- assertTrue(fi.omitNorms == !dir.fileExists(segName + ".f" + i));
- }
+ String segName = "test";
+ writer.addDocument(segName, testDoc);
+ //After adding the document, we should be able to read it back in
+ SegmentReader reader = SegmentReader.get(new SegmentInfo(segName, 1, dir));
+ assertTrue(reader != null);
+ Document doc = reader.document(0);
+ assertTrue(doc != null);
+
+ //System.out.println("Document: " + doc);
+ Field [] fields = doc.getFields("textField2");
+ assertTrue(fields != null && fields.length == 1);
+ assertTrue(fields[0].stringValue().equals(DocHelper.FIELD_2_TEXT));
+ assertTrue(fields[0].isTermVectorStored());
+
+ fields = doc.getFields("textField1");
+ assertTrue(fields != null && fields.length == 1);
+ assertTrue(fields[0].stringValue().equals(DocHelper.FIELD_1_TEXT));
+ assertFalse(fields[0].isTermVectorStored());
+
+ fields = doc.getFields("keyField");
+ assertTrue(fields != null && fields.length == 1);
+ assertTrue(fields[0].stringValue().equals(DocHelper.KEYWORD_TEXT));
+
+ fields = doc.getFields(DocHelper.NO_NORMS_KEY);
+ assertTrue(fields != null && fields.length == 1);
+ assertTrue(fields[0].stringValue().equals(DocHelper.NO_NORMS_TEXT));
+
+ fields = doc.getFields(DocHelper.TEXT_FIELD_3_KEY);
+ assertTrue(fields != null && fields.length == 1);
+ assertTrue(fields[0].stringValue().equals(DocHelper.FIELD_3_TEXT));
+
+ // test that the norm file is not present if omitNorms is true
+ for (int i = 0; i < reader.fieldInfos.size(); i++) {
+ FieldInfo fi = reader.fieldInfos.fieldInfo(i);
+ if (fi.isIndexed) {
+ assertTrue(fi.omitNorms == !dir.fileExists(segName + ".f" + i));
}
-
- } catch (IOException e) {
- e.printStackTrace();
- assertTrue(false);
}
+
+ TermPositions termPositions = reader.termPositions(new Term(DocHelper.REPEATED_KEY, "repeated"));
+ assertTrue(termPositions.next());
+ int freq = termPositions.freq();
+ assertEquals(2, freq);
+ assertEquals(0, termPositions.nextPosition());
+ assertEquals(502, termPositions.nextPosition());
}
}
Modified: lucene/java/trunk/src/test/org/apache/lucene/search/TestPhraseQuery.java
URL: http://svn.apache.org/viewcvs/lucene/java/trunk/src/test/org/apache/lucene/search/TestPhraseQuery.java?rev=348060&r1=348059&r2=348060&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/search/TestPhraseQuery.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/search/TestPhraseQuery.java Mon Nov 21 17:46:24 2005
@@ -17,10 +17,14 @@
*/
import java.io.IOException;
+import java.io.Reader;
import junit.framework.TestCase;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.StopAnalyzer;
+import org.apache.lucene.analysis.Analyzer;
+import org.apache.lucene.analysis.TokenStream;
+import org.apache.lucene.analysis.WhitespaceTokenizer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
@@ -41,10 +45,22 @@
public void setUp() throws Exception {
directory = new RAMDirectory();
- IndexWriter writer = new IndexWriter(directory, new WhitespaceAnalyzer(), true);
+ Analyzer analyzer = new Analyzer() {
+ public TokenStream tokenStream(String fieldName, Reader reader) {
+ return new WhitespaceTokenizer(reader);
+ }
+
+ public int getPositionIncrementGap(String fieldName) {
+ return 100;
+ }
+ };
+ IndexWriter writer = new IndexWriter(directory, analyzer, true);
Document doc = new Document();
doc.add(new Field("field", "one two three four five", Field.Store.YES, Field.Index.TOKENIZED));
+ doc.add(new Field("repeated", "this is a repeated field - first part", Field.Store.YES, Field.Index.TOKENIZED));
+ Field repeatedField = new Field("repeated", "second part of a repeated field", Field.Store.YES, Field.Index.TOKENIZED);
+ doc.add(repeatedField);
writer.addDocument(doc);
writer.optimize();
@@ -292,6 +308,17 @@
assertEquals(1, hits.id(1));
assertEquals(0.31, hits.score(2), 0.01);
assertEquals(2, hits.id(2));
+ }
+
+ public void testWrappedPhrase() throws IOException {
+ query.add(new Term("repeated", "first"));
+ query.add(new Term("repeated", "part"));
+ query.add(new Term("repeated", "second"));
+ query.add(new Term("repeated", "part"));
+ query.setSlop(99);
+
+ Hits hits = searcher.search(query);
+ assertEquals(0, hits.length());
}
}
Re: svn commit: r348060 - in /lucene/java/trunk/src: java/org/apache/lucene/analysis/ java/org/apache/lucene/index/ test/org/apache/lucene/index/ test/org/apache/lucene/search/
Posted by Erik Hatcher <er...@ehatchersolutions.com>.
On 21 Nov 2005, at 21:20, Yonik Seeley wrote:
> On 11/21/05, ehatcher@apache.org <eh...@apache.org> wrote:
>> Author: ehatcher
>> Date: Mon Nov 21 17:46:24 2005
>> New Revision: 348060
>>
>> URL: http://svn.apache.org/viewcvs?rev=348060&view=rev
>> Log:
>> added getPositionIncrementGap(String fieldName) to Analyzer
>
> Cool.
>
> Oh, uh, did I mention I wasn't the best at picking names off the top
> of my head ;-)
I deliberated on the name and your suggestion was just fine, so I
stuck with it.
Thanks again for your input on this valuable change.
Erik
---------------------------------------------------------------------
To unsubscribe, e-mail: java-dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: java-dev-help@lucene.apache.org
Re: svn commit: r348060 - in /lucene/java/trunk/src: java/org/apache/lucene/analysis/ java/org/apache/lucene/index/ test/org/apache/lucene/index/ test/org/apache/lucene/search/
Posted by Yonik Seeley <ys...@gmail.com>.
On 11/21/05, ehatcher@apache.org <eh...@apache.org> wrote:
> Author: ehatcher
> Date: Mon Nov 21 17:46:24 2005
> New Revision: 348060
>
> URL: http://svn.apache.org/viewcvs?rev=348060&view=rev
> Log:
> added getPositionIncrementGap(String fieldName) to Analyzer
Cool.
Oh, uh, did I mention I wasn't the best at picking names off the top
of my head ;-)
If you like getPositionIncrementGap, that's fine with me, but
getPositionGap or getFieldPositionGap, or something of the sort might
be clearer. Your call... (and I wouldn't even bring it up, but IDEs
today make it sooo easy to change your mind)
-Yonik
Re: svn commit: r348060 - in /lucene/java/trunk/src: java/org/apache/lucene/analysis/ java/org/apache/lucene/index/ test/org/apache/lucene/index/ test/org/apache/lucene/search/
Posted by Yonik Seeley <ys...@gmail.com>.
On 11/21/05, ehatcher@apache.org <eh...@apache.org> wrote:
> Author: ehatcher
> Date: Mon Nov 21 17:46:24 2005
> New Revision: 348060
>
> URL: http://svn.apache.org/viewcvs?rev=348060&view=rev
> Log:
> added getPositionIncrementGap(String fieldName) to Analyzer
Cool.
Oh, uh, did I mention I wasn't the best at picking names off the top
of my head ;-)
If you like getPositionIncrementGap, that's fine with me, but
getPositionGap or getFieldPositionGap, or something of the sort might
be clearer. Your call... (and I wouldn't even bring it up, but IDEs
today make it sooo easy to change your mind)
-Yonik
---------------------------------------------------------------------
To unsubscribe, e-mail: java-dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: java-dev-help@lucene.apache.org
Re: svn commit: r348060 - in /lucene/java/trunk/src: java/org/apache/lucene/analysis/ java/org/apache/lucene/index/ test/org/apache/lucene/index/ test/org/apache/lucene/search/
Posted by Erik Hatcher <er...@ehatchersolutions.com>.
On 22 Nov 2005, at 13:19, Doug Cutting wrote:
> ehatcher@apache.org wrote:
>> + * Invoked, by DocumentWriter, before indexing a Field instance if
>> + * terms have already been added to that field. This allows custom
>> + * analyzers to place an automatic position increment gap between
>> + * Field instances using the same field name. The default value
>
> DocumentWriter is not a public class and thus should probably not
> be mentioned in public javadoc.
Good point. Done.
Erik
---------------------------------------------------------------------
To unsubscribe, e-mail: java-dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: java-dev-help@lucene.apache.org
Re: svn commit: r348060 - in /lucene/java/trunk/src: java/org/apache/lucene/analysis/ java/org/apache/lucene/index/ test/org/apache/lucene/index/ test/org/apache/lucene/search/
Posted by Doug Cutting <cu...@apache.org>.
ehatcher@apache.org wrote:
> + * Invoked, by DocumentWriter, before indexing a Field instance if
> + * terms have already been added to that field. This allows custom
> + * analyzers to place an automatic position increment gap between
> + * Field instances using the same field name. The default value
DocumentWriter is not a public class and thus should probably not be
mentioned in public javadoc.
Doug
---------------------------------------------------------------------
To unsubscribe, e-mail: java-dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: java-dev-help@lucene.apache.org