You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-commits@lucene.apache.org by gs...@apache.org on 2006/06/10 03:23:24 UTC
svn commit: r413201 [2/2] - in /lucene/java/trunk: ./
contrib/memory/src/java/org/apache/lucene/index/memory/
contrib/swing/src/java/org/apache/lucene/swing/models/
src/java/org/apache/lucene/analysis/ src/java/org/apache/lucene/document/
src/java/org/...
Modified: lucene/java/trunk/src/java/org/apache/lucene/index/IndexReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/IndexReader.java?rev=413201&r1=413200&r2=413201&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/IndexReader.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/IndexReader.java Fri Jun 9 18:23:22 2006
@@ -17,7 +17,7 @@
*/
import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
@@ -42,7 +42,7 @@
document in the index. These document numbers are ephemeral--they may change
as documents are added to and deleted from an index. Clients should thus not
rely on a given document having the same number between sessions.
-
+
<p> An IndexReader can be opened on a directory for which an IndexWriter is
opened already, but it cannot be used to delete documents from the index then.
@@ -50,13 +50,13 @@
@version $Id$
*/
public abstract class IndexReader {
-
+
public static final class FieldOption {
private String option;
private FieldOption() { }
private FieldOption(String option) {
this.option = option;
- }
+ }
public String toString() {
return this.option;
}
@@ -79,7 +79,7 @@
// all fields where termvectors with offset and position values set
public static final FieldOption TERMVECTOR_WITH_POSITION_OFFSET = new FieldOption ("TERMVECTOR_WITH_POSITION_OFFSET");
}
-
+
/**
* Constructor used if IndexReader is not owner of its directory.
   * This is used for IndexReaders that are used within other IndexReaders that take care of locking directories.
@@ -89,7 +89,7 @@
protected IndexReader(Directory directory) {
this.directory = directory;
}
-
+
/**
* Constructor used if IndexReader is owner of its directory.
* If IndexReader is owner of its directory, it locks its directory in case of write operations.
@@ -117,7 +117,7 @@
private Lock writeLock;
private boolean stale;
private boolean hasChanges;
-
+
/** Returns an IndexReader reading the index in an FSDirectory in the named
path. */
@@ -130,7 +130,7 @@
public static IndexReader open(File path) throws IOException {
return open(FSDirectory.getDirectory(path, false), true);
}
-
+
/** Returns an IndexReader reading the index in the given Directory. */
public static IndexReader open(final Directory directory) throws IOException {
return open(directory, false);
@@ -151,7 +151,7 @@
for (int i = 0; i < infos.size(); i++)
readers[i] = SegmentReader.get(infos.info(i));
return new MultiReader(directory, infos, closeDirectory, readers);
-
+
}
}.run();
}
@@ -160,7 +160,7 @@
/** Returns the directory this index resides in. */
public Directory directory() { return directory; }
- /**
+ /**
* Returns the time the index in the named directory was last modified.
* Do not use this to check whether the reader is still up-to-date, use
* {@link #isCurrent()} instead.
@@ -169,7 +169,7 @@
return lastModified(new File(directory));
}
- /**
+ /**
* Returns the time the index in the named directory was last modified.
* Do not use this to check whether the reader is still up-to-date, use
* {@link #isCurrent()} instead.
@@ -178,7 +178,7 @@
return FSDirectory.fileModified(directory, IndexFileNames.SEGMENTS);
}
- /**
+ /**
* Returns the time the index in the named directory was last modified.
* Do not use this to check whether the reader is still up-to-date, use
* {@link #isCurrent()} instead.
@@ -228,12 +228,12 @@
public static long getCurrentVersion(Directory directory) throws IOException {
synchronized (directory) { // in- & inter-process sync
Lock commitLock=directory.makeLock(IndexWriter.COMMIT_LOCK_NAME);
-
+
boolean locked=false;
-
+
try {
locked=commitLock.obtain(IndexWriter.COMMIT_LOCK_TIMEOUT);
-
+
return SegmentInfos.readCurrentVersion(directory);
} finally {
if (locked) {
@@ -242,7 +242,7 @@
}
}
}
-
+
/**
* Version number when this IndexReader was opened.
*/
@@ -260,12 +260,12 @@
public boolean isCurrent() throws IOException {
synchronized (directory) { // in- & inter-process sync
Lock commitLock=directory.makeLock(IndexWriter.COMMIT_LOCK_NAME);
-
+
boolean locked=false;
-
+
try {
locked=commitLock.obtain(IndexWriter.COMMIT_LOCK_TIMEOUT);
-
+
return SegmentInfos.readCurrentVersion(directory) == segmentInfos.getVersion();
} finally {
if (locked) {
@@ -292,7 +292,7 @@
abstract public TermFreqVector[] getTermFreqVectors(int docNumber)
throws IOException;
-
+
/**
* Return a term frequency vector for the specified document and field. The
* returned vector contains terms and frequencies for the terms in
@@ -309,7 +309,7 @@
*/
abstract public TermFreqVector getTermFreqVector(int docNumber, String field)
throws IOException;
-
+
/**
* Returns <code>true</code> if an index exists at the specified directory.
* If the directory does not exist or if there is no index in it.
@@ -353,14 +353,40 @@
/** Returns the stored fields of the <code>n</code><sup>th</sup>
<code>Document</code> in this index. */
- public abstract Document document(int n) throws IOException;
+ public Document document(int n) throws IOException{
+ return document(n, null);
+ }
+
+ /**
+ * Get the {@link org.apache.lucene.document.Document} at the <code>n</code><sup>th</sup> position. The {@link org.apache.lucene.document.FieldSelector}
+ * may be used to determine what {@link org.apache.lucene.document.Field}s to load and how they should be loaded.
+ *
+ * <b>NOTE:</b> If this Reader (more specifically, the underlying {@link FieldsReader} is closed before the lazy {@link org.apache.lucene.document.Field} is
+ * loaded an exception may be thrown. If you want the value of a lazy {@link org.apache.lucene.document.Field} to be available after closing you must
+ * explicitly load it or fetch the Document again with a new loader.
+ *
+ *
+ * @param n Get the document at the <code>n</code><sup>th</sup> position
+ * @param fieldSelector The {@link org.apache.lucene.document.FieldSelector} to use to determine what Fields should be loaded on the Document. May be null, in which case all Fields will be loaded.
+ * @return The stored fields of the {@link org.apache.lucene.document.Document} at the nth position
+ * @throws IOException If there is a problem reading this document
+ *
+ * @see org.apache.lucene.document.Fieldable
+ * @see org.apache.lucene.document.FieldSelector
+ * @see org.apache.lucene.document.SetBasedFieldSelector
+ * @see org.apache.lucene.document.LoadFirstFieldSelector
+ */
+ //When we convert to JDK 1.5 make this Set<String>
+ public abstract Document document(int n, FieldSelector fieldSelector) throws IOException;
+
+
/** Returns true if document <i>n</i> has been deleted */
public abstract boolean isDeleted(int n);
/** Returns true if any documents have been deleted */
public abstract boolean hasDeletions();
-
+
/** Returns true if there are norms stored for this field. */
public boolean hasNorms(String field) throws IOException {
// backward compatible implementation.
@@ -371,21 +397,21 @@
/** Returns the byte-encoded normalization factor for the named field of
* every document. This is used by the search code to score documents.
*
- * @see Field#setBoost(float)
+ * @see org.apache.lucene.document.Field#setBoost(float)
*/
public abstract byte[] norms(String field) throws IOException;
/** Reads the byte-encoded normalization factor for the named field of every
* document. This is used by the search code to score documents.
*
- * @see Field#setBoost(float)
+ * @see org.apache.lucene.document.Field#setBoost(float)
*/
public abstract void norms(String field, byte[] bytes, int offset)
throws IOException;
/** Expert: Resets the normalization factor for the named field of the named
* document. The norm represents the product of the field's {@link
- * Field#setBoost(float) boost} and its {@link Similarity#lengthNorm(String,
+ * Fieldable#setBoost(float) boost} and its {@link Similarity#lengthNorm(String,
* int) length normalization}. Thus, to preserve the length normalization
* values when resetting this, one should base the new value upon the old.
*
@@ -399,9 +425,9 @@
doSetNorm(doc, field, value);
hasChanges = true;
}
-
+
/** Implements setNorm in subclass.*/
- protected abstract void doSetNorm(int doc, String field, byte value)
+ protected abstract void doSetNorm(int doc, String field, byte value)
throws IOException;
/** Expert: Resets the normalization factor for the named field of the named
@@ -554,7 +580,7 @@
doUndeleteAll();
hasChanges = true;
}
-
+
/** Implements actual undeleteAll() in subclass. */
protected abstract void doUndeleteAll() throws IOException;
@@ -586,10 +612,10 @@
}
hasChanges = false;
}
-
+
/** Implements commit. */
protected abstract void doCommit() throws IOException;
-
+
/**
* Closes files associated with this index.
* Also saves any new deletions to disk.
@@ -613,7 +639,7 @@
}
}
-
+
/**
* Get a list of unique field names that exist in this index and have the specified
* field option information.
@@ -659,7 +685,7 @@
directory.makeLock(IndexWriter.WRITE_LOCK_NAME).release();
directory.makeLock(IndexWriter.COMMIT_LOCK_NAME).release();
}
-
+
/**
* Prints the filename and size of each file within a given compound file.
* Add the -extract flag to extract files to the current working directory.
@@ -686,7 +712,7 @@
Directory dir = null;
CompoundFileReader cfr = null;
-
+
try {
File file = new File(filename);
String dirname = file.getAbsoluteFile().getParent();
@@ -696,7 +722,7 @@
String [] files = cfr.list();
Arrays.sort(files); // sort the array of filename so that the output is more readable
-
+
for (int i = 0; i < files.length; ++i) {
long len = cfr.fileLength(files[i]);
@@ -705,7 +731,7 @@
IndexInput ii = cfr.openInput(files[i]);
FileOutputStream f = new FileOutputStream(files[i]);
-
+
        // read and write with a small buffer, which is more effective than reading byte by byte
byte[] buffer = new byte[1024];
int chunk = buffer.length;
@@ -715,7 +741,7 @@
f.write(buffer, 0, bufLen);
len -= bufLen;
}
-
+
f.close();
ii.close();
}
Modified: lucene/java/trunk/src/java/org/apache/lucene/index/MultiReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/MultiReader.java?rev=413201&r1=413200&r2=413201&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/MultiReader.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/MultiReader.java Fri Jun 9 18:23:22 2006
@@ -17,11 +17,14 @@
*/
import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldSelector;
import org.apache.lucene.store.Directory;
import java.io.IOException;
-import java.util.*;
+import java.util.Collection;
+import java.util.HashSet;
+import java.util.Hashtable;
+import java.util.Set;
/** An IndexReader which reads multiple indexes, appending their content.
*
@@ -99,9 +102,9 @@
return maxDoc;
}
- public Document document(int n) throws IOException {
+ public Document document(int n, FieldSelector fieldSelector) throws IOException {
int i = readerIndex(n); // find segment num
- return subReaders[i].document(n - starts[i]); // dispatch to segment reader
+ return subReaders[i].document(n - starts[i], fieldSelector); // dispatch to segment reader
}
public boolean isDeleted(int n) {
Modified: lucene/java/trunk/src/java/org/apache/lucene/index/ParallelReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/ParallelReader.java?rev=413201&r1=413200&r2=413201&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/ParallelReader.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/ParallelReader.java Fri Jun 9 18:23:22 2006
@@ -16,20 +16,24 @@
* limitations under the License.
*/
+import org.apache.lucene.document.Document;
+import org.apache.lucene.document.Fieldable;
+import org.apache.lucene.document.FieldSelector;
+import org.apache.lucene.document.FieldSelectorResult;
+
import java.io.IOException;
+import java.util.SortedMap;
import java.util.ArrayList;
-import java.util.Collection;
-import java.util.Enumeration;
-import java.util.HashSet;
-import java.util.Iterator;
import java.util.List;
+import java.util.HashMap;
import java.util.Map;
-import java.util.Set;
-import java.util.SortedMap;
import java.util.TreeMap;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.Enumeration;
+import java.util.Set;
+import java.util.HashSet;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
/** An IndexReader which reads multiple, parallel indexes. Each index added
* must have the same number of documents, but typically each contains
@@ -41,7 +45,7 @@
* change rarely and small fields that change more frequently. The smaller
* fields may be re-indexed in a new index and both indexes may be searched
* together.
- *
+ *
* <p><strong>Warning:</strong> It is up to you to make sure all indexes
* are created and modified the same way. For example, if you add
* documents to one index, you need to add the same documents in the
@@ -51,7 +55,8 @@
public class ParallelReader extends IndexReader {
private List readers = new ArrayList();
private SortedMap fieldToReader = new TreeMap();
- private List storedFieldReaders = new ArrayList();
+ private Map readerToFields = new HashMap();
+ private List storedFieldReaders = new ArrayList();
private int maxDoc;
private int numDocs;
@@ -59,7 +64,7 @@
/** Construct a ParallelReader. */
public ParallelReader() throws IOException { super(null); }
-
+
/** Add an IndexReader. */
public void add(IndexReader reader) throws IOException {
add(reader, false);
@@ -68,10 +73,10 @@
/** Add an IndexReader whose stored fields will not be returned. This can
   * accelerate search when stored fields are only needed from a subset of
* the IndexReaders.
- *
- * @throws IllegalArgumentException if not all indexes contain the same number
+ *
+ * @throws IllegalArgumentException if not all indexes contain the same number
* of documents
- * @throws IllegalArgumentException if not all indexes have the same value
+ * @throws IllegalArgumentException if not all indexes have the same value
* of {@link IndexReader#maxDoc()}
*/
public void add(IndexReader reader, boolean ignoreStoredFields)
@@ -89,8 +94,10 @@
if (reader.numDocs() != numDocs)
throw new IllegalArgumentException
("All readers must have same numDocs: "+numDocs+"!="+reader.numDocs());
-
- Iterator i = reader.getFieldNames(IndexReader.FieldOption.ALL).iterator();
+
+ Collection fields = reader.getFieldNames(IndexReader.FieldOption.ALL);
+ readerToFields.put(reader, fields);
+ Iterator i = fields.iterator();
while (i.hasNext()) { // update fieldToReader map
String field = (String)i.next();
if (fieldToReader.get(field) == null)
@@ -132,13 +139,25 @@
}
// append fields from storedFieldReaders
- public Document document(int n) throws IOException {
+ public Document document(int n, FieldSelector fieldSelector) throws IOException {
Document result = new Document();
for (int i = 0; i < storedFieldReaders.size(); i++) {
IndexReader reader = (IndexReader)storedFieldReaders.get(i);
- Enumeration fields = reader.document(n).fields();
- while (fields.hasMoreElements()) {
- result.add((Field)fields.nextElement());
+
+ boolean include = (fieldSelector==null);
+ if (!include) {
+ Iterator it = ((Collection) readerToFields.get(reader)).iterator();
+ while (it.hasNext())
+ if (fieldSelector.accept((String)it.next())!=FieldSelectorResult.NO_LOAD) {
+ include = true;
+ break;
+ }
+ }
+ if (include) {
+ Enumeration fields = reader.document(n, fieldSelector).fields();
+ while (fields.hasMoreElements()) {
+ result.add((Fieldable)fields.nextElement());
+ }
}
}
return result;
Modified: lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMerger.java?rev=413201&r1=413200&r2=413201&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMerger.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/SegmentMerger.java Fri Jun 9 18:23:22 2006
@@ -120,7 +120,7 @@
files.add(segment + "." + IndexFileNames.COMPOUND_EXTENSIONS[i]);
}
- // Field norm files
+ // Fieldable norm files
for (int i = 0; i < fieldInfos.size(); i++) {
FieldInfo fi = fieldInfos.fieldInfo(i);
if (fi.isIndexed && !fi.omitNorms) {
Modified: lucene/java/trunk/src/java/org/apache/lucene/index/SegmentReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/SegmentReader.java?rev=413201&r1=413200&r2=413201&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/SegmentReader.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/SegmentReader.java Fri Jun 9 18:23:22 2006
@@ -16,16 +16,16 @@
* limitations under the License.
*/
-import java.io.IOException;
-import java.util.*;
-
import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
+import org.apache.lucene.document.FieldSelector;
+import org.apache.lucene.search.DefaultSimilarity;
+import org.apache.lucene.store.Directory;
import org.apache.lucene.store.IndexInput;
import org.apache.lucene.store.IndexOutput;
-import org.apache.lucene.store.Directory;
import org.apache.lucene.util.BitVector;
-import org.apache.lucene.search.DefaultSimilarity;
+
+import java.io.IOException;
+import java.util.*;
/**
* @version $Id$
@@ -277,11 +277,11 @@
return tis.terms(t);
}
- public synchronized Document document(int n) throws IOException {
+ public synchronized Document document(int n, FieldSelector fieldSelector) throws IOException {
if (isDeleted(n))
throw new IllegalArgumentException
("attempt to access a deleted document");
- return fieldsReader.doc(n);
+ return fieldsReader.doc(n, fieldSelector);
}
public synchronized boolean isDeleted(int n) {
Modified: lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsReader.java?rev=413201&r1=413200&r2=413201&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsReader.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsReader.java Fri Jun 9 18:23:22 2006
@@ -127,7 +127,7 @@
result = readTermVector(field, position);
} else {
- //System.out.println("Field not found");
+ //System.out.println("Fieldable not found");
}
} else {
//System.out.println("No tvx file");
Modified: lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsWriter.java?rev=413201&r1=413200&r2=413201&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsWriter.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/index/TermVectorsWriter.java Fri Jun 9 18:23:22 2006
@@ -150,7 +150,7 @@
return currentField != null;
}
- /** Add term to the field's term vector. Field must already be open.
+ /** Add term to the field's term vector. Fieldable must already be open.
* Terms should be added in
* increasing order of terms, one call per unique termNum. ProxPointer
* is a pointer into the TermPosition file (prx). Freq is the number of
@@ -268,7 +268,7 @@
private void writeField() throws IOException {
// remember where this field is written
currentField.tvfPointer = tvf.getFilePointer();
- //System.out.println("Field Pointer: " + currentField.tvfPointer);
+ //System.out.println("Fieldable Pointer: " + currentField.tvfPointer);
final int size = terms.size();
tvf.writeVInt(size);
Modified: lucene/java/trunk/src/java/org/apache/lucene/search/FieldCacheImpl.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/FieldCacheImpl.java?rev=413201&r1=413200&r2=413201&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/FieldCacheImpl.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/FieldCacheImpl.java Fri Jun 9 18:23:22 2006
@@ -20,7 +20,6 @@
import org.apache.lucene.index.Term;
import org.apache.lucene.index.TermDocs;
import org.apache.lucene.index.TermEnum;
-import org.apache.lucene.search.FieldCache.StringIndex; // required by GCJ
import java.io.IOException;
import java.util.Locale;
@@ -43,7 +42,7 @@
/** Expert: Every key in the internal cache is of this type. */
static class Entry {
- final String field; // which Field
+ final String field; // which Fieldable
final int type; // which SortField type
final Object custom; // which custom comparator
final Locale locale; // the locale we're sorting (if string)
Modified: lucene/java/trunk/src/java/org/apache/lucene/search/FieldDocSortedHitQueue.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/FieldDocSortedHitQueue.java?rev=413201&r1=413200&r2=413201&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/FieldDocSortedHitQueue.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/FieldDocSortedHitQueue.java Fri Jun 9 18:23:22 2006
@@ -45,7 +45,7 @@
/**
* Creates a hit queue sorted by the given list of fields.
- * @param fields Field names, in priority order (highest priority first).
+ * @param fields Fieldable names, in priority order (highest priority first).
* @param size The number of hits to retain. Must be greater than zero.
*/
FieldDocSortedHitQueue (SortField[] fields, int size) {
Modified: lucene/java/trunk/src/java/org/apache/lucene/search/FieldSortedHitQueue.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/FieldSortedHitQueue.java?rev=413201&r1=413200&r2=413201&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/FieldSortedHitQueue.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/FieldSortedHitQueue.java Fri Jun 9 18:23:22 2006
@@ -44,7 +44,7 @@
/**
* Creates a hit queue sorted by the given list of fields.
* @param reader Index to use.
- * @param fields Field names, in priority order (highest priority first). Cannot be <code>null</code> or empty.
+ * @param fields Fieldable names, in priority order (highest priority first). Cannot be <code>null</code> or empty.
* @param size The number of hits to retain. Must be greater than zero.
* @throws IOException
*/
@@ -212,7 +212,7 @@
/**
* Returns a comparator for sorting hits according to a field containing integers.
* @param reader Index to use.
- * @param fieldname Field containg integer values.
+ * @param fieldname Fieldable containg integer values.
* @return Comparator for sorting hits.
* @throws IOException If an error occurs reading the index.
*/
@@ -243,7 +243,7 @@
/**
* Returns a comparator for sorting hits according to a field containing floats.
* @param reader Index to use.
- * @param fieldname Field containg float values.
+ * @param fieldname Fieldable containg float values.
* @return Comparator for sorting hits.
* @throws IOException If an error occurs reading the index.
*/
@@ -274,7 +274,7 @@
/**
* Returns a comparator for sorting hits according to a field containing strings.
* @param reader Index to use.
- * @param fieldname Field containg string values.
+ * @param fieldname Fieldable containg string values.
* @return Comparator for sorting hits.
* @throws IOException If an error occurs reading the index.
*/
@@ -305,7 +305,7 @@
/**
* Returns a comparator for sorting hits according to a field containing strings.
* @param reader Index to use.
- * @param fieldname Field containg string values.
+ * @param fieldname Fieldable containg string values.
* @return Comparator for sorting hits.
* @throws IOException If an error occurs reading the index.
*/
@@ -336,7 +336,7 @@
* floats or strings. Once the type is determined, one of the other static methods
* in this class is called to get the comparator.
* @param reader Index to use.
- * @param fieldname Field containg values.
+ * @param fieldname Fieldable containg values.
* @return Comparator for sorting hits.
* @throws IOException If an error occurs reading the index.
*/
Modified: lucene/java/trunk/src/java/org/apache/lucene/search/Similarity.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/Similarity.java?rev=413201&r1=413200&r2=413201&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/Similarity.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/Similarity.java Fri Jun 9 18:23:22 2006
@@ -16,19 +16,16 @@
* limitations under the License.
*/
+import org.apache.lucene.index.IndexReader;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
+import org.apache.lucene.util.SmallFloat;
+
import java.io.IOException;
import java.io.Serializable;
-
import java.util.Collection;
import java.util.Iterator;
-import org.apache.lucene.index.Term;
-
-import org.apache.lucene.index.IndexReader; // for javadoc
-import org.apache.lucene.index.IndexWriter; // for javadoc
-import org.apache.lucene.document.Field; // for javadoc
-import org.apache.lucene.util.SmallFloat;
-
/** Expert: Scoring API.
* <p>Subclasses implement search scoring.
*
@@ -44,7 +41,7 @@
* ( {@link #tf(int) tf}(t in d) *
* {@link #idf(Term,Searcher) idf}(t)^2 *
* {@link Query#getBoost getBoost}(t in q) *
- * {@link Field#getBoost getBoost}(t.field in d) *
+ * {@link org.apache.lucene.document.Field#getBoost getBoost}(t.field in d) *
* {@link #lengthNorm(String,int) lengthNorm}(t.field in d) )
* </small></td>
* <td valign="middle" rowspan="2"> *
@@ -152,7 +149,7 @@
* <i>fieldName</i> of <i>doc</i>.
* @return a normalization factor for hits on this field of this document
*
- * @see Field#setBoost(float)
+ * @see org.apache.lucene.document.Field#setBoost(float)
*/
public abstract float lengthNorm(String fieldName, int numTokens);
@@ -179,7 +176,7 @@
* small to represent are rounded up to the smallest positive representable
* value.
*
- * @see Field#setBoost(float)
+ * @see org.apache.lucene.document.Field#setBoost(float)
* @see SmallFloat
*/
public static byte encodeNorm(float f) {
Modified: lucene/java/trunk/src/java/org/apache/lucene/search/Sort.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/Sort.java?rev=413201&r1=413200&r2=413201&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/Sort.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/Sort.java Fri Jun 9 18:23:22 2006
@@ -29,7 +29,7 @@
* and does not need to be stored (unless you happen to want it back with the
* rest of your document data). In other words:
*
- * <p><code>document.add (new Field ("byNumber", Integer.toString(x), Field.Store.NO, Field.Index.UN_TOKENIZED));</code></p>
+ * <p><code>document.add (new Fieldable ("byNumber", Integer.toString(x), Fieldable.Store.NO, Fieldable.Index.UN_TOKENIZED));</code></p>
*
*
* <p><h3>Valid Types of Values</h3>
Modified: lucene/java/trunk/src/java/org/apache/lucene/search/SortComparatorSource.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/search/SortComparatorSource.java?rev=413201&r1=413200&r2=413201&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/search/SortComparatorSource.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/search/SortComparatorSource.java Fri Jun 9 18:23:22 2006
@@ -19,7 +19,7 @@
/**
* Creates a comparator for the field in the given index.
* @param reader Index to create comparator for.
- * @param fieldname Field to create comparator for.
+ * @param fieldname Fieldable to create comparator for.
* @return Comparator of ScoreDoc objects.
* @throws IOException If an error occurs reading the index.
*/
Modified: lucene/java/trunk/src/java/org/apache/lucene/store/IndexInput.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/java/org/apache/lucene/store/IndexInput.java?rev=413201&r1=413200&r2=413201&view=diff
==============================================================================
--- lucene/java/trunk/src/java/org/apache/lucene/store/IndexInput.java (original)
+++ lucene/java/trunk/src/java/org/apache/lucene/store/IndexInput.java Fri Jun 9 18:23:22 2006
@@ -116,6 +116,32 @@
}
}
+ /**
+ * Expert
+ *
+ * Similar to {@link #readChars(char[], int, int)} but does not do any conversion operations on the bytes it is reading in. It still
+ * has to invoke {@link #readByte()} just as {@link #readChars(char[], int, int)} does, but it does not need a buffer to store anything
+ * and it does not have to do any of the bitwise operations, since we don't actually care what is in the byte except to determine
+ * how many more bytes to read
+ * @param length The number of chars to read
+ */
+ public void skipChars(int length) throws IOException{
+ for (int i = 0; i < length; i++) {
+ byte b = readByte();
+ if ((b & 0x80) == 0){
+ //do nothing, we only need one byte
+ }
+ else if ((b & 0xE0) != 0xE0) {
+ readByte();//read an additional byte
+ } else{
+ //read two additional bytes.
+ readByte();
+ readByte();
+ }
+ }
+ }
+
+
  /** Closes the stream to further operations. */
public abstract void close() throws IOException;
Modified: lucene/java/trunk/src/test/org/apache/lucene/document/TestBinaryDocument.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/document/TestBinaryDocument.java?rev=413201&r1=413200&r2=413201&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/document/TestBinaryDocument.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/document/TestBinaryDocument.java Fri Jun 9 18:23:22 2006
@@ -38,10 +38,10 @@
public void testBinaryFieldInIndex()
throws Exception
{
- Field binaryFldStored = new Field("binaryStored", binaryValStored.getBytes(), Field.Store.YES);
- Field binaryFldCompressed = new Field("binaryCompressed", binaryValCompressed.getBytes(), Field.Store.COMPRESS);
- Field stringFldStored = new Field("stringStored", binaryValStored, Field.Store.YES, Field.Index.NO, Field.TermVector.NO);
- Field stringFldCompressed = new Field("stringCompressed", binaryValCompressed, Field.Store.COMPRESS, Field.Index.NO, Field.TermVector.NO);
+ Fieldable binaryFldStored = new Field("binaryStored", binaryValStored.getBytes(), Field.Store.YES);
+ Fieldable binaryFldCompressed = new Field("binaryCompressed", binaryValCompressed.getBytes(), Field.Store.COMPRESS);
+ Fieldable stringFldStored = new Field("stringStored", binaryValStored, Field.Store.YES, Field.Index.NO, Field.TermVector.NO);
+ Fieldable stringFldCompressed = new Field("stringCompressed", binaryValCompressed, Field.Store.COMPRESS, Field.Index.NO, Field.TermVector.NO);
try {
// binary fields with store off are not allowed
Modified: lucene/java/trunk/src/test/org/apache/lucene/document/TestDocument.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/document/TestDocument.java?rev=413201&r1=413200&r2=413201&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/document/TestDocument.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/document/TestDocument.java Fri Jun 9 18:23:22 2006
@@ -46,9 +46,9 @@
throws Exception
{
Document doc = new Document();
- Field stringFld = new Field("string", binaryVal, Field.Store.YES, Field.Index.NO);
- Field binaryFld = new Field("binary", binaryVal.getBytes(), Field.Store.YES);
- Field binaryFld2 = new Field("binary", binaryVal2.getBytes(), Field.Store.YES);
+ Fieldable stringFld = new Field("string", binaryVal, Field.Store.YES, Field.Index.NO);
+ Fieldable binaryFld = new Field("binary", binaryVal.getBytes(), Field.Store.YES);
+ Fieldable binaryFld2 = new Field("binary", binaryVal2.getBytes(), Field.Store.YES);
doc.add(stringFld);
doc.add(binaryFld);
Modified: lucene/java/trunk/src/test/org/apache/lucene/index/DocHelper.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/DocHelper.java?rev=413201&r1=413200&r2=413201&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/DocHelper.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/DocHelper.java Fri Jun 9 18:23:22 2006
@@ -18,12 +18,12 @@
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
+import org.apache.lucene.document.*;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.Directory;
import java.io.IOException;
+import java.io.UnsupportedEncodingException;
import java.util.HashMap;
import java.util.Map;
import java.util.Enumeration;
@@ -39,6 +39,13 @@
public static final int [] FIELD_2_FREQS = {3, 1, 1};
public static final String TEXT_FIELD_2_KEY = "textField2";
public static Field textField2 = new Field(TEXT_FIELD_2_KEY, FIELD_2_TEXT, Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
+
+ public static final String FIELD_2_COMPRESSED_TEXT = "field field field two text";
+ //Fields will be lexicographically sorted. So, the order is: field, text, two
+ public static final int [] COMPRESSED_FIELD_2_FREQS = {3, 1, 1};
+ public static final String COMPRESSED_TEXT_FIELD_2_KEY = "compressedTextField2";
+ public static Field compressedTextField2 = new Field(COMPRESSED_TEXT_FIELD_2_KEY, FIELD_2_COMPRESSED_TEXT, Field.Store.COMPRESS, Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
+
public static final String FIELD_3_TEXT = "aaaNoNorms aaaNoNorms bbbNoNorms";
public static final String TEXT_FIELD_3_KEY = "textField3";
@@ -71,6 +78,34 @@
public static Field unStoredField2 = new Field(UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT,
Field.Store.NO, Field.Index.TOKENIZED, Field.TermVector.YES);
+ public static final String LAZY_FIELD_BINARY_KEY = "lazyFieldBinary";
+ public static byte [] LAZY_FIELD_BINARY_BYTES;
+ public static Field lazyFieldBinary;
+
+ public static final String LAZY_FIELD_KEY = "lazyField";
+ public static final String LAZY_FIELD_TEXT = "These are some field bytes";
+ public static Field lazyField = new Field(LAZY_FIELD_KEY, LAZY_FIELD_TEXT, Field.Store.YES, Field.Index.TOKENIZED);
+
+ public static final String LARGE_LAZY_FIELD_KEY = "largeLazyField";
+ public static String LARGE_LAZY_FIELD_TEXT;
+ public static Field largeLazyField;
+
+ //From Issue 509
+ public static final String FIELD_UTF1_TEXT = "field one \u4e00text";
+ public static final String TEXT_FIELD_UTF1_KEY = "textField1Utf8";
+ public static Field textUtfField1 = new Field(TEXT_FIELD_UTF1_KEY, FIELD_UTF1_TEXT,
+ Field.Store.YES, Field.Index.TOKENIZED, Field.TermVector.NO);
+
+ public static final String FIELD_UTF2_TEXT = "field field field \u4e00two text";
+ //Fields will be lexicographically sorted. So, the order is: field, text, two
+ public static final int [] FIELD_UTF2_FREQS = {3, 1, 1};
+ public static final String TEXT_FIELD_UTF2_KEY = "textField2Utf8";
+ public static Field textUtfField2 = new Field(TEXT_FIELD_UTF2_KEY, FIELD_UTF2_TEXT, Field.Store.YES,
+ Field.Index.TOKENIZED, Field.TermVector.WITH_POSITIONS_OFFSETS);
+
+
+
+
public static Map nameValues = null;
// ordered list of all the fields...
@@ -79,14 +114,20 @@
textField1,
textField2,
textField3,
+ compressedTextField2,
keyField,
noNormsField,
unIndField,
unStoredField1,
unStoredField2,
+ textUtfField1,
+ textUtfField2,
+ lazyField,
+ lazyFieldBinary,//placeholder for binary field, since this is null. It must be second to last.
+ largeLazyField//placeholder for large field, since this is null. It must always be last
};
- // Map<String fieldName, Field field>
+ // Map<String fieldName, Fieldable field>
public static Map all=new HashMap();
public static Map indexed=new HashMap();
public static Map stored=new HashMap();
@@ -94,11 +135,28 @@
public static Map unindexed=new HashMap();
public static Map termvector=new HashMap();
public static Map notermvector=new HashMap();
+ public static Map lazy= new HashMap();
public static Map noNorms=new HashMap();
static {
+ //Initialize the large Lazy Field
+ StringBuffer buffer = new StringBuffer();
+ for (int i = 0; i < 10000; i++)
+ {
+ buffer.append("Lazily loading lengths of language in lieu of laughing ");
+ }
+
+ try {
+ LAZY_FIELD_BINARY_BYTES = "These are some binary field bytes".getBytes("UTF8");
+ } catch (UnsupportedEncodingException e) {
+ }
+ lazyFieldBinary = new Field(LAZY_FIELD_BINARY_KEY, LAZY_FIELD_BINARY_BYTES, Field.Store.YES);
+ fields[fields.length - 2] = lazyFieldBinary;
+ LARGE_LAZY_FIELD_TEXT = buffer.toString();
+ largeLazyField = new Field(LARGE_LAZY_FIELD_KEY, LARGE_LAZY_FIELD_TEXT, Field.Store.YES, Field.Index.TOKENIZED);
+ fields[fields.length - 1] = largeLazyField;
for (int i=0; i<fields.length; i++) {
- Field f = fields[i];
+ Fieldable f = fields[i];
add(all,f);
if (f.isIndexed()) add(indexed,f);
else add(unindexed,f);
@@ -107,11 +165,12 @@
if (f.isStored()) add(stored,f);
else add(unstored,f);
if (f.getOmitNorms()) add(noNorms,f);
+ if (f.isLazy()) add(lazy, f);
}
}
- private static void add(Map map, Field field) {
+ private static void add(Map map, Fieldable field) {
map.put(field.name(), field);
}
@@ -121,13 +180,19 @@
nameValues = new HashMap();
nameValues.put(TEXT_FIELD_1_KEY, FIELD_1_TEXT);
nameValues.put(TEXT_FIELD_2_KEY, FIELD_2_TEXT);
+ nameValues.put(COMPRESSED_TEXT_FIELD_2_KEY, FIELD_2_COMPRESSED_TEXT);
nameValues.put(TEXT_FIELD_3_KEY, FIELD_3_TEXT);
nameValues.put(KEYWORD_FIELD_KEY, KEYWORD_TEXT);
nameValues.put(NO_NORMS_KEY, NO_NORMS_TEXT);
nameValues.put(UNINDEXED_FIELD_KEY, UNINDEXED_FIELD_TEXT);
nameValues.put(UNSTORED_FIELD_1_KEY, UNSTORED_1_FIELD_TEXT);
nameValues.put(UNSTORED_FIELD_2_KEY, UNSTORED_2_FIELD_TEXT);
- }
+ nameValues.put(LAZY_FIELD_KEY, LAZY_FIELD_TEXT);
+ nameValues.put(LAZY_FIELD_BINARY_KEY, LAZY_FIELD_BINARY_BYTES);
+ nameValues.put(LARGE_LAZY_FIELD_KEY, LARGE_LAZY_FIELD_TEXT);
+ nameValues.put(TEXT_FIELD_UTF1_KEY, FIELD_UTF1_TEXT);
+ nameValues.put(TEXT_FIELD_UTF2_KEY, FIELD_UTF2_TEXT);
+ }
/**
* Adds the fields above to a document
Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestDocumentWriter.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestDocumentWriter.java?rev=413201&r1=413200&r2=413201&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestDocumentWriter.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestDocumentWriter.java Fri Jun 9 18:23:22 2006
@@ -21,8 +21,7 @@
import org.apache.lucene.analysis.WhitespaceAnalyzer;
import org.apache.lucene.analysis.TokenStream;
import org.apache.lucene.analysis.WhitespaceTokenizer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
+import org.apache.lucene.document.*;
import org.apache.lucene.search.Similarity;
import org.apache.lucene.store.RAMDirectory;
@@ -64,7 +63,7 @@
assertTrue(doc != null);
//System.out.println("Document: " + doc);
- Field [] fields = doc.getFields("textField2");
+ Fieldable [] fields = doc.getFields("textField2");
assertTrue(fields != null && fields.length == 1);
assertTrue(fields[0].stringValue().equals(DocHelper.FIELD_2_TEXT));
assertTrue(fields[0].isTermVectorStored());
Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestFieldsReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestFieldsReader.java?rev=413201&r1=413200&r2=413201&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestFieldsReader.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestFieldsReader.java Fri Jun 9 18:23:22 2006
@@ -17,13 +17,18 @@
*/
import junit.framework.TestCase;
-import org.apache.lucene.store.RAMDirectory;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
import org.apache.lucene.analysis.WhitespaceAnalyzer;
+import org.apache.lucene.document.*;
import org.apache.lucene.search.Similarity;
+import org.apache.lucene.store.FSDirectory;
+import org.apache.lucene.store.RAMDirectory;
+import java.io.File;
import java.io.IOException;
+import java.util.Collections;
+import java.util.HashSet;
+import java.util.Set;
+import java.util.Enumeration;
public class TestFieldsReader extends TestCase {
private RAMDirectory dir = new RAMDirectory();
@@ -50,19 +55,19 @@
FieldsReader reader = new FieldsReader(dir, "test", fieldInfos);
assertTrue(reader != null);
assertTrue(reader.size() == 1);
- Document doc = reader.doc(0);
+ Document doc = reader.doc(0, null);
assertTrue(doc != null);
- assertTrue(doc.getField("textField1") != null);
-
- Field field = doc.getField("textField2");
+ assertTrue(doc.getField(DocHelper.TEXT_FIELD_1_KEY) != null);
+
+ Fieldable field = doc.getField(DocHelper.TEXT_FIELD_2_KEY);
assertTrue(field != null);
assertTrue(field.isTermVectorStored() == true);
-
+
assertTrue(field.isStoreOffsetWithTermVector() == true);
assertTrue(field.isStorePositionWithTermVector() == true);
assertTrue(field.getOmitNorms() == false);
- field = doc.getField("textField3");
+ field = doc.getField(DocHelper.TEXT_FIELD_3_KEY);
assertTrue(field != null);
assertTrue(field.isTermVectorStored() == false);
assertTrue(field.isStoreOffsetWithTermVector() == false);
@@ -72,4 +77,144 @@
reader.close();
}
+
+
+ public void testLazyFields() throws Exception {
+ assertTrue(dir != null);
+ assertTrue(fieldInfos != null);
+ FieldsReader reader = new FieldsReader(dir, "test", fieldInfos);
+ assertTrue(reader != null);
+ assertTrue(reader.size() == 1);
+ Set loadFieldNames = new HashSet();
+ loadFieldNames.add(DocHelper.TEXT_FIELD_1_KEY);
+ loadFieldNames.add(DocHelper.TEXT_FIELD_UTF1_KEY);
+ Set lazyFieldNames = new HashSet();
+ //new String[]{DocHelper.LARGE_LAZY_FIELD_KEY, DocHelper.LAZY_FIELD_KEY, DocHelper.LAZY_FIELD_BINARY_KEY};
+ lazyFieldNames.add(DocHelper.LARGE_LAZY_FIELD_KEY);
+ lazyFieldNames.add(DocHelper.LAZY_FIELD_KEY);
+ lazyFieldNames.add(DocHelper.LAZY_FIELD_BINARY_KEY);
+ lazyFieldNames.add(DocHelper.TEXT_FIELD_UTF2_KEY);
+ SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(loadFieldNames, lazyFieldNames);
+ Document doc = reader.doc(0, fieldSelector);
+ assertTrue("doc is null and it shouldn't be", doc != null);
+ Fieldable field = doc.getField(DocHelper.LAZY_FIELD_KEY);
+ assertTrue("field is null and it shouldn't be", field != null);
+ assertTrue("field is not lazy and it should be", field.isLazy());
+ String value = field.stringValue();
+ assertTrue("value is null and it shouldn't be", value != null);
+ assertTrue(value + " is not equal to " + DocHelper.LAZY_FIELD_TEXT, value.equals(DocHelper.LAZY_FIELD_TEXT) == true);
+ field = doc.getField(DocHelper.TEXT_FIELD_1_KEY);
+ assertTrue("field is null and it shouldn't be", field != null);
+ assertTrue("Field is lazy and it should not be", field.isLazy() == false);
+ field = doc.getField(DocHelper.TEXT_FIELD_UTF1_KEY);
+ assertTrue("field is null and it shouldn't be", field != null);
+ assertTrue("Field is lazy and it should not be", field.isLazy() == false);
+ assertTrue(field.stringValue() + " is not equal to " + DocHelper.FIELD_UTF1_TEXT, field.stringValue().equals(DocHelper.FIELD_UTF1_TEXT) == true);
+
+ field = doc.getField(DocHelper.TEXT_FIELD_UTF2_KEY);
+ assertTrue("field is null and it shouldn't be", field != null);
+ assertTrue("Field is not lazy and it should be", field.isLazy() == true);
+ assertTrue(field.stringValue() + " is not equal to " + DocHelper.FIELD_UTF2_TEXT, field.stringValue().equals(DocHelper.FIELD_UTF2_TEXT) == true);
+
+ field = doc.getField(DocHelper.LAZY_FIELD_BINARY_KEY);
+ assertTrue("field is null and it shouldn't be", field != null);
+ byte [] bytes = field.binaryValue();
+ assertTrue("bytes is null and it shouldn't be", bytes != null);
+ assertTrue("binary value lengths differ", DocHelper.LAZY_FIELD_BINARY_BYTES.length == bytes.length);
+ for (int i = 0; i < bytes.length; i++) {
+ assertTrue("byte[" + i + "] is mismatched", bytes[i] == DocHelper.LAZY_FIELD_BINARY_BYTES[i]);
+
+ }
+ }
+
+ public void testLoadFirst() throws Exception {
+ assertTrue(dir != null);
+ assertTrue(fieldInfos != null);
+ FieldsReader reader = new FieldsReader(dir, "test", fieldInfos);
+ assertTrue(reader != null);
+ assertTrue(reader.size() == 1);
+ LoadFirstFieldSelector fieldSelector = new LoadFirstFieldSelector();
+ Document doc = reader.doc(0, fieldSelector);
+ assertTrue("doc is null and it shouldn't be", doc != null);
+ int count = 0;
+ Enumeration enumeration = doc.fields();
+ while (enumeration.hasMoreElements()) {
+ Field field = (Field) enumeration.nextElement();
+ assertTrue("field is null and it shouldn't be", field != null);
+ String sv = field.stringValue();
+ assertTrue("sv is null and it shouldn't be", sv != null);
+ count++;
+ }
+ assertTrue(count + " does not equal: " + 1, count == 1);
+ }
+
+ /**
+ * Not really a test per se, but we should have some way of assessing whether this is worthwhile.
+ * <p/>
+ * Must test using a File based directory
+ *
+ * @throws Exception
+ */
+ public void testLazyPerformance() throws Exception {
+ String tmpIODir = System.getProperty("java.io.tmpdir");
+ String path = tmpIODir + File.separator + "lazyDir";
+ File file = new File(path);
+ FSDirectory tmpDir = FSDirectory.getDirectory(file, true);
+ assertTrue(tmpDir != null);
+ DocumentWriter writer = new DocumentWriter(tmpDir, new WhitespaceAnalyzer(),
+ Similarity.getDefault(), 50);
+ assertTrue(writer != null);
+ writer.addDocument("test", testDoc);
+ assertTrue(fieldInfos != null);
+ FieldsReader reader;
+ long lazyTime = 0;
+ long regularTime = 0;
+ int length = 50;
+ Set lazyFieldNames = new HashSet();
+ lazyFieldNames.add(DocHelper.LARGE_LAZY_FIELD_KEY);
+ SetBasedFieldSelector fieldSelector = new SetBasedFieldSelector(Collections.EMPTY_SET, lazyFieldNames);
+
+ for (int i = 0; i < length; i++) {
+ reader = new FieldsReader(tmpDir, "test", fieldInfos);
+ assertTrue(reader != null);
+ assertTrue(reader.size() == 1);
+
+ Document doc;
+ doc = reader.doc(0, null);//Load all of them
+ assertTrue("doc is null and it shouldn't be", doc != null);
+ Fieldable field = doc.getField(DocHelper.LARGE_LAZY_FIELD_KEY);
+ assertTrue("field is lazy", field.isLazy() == false);
+ String value;
+ long start;
+ long finish;
+ start = System.currentTimeMillis();
+ //On my machine this was always 0ms.
+ value = field.stringValue();
+ finish = System.currentTimeMillis();
+ assertTrue("value is null and it shouldn't be", value != null);
+ assertTrue("field is null and it shouldn't be", field != null);
+ regularTime += (finish - start);
+ reader.close();
+ reader = null;
+ doc = null;
+ //Hmmm, are we still in cache???
+ System.gc();
+ reader = new FieldsReader(tmpDir, "test", fieldInfos);
+ doc = reader.doc(0, fieldSelector);
+ field = doc.getField(DocHelper.LARGE_LAZY_FIELD_KEY);
+ assertTrue("field is not lazy", field.isLazy() == true);
+ start = System.currentTimeMillis();
+ //On my machine this took around 50 - 70ms
+ value = field.stringValue();
+ finish = System.currentTimeMillis();
+ assertTrue("value is null and it shouldn't be", value != null);
+ lazyTime += (finish - start);
+ reader.close();
+
+ }
+ System.out.println("Average Non-lazy time (should be very close to zero): " + regularTime / length + " ms for " + length + " reads");
+ System.out.println("Average Lazy Time (should be greater than zero): " + lazyTime / length + " ms for " + length + " reads");
+ }
+
+
}
Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexInput.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexInput.java?rev=413201&r1=413200&r2=413201&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexInput.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexInput.java Fri Jun 9 18:23:22 2006
@@ -22,16 +22,56 @@
import java.io.IOException;
public class TestIndexInput extends TestCase {
- public void testRead() throws IOException {
- IndexInput is = new MockIndexInput(new byte[] { (byte) 0x80, 0x01,
- (byte) 0xFF, 0x7F,
- (byte) 0x80, (byte) 0x80, 0x01,
- (byte) 0x81, (byte) 0x80, 0x01,
- 0x06, 'L', 'u', 'c', 'e', 'n', 'e'});
- assertEquals(128,is.readVInt());
- assertEquals(16383,is.readVInt());
- assertEquals(16384,is.readVInt());
- assertEquals(16385,is.readVInt());
- assertEquals("Lucene",is.readString());
- }
+ public void testRead() throws IOException {
+ IndexInput is = new MockIndexInput(new byte[]{(byte) 0x80, 0x01,
+ (byte) 0xFF, 0x7F,
+ (byte) 0x80, (byte) 0x80, 0x01,
+ (byte) 0x81, (byte) 0x80, 0x01,
+ 0x06, 'L', 'u', 'c', 'e', 'n', 'e'});
+ assertEquals(128, is.readVInt());
+ assertEquals(16383, is.readVInt());
+ assertEquals(16384, is.readVInt());
+ assertEquals(16385, is.readVInt());
+ assertEquals("Lucene", is.readString());
+ }
+
+ /**
+ * Expert
+ *
+ * @throws IOException
+ */
+ public void testSkipChars() throws IOException {
+ byte[] bytes = new byte[]{(byte) 0x80, 0x01,
+ (byte) 0xFF, 0x7F,
+ (byte) 0x80, (byte) 0x80, 0x01,
+ (byte) 0x81, (byte) 0x80, 0x01,
+ 0x06, 'L', 'u', 'c', 'e', 'n', 'e',
+ };
+ String utf8Str = "\u0634\u1ea1";
+ byte [] utf8Bytes = utf8Str.getBytes("UTF-8");
+ byte [] theBytes = new byte[bytes.length + 1 + utf8Bytes.length];
+ System.arraycopy(bytes, 0, theBytes, 0, bytes.length);
+ theBytes[bytes.length] = (byte)utf8Str.length();//Add in the number of chars we are storing, which should fit in a byte for this test
+ System.arraycopy(utf8Bytes, 0, theBytes, bytes.length + 1, utf8Bytes.length);
+ IndexInput is = new MockIndexInput(theBytes);
+ assertEquals(128, is.readVInt());
+ assertEquals(16383, is.readVInt());
+ assertEquals(16384, is.readVInt());
+ assertEquals(16385, is.readVInt());
+ int charsToRead = is.readVInt();//number of chars in the Lucene string
+ assertTrue(0x06 + " does not equal: " + charsToRead, 0x06 == charsToRead);
+ is.skipChars(3);
+ char [] chars = new char[3];//there should be 6 chars remaining
+ is.readChars(chars, 0, 3);
+ String tmpStr = new String(chars);
+ assertTrue(tmpStr + " is not equal to " + "ene", tmpStr.equals("ene" ) == true);
+ //Now read the UTF8 stuff
+ charsToRead = is.readVInt() - 1;//since we are skipping one
+ is.skipChars(1);
+ assertTrue(utf8Str.length() - 1 + " does not equal: " + charsToRead, utf8Str.length() - 1 == charsToRead);
+ chars = new char[charsToRead];
+ is.readChars(chars, 0, charsToRead);
+ tmpStr = new String(chars);
+ assertTrue(tmpStr + " is not equal to " + utf8Str.substring(1), tmpStr.equals(utf8Str.substring(1)) == true);
+ }
}
Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexModifier.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexModifier.java?rev=413201&r1=413200&r2=413201&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexModifier.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestIndexModifier.java Fri Jun 9 18:23:22 2006
@@ -22,6 +22,8 @@
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Field.Index;
+import org.apache.lucene.document.Field.Store;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
@@ -270,9 +272,9 @@
id++;
}
// add random stuff:
- doc.add(new Field("content", new Integer(random.nextInt(1000)).toString(), Field.Store.YES,
+ doc.add(new Field("content", new Integer(random.nextInt(1000)).toString(), Field.Store.YES,
Field.Index.TOKENIZED));
- doc.add(new Field("content", new Integer(random.nextInt(1000)).toString(), Field.Store.YES,
+ doc.add(new Field("content", new Integer(random.nextInt(1000)).toString(), Field.Store.YES,
Field.Index.TOKENIZED));
doc.add(new Field("all", "x", Field.Store.YES, Field.Index.TOKENIZED));
return doc;
Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestParallelReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestParallelReader.java?rev=413201&r1=413200&r2=413201&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestParallelReader.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestParallelReader.java Fri Jun 9 18:23:22 2006
@@ -16,20 +16,25 @@
* limitations under the License.
*/
-import java.io.IOException;
-import java.util.Collection;
-
import junit.framework.TestCase;
-
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
-import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.index.Term;
+import org.apache.lucene.document.MapFieldSelector;
+import org.apache.lucene.search.BooleanClause.Occur;
+import org.apache.lucene.search.BooleanQuery;
+import org.apache.lucene.search.Hits;
+import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.search.Query;
+import org.apache.lucene.search.Searcher;
+import org.apache.lucene.search.TermQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
-import org.apache.lucene.search.BooleanClause.Occur;
-import org.apache.lucene.search.*;
+
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.Enumeration;
public class TestParallelReader extends TestCase {
@@ -69,6 +74,35 @@
assertTrue(fieldNames.contains("f2"));
assertTrue(fieldNames.contains("f3"));
assertTrue(fieldNames.contains("f4"));
+ }
+
+ public void testDocument() throws IOException {
+ Directory dir1 = getDir1();
+ Directory dir2 = getDir2();
+ ParallelReader pr = new ParallelReader();
+ pr.add(IndexReader.open(dir1));
+ pr.add(IndexReader.open(dir2));
+
+ Document doc11 = pr.document(0, new MapFieldSelector(new String[] {"f1"}));
+ Document doc24 = pr.document(1, new MapFieldSelector(Arrays.asList(new String[] {"f4"})));
+ Document doc223 = pr.document(1, new MapFieldSelector(new String[] {"f2", "f3"}));
+
+ assertEquals(1, numFields(doc11));
+ assertEquals(1, numFields(doc24));
+ assertEquals(2, numFields(doc223));
+
+ assertEquals("v1", doc11.get("f1"));
+ assertEquals("v2", doc24.get("f4"));
+ assertEquals("v2", doc223.get("f2"));
+ assertEquals("v2", doc223.get("f3"));
+ }
+
+ private int numFields(Document doc) {
+ int num;
+ Enumeration e = doc.fields();
+ for (num=0; e.hasMoreElements(); num++)
+ e.nextElement();
+ return num;
}
public void testIncompatibleIndexes() throws IOException {
Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestSegmentMerger.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestSegmentMerger.java?rev=413201&r1=413200&r2=413201&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestSegmentMerger.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestSegmentMerger.java Fri Jun 9 18:23:22 2006
@@ -87,7 +87,7 @@
Collection stored = mergedReader.getFieldNames(IndexReader.FieldOption.INDEXED_WITH_TERMVECTOR);
assertTrue(stored != null);
//System.out.println("stored size: " + stored.size());
- assertTrue(stored.size() == 2);
+ assertTrue("We do not have 4 fields that were indexed with term vector",stored.size() == 4);
TermFreqVector vector = mergedReader.getTermFreqVector(0, DocHelper.TEXT_FIELD_2_KEY);
assertTrue(vector != null);
Modified: lucene/java/trunk/src/test/org/apache/lucene/index/TestSegmentReader.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/index/TestSegmentReader.java?rev=413201&r1=413200&r2=413201&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/index/TestSegmentReader.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/index/TestSegmentReader.java Fri Jun 9 18:23:22 2006
@@ -19,7 +19,7 @@
import junit.framework.TestCase;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
+import org.apache.lucene.document.Fieldable;
import org.apache.lucene.search.DefaultSimilarity;
import java.io.IOException;
@@ -64,7 +64,7 @@
Enumeration fields = result.fields();
while (fields.hasMoreElements()) {
- Field field = (Field) fields.nextElement();
+ Fieldable field = (Fieldable) fields.nextElement();
assertTrue(field != null);
assertTrue(DocHelper.nameValues.containsKey(field.name()));
}
@@ -166,7 +166,7 @@
public static void checkNorms(IndexReader reader) throws IOException {
// test omit norms
for (int i=0; i<DocHelper.fields.length; i++) {
- Field f = DocHelper.fields[i];
+ Fieldable f = DocHelper.fields[i];
if (f.isIndexed()) {
assertEquals(reader.hasNorms(f.name()), !f.getOmitNorms());
assertEquals(reader.hasNorms(f.name()), !DocHelper.noNorms.containsKey(f.name()));
@@ -202,7 +202,7 @@
TermFreqVector [] results = reader.getTermFreqVectors(0);
assertTrue(results != null);
- assertTrue(results.length == 2);
+ assertTrue("We do not have 4 term freq vectors, we have: " + results.length, results.length == 4);
}
}
Modified: lucene/java/trunk/src/test/org/apache/lucene/search/TestDocBoost.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/TestDocBoost.java?rev=413201&r1=413200&r2=413201&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/search/TestDocBoost.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/search/TestDocBoost.java Fri Jun 9 18:23:22 2006
@@ -17,14 +17,11 @@
*/
import junit.framework.TestCase;
-
-import org.apache.lucene.index.Term;
+import org.apache.lucene.analysis.SimpleAnalyzer;
+import org.apache.lucene.document.*;
import org.apache.lucene.index.IndexWriter;
-import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.index.Term;
import org.apache.lucene.store.RAMDirectory;
-import org.apache.lucene.analysis.SimpleAnalyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
/** Document boost unit test.
*
@@ -35,27 +32,27 @@
public TestDocBoost(String name) {
super(name);
}
-
+
public void testDocBoost() throws Exception {
RAMDirectory store = new RAMDirectory();
IndexWriter writer = new IndexWriter(store, new SimpleAnalyzer(), true);
-
- Field f1 = new Field("field", "word", Field.Store.YES, Field.Index.TOKENIZED);
- Field f2 = new Field("field", "word", Field.Store.YES, Field.Index.TOKENIZED);
+
+ Fieldable f1 = new Field("field", "word", Field.Store.YES, Field.Index.TOKENIZED);
+ Fieldable f2 = new Field("field", "word", Field.Store.YES, Field.Index.TOKENIZED);
f2.setBoost(2.0f);
-
+
Document d1 = new Document();
Document d2 = new Document();
Document d3 = new Document();
Document d4 = new Document();
d3.setBoost(3.0f);
d4.setBoost(2.0f);
-
+
d1.add(f1); // boost = 1
d2.add(f2); // boost = 2
d3.add(f1); // boost = 3
d4.add(f2); // boost = 4
-
+
writer.addDocument(d1);
writer.addDocument(d2);
writer.addDocument(d3);
@@ -72,7 +69,7 @@
scores[doc] = score;
}
});
-
+
float lastScore = 0.0f;
for (int i = 0; i < 4; i++) {
Modified: lucene/java/trunk/src/test/org/apache/lucene/search/TestMultiThreadTermVectors.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/TestMultiThreadTermVectors.java?rev=413201&r1=413200&r2=413201&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/search/TestMultiThreadTermVectors.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/search/TestMultiThreadTermVectors.java Fri Jun 9 18:23:22 2006
@@ -16,19 +16,17 @@
* limitations under the License.
*/
-import java.io.IOException;
-
import junit.framework.TestCase;
-
import org.apache.lucene.analysis.SimpleAnalyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
+import org.apache.lucene.document.*;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.TermFreqVector;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.English;
+import java.io.IOException;
+
/**
* @author Bernhard Messer
* @version $rcs = ' $Id$ ' ;
@@ -49,7 +47,7 @@
//writer.infoStream = System.out;
for (int i = 0; i < numDocs; i++) {
Document doc = new Document();
- Field fld = new Field("field", English.intToEnglish(i), Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.YES);
+ Fieldable fld = new Field("field", English.intToEnglish(i), Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.YES);
doc.add(fld);
writer.addDocument(doc);
}
Modified: lucene/java/trunk/src/test/org/apache/lucene/search/TestPhraseQuery.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/TestPhraseQuery.java?rev=413201&r1=413200&r2=413201&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/search/TestPhraseQuery.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/search/TestPhraseQuery.java Fri Jun 9 18:23:22 2006
@@ -16,22 +16,17 @@
* limitations under the License.
*/
-import java.io.IOException;
-import java.io.Reader;
-
import junit.framework.TestCase;
-import org.apache.lucene.analysis.WhitespaceAnalyzer;
-import org.apache.lucene.analysis.StopAnalyzer;
-import org.apache.lucene.analysis.Analyzer;
-import org.apache.lucene.analysis.TokenStream;
-import org.apache.lucene.analysis.WhitespaceTokenizer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
+import org.apache.lucene.analysis.*;
+import org.apache.lucene.document.*;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;
+import java.io.IOException;
+import java.io.Reader;
+
/**
* Tests {@link PhraseQuery}.
*
@@ -59,7 +54,7 @@
Document doc = new Document();
doc.add(new Field("field", "one two three four five", Field.Store.YES, Field.Index.TOKENIZED));
doc.add(new Field("repeated", "this is a repeated field - first part", Field.Store.YES, Field.Index.TOKENIZED));
- Field repeatedField = new Field("repeated", "second part of a repeated field", Field.Store.YES, Field.Index.TOKENIZED);
+ Fieldable repeatedField = new Field("repeated", "second part of a repeated field", Field.Store.YES, Field.Index.TOKENIZED);
doc.add(repeatedField);
writer.addDocument(doc);
Modified: lucene/java/trunk/src/test/org/apache/lucene/search/TestSetNorm.java
URL: http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/TestSetNorm.java?rev=413201&r1=413200&r2=413201&view=diff
==============================================================================
--- lucene/java/trunk/src/test/org/apache/lucene/search/TestSetNorm.java (original)
+++ lucene/java/trunk/src/test/org/apache/lucene/search/TestSetNorm.java Fri Jun 9 18:23:22 2006
@@ -17,15 +17,12 @@
*/
import junit.framework.TestCase;
-
-import org.apache.lucene.index.Term;
-import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.analysis.SimpleAnalyzer;
+import org.apache.lucene.document.*;
import org.apache.lucene.index.IndexReader;
-import org.apache.lucene.search.IndexSearcher;
+import org.apache.lucene.index.IndexWriter;
+import org.apache.lucene.index.Term;
import org.apache.lucene.store.RAMDirectory;
-import org.apache.lucene.analysis.SimpleAnalyzer;
-import org.apache.lucene.document.Document;
-import org.apache.lucene.document.Field;
/** Document boost unit test.
*
@@ -36,13 +33,13 @@
public TestSetNorm(String name) {
super(name);
}
-
+
public void testSetNorm() throws Exception {
RAMDirectory store = new RAMDirectory();
IndexWriter writer = new IndexWriter(store, new SimpleAnalyzer(), true);
-
+
// add the same document four times
- Field f1 = new Field("field", "word", Field.Store.YES, Field.Index.TOKENIZED);
+ Fieldable f1 = new Field("field", "word", Field.Store.YES, Field.Index.TOKENIZED);
Document d1 = new Document();
d1.add(f1);
writer.addDocument(d1);
@@ -69,7 +66,7 @@
scores[doc] = score;
}
});
-
+
float lastScore = 0.0f;
for (int i = 0; i < 4; i++) {