You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@lucene.apache.org by go...@apache.org on 2004/10/06 12:40:23 UTC
cvs commit: jakarta-lucene/src/test/org/apache/lucene/search TestMultiThreadTermVectors.java
goller 2004/10/06 03:40:23
Modified: src/java/org/apache/lucene/index IndexReader.java
TermVectorsReader.java SegmentReader.java
Added: src/test/org/apache/lucene/search
TestMultiThreadTermVectors.java
Log:
Remove synchronization from TermVectors
(Patch #30736)
Revision Changes Path
1.39 +2 -2 jakarta-lucene/src/java/org/apache/lucene/index/IndexReader.java
Index: IndexReader.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/IndexReader.java,v
retrieving revision 1.38
retrieving revision 1.39
diff -u -r1.38 -r1.39
--- IndexReader.java 6 Oct 2004 09:05:56 -0000 1.38
+++ IndexReader.java 6 Oct 2004 10:40:23 -0000 1.39
@@ -532,7 +532,7 @@
protected abstract void doClose() throws IOException;
/** Release the write lock, if needed. */
- protected final void finalize() {
+ protected void finalize() {
if (writeLock != null) {
writeLock.release(); // release write lock
writeLock = null;
1.6 +22 -11 jakarta-lucene/src/java/org/apache/lucene/index/TermVectorsReader.java
Index: TermVectorsReader.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/TermVectorsReader.java,v
retrieving revision 1.5
retrieving revision 1.6
diff -u -r1.5 -r1.6
--- TermVectorsReader.java 5 Oct 2004 17:30:47 -0000 1.5
+++ TermVectorsReader.java 6 Oct 2004 10:40:23 -0000 1.6
@@ -22,11 +22,9 @@
import java.io.IOException;
/**
- * FIXME: relax synchro!
- *
* @version $Id$
*/
-class TermVectorsReader {
+class TermVectorsReader implements Cloneable {
private FieldInfos fieldInfos;
private IndexInput tvx;
@@ -86,9 +84,9 @@
* @param docNum The document number to retrieve the vector for
* @param field The field within the document to retrieve
* @return The TermFreqVector for the document and field or null if there is no termVector for this field.
- * @throws IOException
+ * @throws IOException if there is an error reading the term vector files
*/
- synchronized TermFreqVector get(int docNum, String field) throws IOException {
+ TermFreqVector get(int docNum, String field) throws IOException {
// Check if no term vectors are available for this segment at all
int fieldNumber = fieldInfos.fieldNumber(field);
TermFreqVector result = null;
@@ -137,13 +135,14 @@
return result;
}
-
/**
- * Return all term vectors stored for this document or null if there are no term vectors
- * for the document.
- * @throws IOException
+ * Return all term vectors stored for this document or null if they could not be read in.
+ *
+ * @param docNum The document number to retrieve the vector for
+ * @return All term frequency vectors
+ * @throws IOException if there is an error reading the term vector files
*/
- synchronized TermFreqVector[] get(int docNum) throws IOException {
+ TermFreqVector[] get(int docNum) throws IOException {
TermFreqVector[] result = null;
// Check if no term vectors are available for this segment at all
if (tvx != null) {
@@ -295,4 +294,16 @@
return tv;
}
+ protected Object clone() {
+ TermVectorsReader clone = null;
+ try {
+ clone = (TermVectorsReader) super.clone();
+ } catch (CloneNotSupportedException e) {}
+
+ clone.tvx = (IndexInput) tvx.clone();
+ clone.tvd = (IndexInput) tvd.clone();
+ clone.tvf = (IndexInput) tvf.clone();
+
+ return clone;
+ }
}
1.32 +31 -9 jakarta-lucene/src/java/org/apache/lucene/index/SegmentReader.java
Index: SegmentReader.java
===================================================================
RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/SegmentReader.java,v
retrieving revision 1.31
retrieving revision 1.32
diff -u -r1.31 -r1.32
--- SegmentReader.java 6 Oct 2004 09:05:56 -0000 1.31
+++ SegmentReader.java 6 Oct 2004 10:40:23 -0000 1.32
@@ -43,7 +43,8 @@
private FieldsReader fieldsReader;
TermInfosReader tis;
- TermVectorsReader termVectorsReader;
+ TermVectorsReader termVectorsReaderOrig = null;
+ ThreadLocal termVectorsLocal = new ThreadLocal();
BitVector deletedDocs = null;
private boolean deletedDocsDirty = false;
@@ -156,9 +157,15 @@
openNorms(cfsDir);
if (fieldInfos.hasVectors()) { // open term vector files only as needed
- termVectorsReader = new TermVectorsReader(cfsDir, segment, fieldInfos);
+ termVectorsReaderOrig = new TermVectorsReader(cfsDir, segment, fieldInfos);
}
}
+
+ protected void finalize() {
+ // patch for pre-1.4.2 JVMs, whose ThreadLocals leak
+ termVectorsLocal.set(null);
+ super.finalize();
+ }
protected void doCommit() throws IOException {
if (deletedDocsDirty) { // re-write deleted
@@ -193,8 +200,8 @@
closeNorms();
- if (termVectorsReader != null)
- termVectorsReader.close();
+ if (termVectorsReaderOrig != null)
+ termVectorsReaderOrig.close();
if (cfsReader != null)
cfsReader.close();
@@ -456,6 +463,19 @@
}
}
+ /**
+ * Create a clone from the initial TermVectorsReader and store it in the ThreadLocal.
+ * @return TermVectorsReader
+ */
+ private TermVectorsReader getTermVectorsReader() {
+ TermVectorsReader tvReader = (TermVectorsReader)termVectorsLocal.get();
+ if (tvReader == null) {
+ tvReader = (TermVectorsReader)termVectorsReaderOrig.clone();
+ termVectorsLocal.set(tvReader);
+ }
+ return tvReader;
+ }
+
/** Return a term frequency vector for the specified document and field. The
* vector returned contains term numbers and frequencies for all terms in
* the specified field of this document, if the field had storeTermVector
@@ -465,9 +485,10 @@
public TermFreqVector getTermFreqVector(int docNumber, String field) throws IOException {
// Check if this field is invalid or has no stored term vector
FieldInfo fi = fieldInfos.fieldInfo(field);
- if (fi == null || !fi.storeTermVector || termVectorsReader == null)
+ if (fi == null || !fi.storeTermVector || termVectorsReaderOrig == null)
return null;
-
+
+ TermVectorsReader termVectorsReader = getTermVectorsReader();
return termVectorsReader.get(docNumber, field);
}
@@ -480,9 +501,10 @@
* @throws IOException
*/
public TermFreqVector[] getTermFreqVectors(int docNumber) throws IOException {
- if (termVectorsReader == null)
+ if (termVectorsReaderOrig == null)
return null;
-
+
+ TermVectorsReader termVectorsReader = getTermVectorsReader();
return termVectorsReader.get(docNumber);
}
}
1.1 jakarta-lucene/src/test/org/apache/lucene/search/TestMultiThreadTermVectors.java
Index: TestMultiThreadTermVectors.java
===================================================================
package org.apache.lucene.search;
/**
* Copyright 2004 The Apache Software Foundation
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
import java.io.IOException;
import junit.framework.TestCase;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.TermFreqVector;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.English;
/**
 * Reads term vectors from one shared {@link IndexReader} on several threads at
 * the same time and checks that every thread gets back the terms that were
 * indexed. A mismatch or an exception on any worker thread fails the test.
 *
 * @author Bernhard Messer
 * @version $rcs = ' $Id: TestMultiThreadTermVectors.java,v 1.1 2004/10/06 10:40:23 goller Exp $ ' ;
 */
public class TestMultiThreadTermVectors extends TestCase {
  private RAMDirectory directory = new RAMDirectory();
  public int numDocs = 100;
  public int numThreads = 3;

  public TestMultiThreadTermVectors(String s) {
    super(s);
  }

  /**
   * Builds an in-memory index of <code>numDocs</code> documents. Document
   * <code>i</code> holds the English spelling of <code>i</code> as a single
   * untokenized term in the field "field", stored with a term vector.
   *
   * @throws Exception if the index cannot be written
   */
  public void setUp() throws Exception {
    IndexWriter writer = new IndexWriter(directory, new SimpleAnalyzer(), true);
    try {
      for (int i = 0; i < numDocs; i++) {
        Document doc = new Document();
        Field fld = new Field("field", English.intToEnglish(i), Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.YES);
        doc.add(fld);
        writer.addDocument(doc);
      }
    } finally {
      // release the writer even when adding a document fails
      writer.close();
    }
  }

  /**
   * Opens a single reader on the test index and runs the concurrent check
   * with 1, 2, ... <code>numThreads</code> worker threads in turn.
   */
  public void test() {
    IndexReader reader = null;
    try {
      reader = IndexReader.open(directory);
      for (int i = 1; i <= numThreads; i++) {
        testTermPositionVectors(reader, i);
      }
    } catch (IOException ioe) {
      fail("could not open the test index: " + ioe);
    } finally {
      if (reader != null) {
        try {
          // close the opened reader
          reader.close();
        } catch (IOException ioe) {
          ioe.printStackTrace();
        }
      }
    }
  }

  /**
   * Starts <code>threadCount</code> workers that all read term vectors from
   * <code>reader</code>, waits for every one of them to finish and fails the
   * test with the first problem any worker reported.
   *
   * @param reader the reader shared by all worker threads
   * @param threadCount number of worker threads to start
   */
  public void testTermPositionVectors(final IndexReader reader, int threadCount) {
    MultiThreadTermVectorsReader[] mtr = new MultiThreadTermVectorsReader[threadCount];
    for (int i = 0; i < threadCount; i++) {
      mtr[i] = new MultiThreadTermVectorsReader();
      mtr[i].init(reader);
    }

    // Wait for every worker. An interrupt does not cut the wait short (the
    // workers would otherwise outlive the reader that test() closes); it is
    // remembered and re-asserted once all workers are done.
    boolean interrupted = false;
    for (int i = 0; i < mtr.length; i++) {
      boolean joined = false;
      while (!joined) {
        try {
          mtr[i].join();
          joined = true;
        } catch (InterruptedException ie) {
          interrupted = true;
        }
      }
    }
    if (interrupted) {
      Thread.currentThread().interrupt();
    }

    for (int i = 0; i < mtr.length; i++) {
      String failure = mtr[i].getFailure();
      if (failure != null) {
        fail("worker " + i + " of " + threadCount + ": " + failure);
      }
    }
  }
}

/**
 * Worker that repeatedly reads and verifies the term vectors of every document
 * of a shared {@link IndexReader} on its own thread. The first problem it runs
 * into is kept and can be fetched with {@link #getFailure()} once the thread
 * has finished.
 */
class MultiThreadTermVectorsReader implements Runnable {
  private IndexReader reader = null;
  private Thread t = null;
  private final int runsToDo = 100;
  /** Milliseconds spent inside the term vector calls, summed over all runs. */
  long timeElapsed = 0;
  /** Description of the first problem seen by the worker thread, or null. */
  private String failure = null;

  /**
   * Resets the worker's state and starts a new thread that runs it against
   * the given reader.
   *
   * @param reader the reader to fetch term vectors from
   */
  public void init(IndexReader reader) {
    this.reader = reader;
    timeElapsed = 0;
    failure = null;
    t = new Thread(this);
    t.start();
  }

  /**
   * @return true while the worker thread is running; false if it has finished
   *         or was never started
   */
  public boolean isAlive() {
    return t != null && t.isAlive();
  }

  /**
   * Blocks until the worker thread has finished. Returns at once if the
   * worker was never started.
   *
   * @throws InterruptedException if the calling thread is interrupted while waiting
   */
  void join() throws InterruptedException {
    if (t != null) {
      t.join();
    }
  }

  /**
   * Call only after the worker thread has finished (see {@link #join()});
   * the field is not synchronized.
   *
   * @return a description of the first problem the worker ran into, or null
   *         if all checks passed
   */
  String getFailure() {
    return failure;
  }

  /**
   * Runs the full verification pass <code>runsToDo</code> times. Any
   * exception ends the loop and is recorded as this worker's failure.
   */
  public void run() {
    try {
      for (int i = 0; i < runsToDo; i++) {
        testTermVectors();
      }
    } catch (Exception e) {
      // keep the stack trace visible; the recorded text carries only the summary
      e.printStackTrace();
      failure = e.toString();
    }
  }

  /**
   * Fetches, for every document, first all term vectors and then the vector
   * of the field "field" alone, timing both calls and verifying both results.
   *
   * @throws Exception if a term vector cannot be read or does not match
   */
  private void testTermVectors() throws Exception {
    int numDocs = reader.numDocs();
    long start = 0L;
    for (int docId = 0; docId < numDocs; docId++) {
      start = System.currentTimeMillis();
      TermFreqVector[] vectors = reader.getTermFreqVectors(docId);
      timeElapsed += System.currentTimeMillis() - start;

      verifyVectors(vectors, docId);

      start = System.currentTimeMillis();
      TermFreqVector vector = reader.getTermFreqVector(docId, "field");
      timeElapsed += System.currentTimeMillis() - start;

      verifyVectors(new TermFreqVector[] { vector }, docId);
    }
  }

  /**
   * Checks that the terms of all given vectors, concatenated, spell the
   * English name of <code>num</code> (surrounding whitespace ignored).
   *
   * @param vectors the term vectors returned for the document
   * @param num the document number, which is also the number that was indexed
   * @throws IllegalStateException if a vector is missing or the terms differ
   */
  private void verifyVectors(TermFreqVector[] vectors, int num) {
    if (vectors == null) {
      throw new IllegalStateException("no term vectors returned for doc " + num);
    }
    StringBuffer temp = new StringBuffer();
    for (int i = 0; i < vectors.length; i++) {
      if (vectors[i] == null) {
        throw new IllegalStateException("missing term vector " + i + " for doc " + num);
      }
      String[] terms = vectors[i].getTerms();
      for (int z = 0; z < terms.length; z++) {
        temp.append(terms[z]);
      }
    }

    String expected = English.intToEnglish(num).trim();
    String actual = temp.toString().trim();
    if (!expected.equals(actual)) {
      throw new IllegalStateException("wrong term result for doc " + num
          + ": expected '" + expected + "' but was '" + actual + "'");
    }
  }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org