You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@lucene.apache.org by go...@apache.org on 2004/10/06 12:40:23 UTC

cvs commit: jakarta-lucene/src/test/org/apache/lucene/search TestMultiThreadTermVectors.java

goller      2004/10/06 03:40:23

  Modified:    src/java/org/apache/lucene/index IndexReader.java
                        TermVectorsReader.java SegmentReader.java
  Added:       src/test/org/apache/lucene/search
                        TestMultiThreadTermVectors.java
  Log:
  Remove synchronization from TermVectors
  (Patch #30736)
  
  Revision  Changes    Path
  1.39      +2 -2      jakarta-lucene/src/java/org/apache/lucene/index/IndexReader.java
  
  Index: IndexReader.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/IndexReader.java,v
  retrieving revision 1.38
  retrieving revision 1.39
  diff -u -r1.38 -r1.39
  --- IndexReader.java	6 Oct 2004 09:05:56 -0000	1.38
  +++ IndexReader.java	6 Oct 2004 10:40:23 -0000	1.39
  @@ -532,7 +532,7 @@
     protected abstract void doClose() throws IOException;
   
     /** Release the write lock, if needed. */
  -  protected final void finalize() {
  +  protected void finalize() {
       if (writeLock != null) {
         writeLock.release();                        // release write lock
         writeLock = null;
  
  
  
  1.6       +22 -11    jakarta-lucene/src/java/org/apache/lucene/index/TermVectorsReader.java
  
  Index: TermVectorsReader.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/TermVectorsReader.java,v
  retrieving revision 1.5
  retrieving revision 1.6
  diff -u -r1.5 -r1.6
  --- TermVectorsReader.java	5 Oct 2004 17:30:47 -0000	1.5
  +++ TermVectorsReader.java	6 Oct 2004 10:40:23 -0000	1.6
  @@ -22,11 +22,9 @@
   import java.io.IOException;
   
   /**
  - * FIXME: relax synchro!
  - *
    * @version $Id$
    */
  -class TermVectorsReader {
  +class TermVectorsReader implements Cloneable {
     private FieldInfos fieldInfos;
   
     private IndexInput tvx;
  @@ -86,9 +84,9 @@
      * @param docNum The document number to retrieve the vector for
      * @param field The field within the document to retrieve
      * @return The TermFreqVector for the document and field or null if there is no termVector for this field.
  -   * @throws IOException
  +   * @throws IOException if there is an error reading the term vector files
      */ 
  -  synchronized TermFreqVector get(int docNum, String field) throws IOException {
  +  TermFreqVector get(int docNum, String field) throws IOException {
       // Check if no term vectors are available for this segment at all
       int fieldNumber = fieldInfos.fieldNumber(field);
       TermFreqVector result = null;
  @@ -137,13 +135,14 @@
       return result;
     }
   
  -
     /**
  -   * Return all term vectors stored for this document or null if there are no term vectors
  -   * for the document.
  -   * @throws IOException
  +   * Return all term vectors stored for this document or null if they could not be read in.
  +   * 
  +   * @param docNum The document number to retrieve the vector for
  +   * @return All term frequency vectors
  +   * @throws IOException if there is an error reading the term vector files 
      */
  -  synchronized TermFreqVector[] get(int docNum) throws IOException {
  +  TermFreqVector[] get(int docNum) throws IOException {
       TermFreqVector[] result = null;
       // Check if no term vectors are available for this segment at all
       if (tvx != null) {
  @@ -295,4 +294,16 @@
       return tv;
     }
   
  +  protected Object clone() {
  +    TermVectorsReader clone = null;
  +    try {
  +      clone = (TermVectorsReader) super.clone();
  +    } catch (CloneNotSupportedException e) {}
  +
  +    clone.tvx = (IndexInput) tvx.clone();
  +    clone.tvd = (IndexInput) tvd.clone();
  +    clone.tvf = (IndexInput) tvf.clone();
  +    
  +    return clone;
  +  }
   }
  
  
  
  1.32      +31 -9     jakarta-lucene/src/java/org/apache/lucene/index/SegmentReader.java
  
  Index: SegmentReader.java
  ===================================================================
  RCS file: /home/cvs/jakarta-lucene/src/java/org/apache/lucene/index/SegmentReader.java,v
  retrieving revision 1.31
  retrieving revision 1.32
  diff -u -r1.31 -r1.32
  --- SegmentReader.java	6 Oct 2004 09:05:56 -0000	1.31
  +++ SegmentReader.java	6 Oct 2004 10:40:23 -0000	1.32
  @@ -43,7 +43,8 @@
     private FieldsReader fieldsReader;
   
     TermInfosReader tis;
  -  TermVectorsReader termVectorsReader;
  +  TermVectorsReader termVectorsReaderOrig = null;
  +  ThreadLocal termVectorsLocal = new ThreadLocal();
   
     BitVector deletedDocs = null;
     private boolean deletedDocsDirty = false;
  @@ -156,9 +157,15 @@
       openNorms(cfsDir);
   
       if (fieldInfos.hasVectors()) { // open term vector files only as needed
  -      termVectorsReader = new TermVectorsReader(cfsDir, segment, fieldInfos);
  +      termVectorsReaderOrig = new TermVectorsReader(cfsDir, segment, fieldInfos);
       }
     }
  +   
  +   protected void finalize() {
  +     // patch for pre-1.4.2 JVMs, whose ThreadLocals leak
  +     termVectorsLocal.set(null);
  +     super.finalize();
  +   }
   
     protected void doCommit() throws IOException {
       if (deletedDocsDirty) {               // re-write deleted 
  @@ -193,8 +200,8 @@
   
       closeNorms();
       
  -    if (termVectorsReader != null) 
  -      termVectorsReader.close();
  +    if (termVectorsReaderOrig != null) 
  +      termVectorsReaderOrig.close();
   
       if (cfsReader != null)
         cfsReader.close();
  @@ -456,6 +463,19 @@
       }
     }
     
  +  /**
  +   * Create a clone from the initial TermVectorsReader and store it in the ThreadLocal.
  +   * @return TermVectorsReader
  +   */
  +  private TermVectorsReader getTermVectorsReader() {
  +    TermVectorsReader tvReader = (TermVectorsReader)termVectorsLocal.get();
  +    if (tvReader == null) {
  +      tvReader = (TermVectorsReader)termVectorsReaderOrig.clone();
  +      termVectorsLocal.set(tvReader);
  +    }
  +    return tvReader;
  +  }
  +  
     /** Return a term frequency vector for the specified document and field. The
      *  vector returned contains term numbers and frequencies for all terms in
      *  the specified field of this document, if the field had storeTermVector
  @@ -465,9 +485,10 @@
     public TermFreqVector getTermFreqVector(int docNumber, String field) throws IOException {
       // Check if this field is invalid or has no stored term vector
       FieldInfo fi = fieldInfos.fieldInfo(field);
  -    if (fi == null || !fi.storeTermVector || termVectorsReader == null) 
  +    if (fi == null || !fi.storeTermVector || termVectorsReaderOrig == null) 
         return null;
  -
  +    
  +    TermVectorsReader termVectorsReader = getTermVectorsReader();
       return termVectorsReader.get(docNumber, field);
     }
   
  @@ -480,9 +501,10 @@
      * @throws IOException
      */
     public TermFreqVector[] getTermFreqVectors(int docNumber) throws IOException {
  -    if (termVectorsReader == null)
  +    if (termVectorsReaderOrig == null)
         return null;
  -
  +    
  +    TermVectorsReader termVectorsReader = getTermVectorsReader();
       return termVectorsReader.get(docNumber);
     }
   }
  
  
  
  1.1                  jakarta-lucene/src/test/org/apache/lucene/search/TestMultiThreadTermVectors.java
  
  Index: TestMultiThreadTermVectors.java
  ===================================================================
  package org.apache.lucene.search;
  
  /**
   * Copyright 2004 The Apache Software Foundation
   *
   * Licensed under the Apache License, Version 2.0 (the "License");
   * you may not use this file except in compliance with the License.
   * You may obtain a copy of the License at
   *
   *     http://www.apache.org/licenses/LICENSE-2.0
   *
   * Unless required by applicable law or agreed to in writing, software
   * distributed under the License is distributed on an "AS IS" BASIS,
   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
   * See the License for the specific language governing permissions and
   * limitations under the License.
   */
  
  import java.io.IOException;
  
  import junit.framework.TestCase;
  
  import org.apache.lucene.analysis.SimpleAnalyzer;
  import org.apache.lucene.document.Document;
  import org.apache.lucene.document.Field;
  import org.apache.lucene.index.IndexReader;
  import org.apache.lucene.index.IndexWriter;
  import org.apache.lucene.index.TermFreqVector;
  import org.apache.lucene.store.RAMDirectory;
  import org.apache.lucene.util.English;
  
  /**
   * Reads term vectors from one shared {@link IndexReader} on several threads
   * at once and fails if any thread gets a wrong result or an exception.
   *
   * @author Bernhard Messer
   * @version $rcs = ' $Id: TestMultiThreadTermVectors.java,v 1.1 2004/10/06 10:40:23 goller Exp $ ' ;
   */
  public class TestMultiThreadTermVectors extends TestCase {
    private RAMDirectory directory = new RAMDirectory();
    public int numDocs = 100;
    public int numThreads = 3;

    public TestMultiThreadTermVectors(String s) {
      super(s);
    }

    /**
     * Fills the in-memory directory with <code>numDocs</code> documents. Document
     * <code>i</code> has one untokenized field named "field" whose value is
     * <code>English.intToEnglish(i)</code>, stored with a term vector.
     */
    public void setUp() throws Exception {
      IndexWriter writer
              = new IndexWriter(directory, new SimpleAnalyzer(), true);
      //writer.setUseCompoundFile(false);
      //writer.infoStream = System.out;
      for (int i = 0; i < numDocs; i++) {
        Document doc = new Document();
        Field fld = new Field("field", English.intToEnglish(i), Field.Store.YES, Field.Index.UN_TOKENIZED, Field.TermVector.YES);
        doc.add(fld);
        writer.addDocument(doc);
      }
      writer.close();
    }

    /**
     * Opens one reader on the index and runs the concurrent check with
     * 1, 2, ... <code>numThreads</code> threads sharing that reader.
     */
    public void test() {
      IndexReader reader = null;

      try {
        reader = IndexReader.open(directory);
        for (int i = 1; i <= numThreads; i++) {
          testTermPositionVectors(reader, i);
        }
      } catch (IOException ioe) {
        // Report the exception itself: getMessage() alone may be null and hides the type.
        fail("unexpected IOException: " + ioe);
      } finally {
        if (reader != null) {
          try {
            // close the opened reader
            reader.close();
          } catch (IOException ioe) {
            ioe.printStackTrace();
          }
        }
      }
    }

    /**
     * Starts <code>threadCount</code> worker threads that all read term vectors
     * from <code>reader</code>, waits for every one of them to finish and fails
     * the test if any worker recorded a problem.
     *
     * @param reader the reader shared by all worker threads
     * @param threadCount number of worker threads to start
     */
    public void testTermPositionVectors(final IndexReader reader, int threadCount) {
      MultiThreadTermVectorsReader[] mtr = new MultiThreadTermVectorsReader[threadCount];
      for (int i = 0; i < threadCount; i++) {
        mtr[i] = new MultiThreadTermVectorsReader();
        mtr[i].init(reader);
      }

      // Wait until all threads have finished.
      for (int i = 0; i < mtr.length; i++) {
        try {
          mtr[i].join();
        } catch (InterruptedException ie) {
          // Keep the interrupt status visible to the caller and stop waiting.
          Thread.currentThread().interrupt();
          fail("interrupted while waiting for reader thread " + i + " of " + mtr.length);
        }
      }

      // A worker cannot fail the JUnit thread directly, so surface its recorded problem here.
      for (int i = 0; i < mtr.length; i++) {
        Throwable failure = mtr[i].getFailure();
        if (failure != null) {
          fail("reader thread " + i + " of " + mtr.length + " failed: " + failure);
        }
      }
    }

  }

  /**
   * Worker that repeatedly reads and verifies the term vectors of every document
   * through a shared {@link IndexReader} on its own thread. The first problem it
   * hits is kept and can be queried with {@link #getFailure()} once the thread
   * has finished.
   */
  class MultiThreadTermVectorsReader implements Runnable {

    private IndexReader reader = null;
    private Thread t = null;

    private final int runsToDo = 100;
    long timeElapsed = 0;

    // First problem seen by the worker thread; read by the test thread after join().
    private Throwable failure = null;

    /**
     * Stores the reader, resets the timer and any earlier failure, and starts
     * the worker thread.
     *
     * @param reader the reader to fetch term vectors from
     */
    public void init(IndexReader reader) {
      this.reader = reader;
      timeElapsed = 0;
      failure = null;
      t = new Thread(this);
      t.start();
    }

    /**
     * @return true while the worker thread is running; false if it was never
     *         started or has finished
     */
    public boolean isAlive() {
      if (t == null) {
        return false;
      }

      return t.isAlive();
    }

    /**
     * Blocks until the worker thread has finished. Returns immediately if the
     * worker was never started.
     *
     * @throws InterruptedException if the calling thread is interrupted while waiting
     */
    void join() throws InterruptedException {
      if (t != null) {
        t.join();
      }
    }

    /**
     * @return the problem that stopped the worker, or null if none was recorded
     */
    Throwable getFailure() {
      return failure;
    }

    public void run() {
      try {
        // run the test 100 times
        for (int i = 0; i < runsToDo; i++) {
          testTermVectors();
        }
      } catch (Throwable e) {
        // Remember the problem for the test thread; printing alone never fails the test.
        failure = e;
        e.printStackTrace();
      }
    }

    /**
     * Reads the term vectors of every document, once through
     * <code>getTermFreqVectors(docId)</code> and once through
     * <code>getTermFreqVector(docId, "field")</code>, and verifies both results.
     * The time spent inside the reader calls is added to <code>timeElapsed</code>.
     */
    private void testTermVectors() throws Exception {
      // check:
      int numDocs = reader.numDocs();
      long start = 0L;
      for (int docId = 0; docId < numDocs; docId++) {
        start = System.currentTimeMillis();
        TermFreqVector[] vectors = reader.getTermFreqVectors(docId);
        timeElapsed += System.currentTimeMillis() - start;

        // verify vectors result
        verifyVectors(vectors, docId);

        start = System.currentTimeMillis();
        TermFreqVector vector = reader.getTermFreqVector(docId, "field");
        timeElapsed += System.currentTimeMillis() - start;

        vectors = new TermFreqVector[1];
        vectors[0] = vector;

        verifyVectors(vectors, docId);
      }
    }

    /**
     * Checks that the concatenated terms of <code>vectors</code> equal the text
     * indexed for document <code>num</code>.
     *
     * @param vectors the term vectors read for the document
     * @param num the document number, also the number whose English form was indexed
     * @throws IllegalStateException if a vector is missing or the terms do not match
     */
    private void verifyVectors(TermFreqVector[] vectors, int num) {
      if (vectors == null) {
        throw new IllegalStateException("no term vectors returned for document " + num);
      }

      StringBuffer temp = new StringBuffer();
      for (int i = 0; i < vectors.length; i++) {
        if (vectors[i] == null) {
          throw new IllegalStateException("term vector " + i + " is null for document " + num);
        }
        String[] terms = vectors[i].getTerms();
        for (int z = 0; z < terms.length; z++) {
          temp.append(terms[z]);
        }
      }

      String expected = English.intToEnglish(num).trim();
      String actual = temp.toString().trim();
      if (!expected.equals(actual)) {
        throw new IllegalStateException("wrong term result for document " + num
            + ": expected \"" + expected + "\" but was \"" + actual + "\"");
      }
    }
  }
  
  
  

---------------------------------------------------------------------
To unsubscribe, e-mail: lucene-dev-unsubscribe@jakarta.apache.org
For additional commands, e-mail: lucene-dev-help@jakarta.apache.org