You are viewing a plain text version of this content. The canonical link for it is here.

Posted to dev@lucene.apache.org by Robert Muir <rc...@gmail.com> on 2009/11/22 16:22:49 UTC

Re: svn commit: r883088 - in /lucene/java/branches/flex_1458/src/java/org/apache/lucene/index: TermRef.java codecs/standard/StandardTermsDictReader.java

Was this why I saw strange benchmark results?

On Sun, Nov 22, 2009 at 9:52 AM, <mi...@apache.org> wrote:

> Author: mikemccand
> Date: Sun Nov 22 14:52:02 2009
> New Revision: 883088
>
> URL: http://svn.apache.org/viewvc?rev=883088&view=rev
> Log:
> LUCENE-1458 (on flex branch): small optimization to terms dict cache: don't
> store redundant TermRef
>
> Modified:
>
>  lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermRef.java
>
>  lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java
>
> Modified:
> lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermRef.java
> URL:
> http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermRef.java?rev=883088&r1=883087&r2=883088&view=diff
>
> ==============================================================================
> ---
> lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermRef.java
> (original)
> +++
> lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/TermRef.java
> Sun Nov 22 14:52:02 2009
> @@ -36,6 +36,8 @@
>     copy(text);
>   }
>
> +  // nocommit: we could do this w/ UnicodeUtil w/o requiring
> +  // allocation of new bytes[]?
>   public void copy(String text) {
>     try {
>       bytes = text.getBytes("UTF-8");
>
> Modified:
> lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java
> URL:
> http://svn.apache.org/viewvc/lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java?rev=883088&r1=883087&r2=883088&view=diff
>
> ==============================================================================
> ---
> lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java
> (original)
> +++
> lucene/java/branches/flex_1458/src/java/org/apache/lucene/index/codecs/standard/StandardTermsDictReader.java
> Sun Nov 22 14:52:02 2009
> @@ -304,6 +304,7 @@
>       public SeekStatus seek(TermRef term) throws IOException {
>         ReuseLRUCache<TermRef, CacheEntry> cache = null;
>         CacheEntry entry = null;
> +        TermRef entryKey = null;
>
>         if (docs.canCaptureState()) {
>           final ThreadResources resources = getThreadResources();
> @@ -312,7 +313,7 @@
>           entry = cache.get(term);
>           if (entry != null) {
>             docFreq = entry.freq;
> -            bytesReader.term.copy(entry.term);
> +            bytesReader.term.copy(term);
>             docs.setState(entry, docFreq);
>             termUpto = entry.termUpTo;
>             // nocommit -- would be better to do this lazy?
> @@ -384,16 +385,17 @@
>                 entry = cache.eldest;
>                 cache.eldest = null;
>                 docs.captureState(entry);
> -                entry.term.copy(bytesReader.term);
> +                entryKey = cache.eldestKey;
> +                entryKey.copy(bytesReader.term);
>               } else {
>                 entry = docs.captureState(null);
> -                entry.term = (TermRef) bytesReader.term.clone();
> +                entryKey = (TermRef) bytesReader.term.clone();
>               }
>               entry.freq = docFreq;
>               entry.termUpTo = termUpto;
>               entry.filePointer = in.getFilePointer();
>
> -              cache.put(entry.term, entry);
> +              cache.put(entryKey, entry);
>             }
>             return SeekStatus.FOUND;
>           } else if (cmp > 0) {
> @@ -517,9 +519,8 @@
>
>   // nocommit -- scrutinize API
>   public static class CacheEntry {
> -    int termUpTo;
> -    TermRef term; // nocommit -- really needed?
> -    long filePointer;
> +    int termUpTo;                                 // ord for this term
> +    long filePointer;                             // fp into the terms dict primary file (_X.tis)
>
>     // nocommit -- belongs in Pulsing's CacheEntry class:
>     public int freq;
> @@ -563,6 +564,7 @@
>     private final static float LOADFACTOR = 0.75f;
>     private int cacheSize;
>     V eldest;
> +    K eldestKey;
>
>     /**
>      * Creates a least-recently-used cache with the specified size.
> @@ -580,6 +582,7 @@
>       boolean remove = size() > ReuseLRUCache.this.cacheSize;
>       if (remove) {
>         this.eldest = eldest.getValue();
> +        this.eldestKey = eldest.getKey();
>       }
>       return remove;
>     }
>
>
>


-- 
Robert Muir
rcmuir@gmail.com