You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@lucene.apache.org by "Otis Gospodnetic (JIRA)" <ji...@apache.org> on 2006/12/20 21:29:29 UTC
[jira] Resolved: (LUCENE-436) [PATCH] TermInfosReader,
SegmentTermEnum Out Of Memory Exception
[ http://issues.apache.org/jira/browse/LUCENE-436?page=all ]
Otis Gospodnetic resolved LUCENE-436.
-------------------------------------
Resolution: Fixed
Applied and committed the LUCENE-436.patch (is JIRA smart enough not to hyperlink this?) - all unit tests still pass.
> [PATCH] TermInfosReader, SegmentTermEnum Out Of Memory Exception
> ----------------------------------------------------------------
>
> Key: LUCENE-436
> URL: http://issues.apache.org/jira/browse/LUCENE-436
> Project: Lucene - Java
> Issue Type: Improvement
> Components: Index
> Affects Versions: 1.4
> Environment: Solaris JVM 1.4.1
> Linux JVM 1.4.2/1.5.0
> Windows not tested
> Reporter: kieran
> Assigned To: Otis Gospodnetic
> Attachments: FixedThreadLocal.java, lucene-1.9.1.patch, Lucene-436-TestCase.tar.gz, LUCENE-436.patch, TermInfosReader.java, ThreadLocalTest.java
>
>
> We've been experiencing terrible memory problems on our production search server, running lucene (1.4.3).
> Our live app regularly opens new indexes and, in doing so, releases old IndexReaders for garbage collection.
> But...there appears to be a memory leak in org.apache.lucene.index.TermInfosReader.java.
> Under certain conditions (possibly related to JVM version, although I've personally observed it under both linux JVM 1.4.2_06, and 1.5.0_03, and SUNOS JVM 1.4.1) the ThreadLocal member variable, "enumerators" doesn't get garbage-collected when the TermInfosReader object is gc-ed.
> Looking at the code in TermInfosReader.java, there's no reason why it _shouldn't_ be gc-ed, so I can only presume (and I've seen this suggested elsewhere) that there could be a bug in the garbage collector of some JVMs.
> I've seen this problem briefly discussed; in particular at the following URL:
> http://java2.5341.com/msg/85821.html
> The patch that Doug recommended, which is included in lucene-1.4.3 doesn't work in our particular circumstances. Doug's patch only clears the ThreadLocal variable for the thread running the finalizer (my knowledge of java breaks down here - I'm not sure which thread actually runs the finalizer). In our situation, the TermInfosReader is (potentially) used by more than one thread, meaning that Doug's patch _doesn't_ allow the affected JVMs to correctly collect garbage.
> So...I've devised a simple patch which, from my observations on linux JVMs 1.4.2_06, and 1.5.0_03, fixes this problem.
> Kieran
> PS Thanks to daniel naber for pointing me to jira/lucene
> @@ -19,6 +19,7 @@
> import java.io.IOException;
> import org.apache.lucene.store.Directory;
> +import java.util.Hashtable;
> /** This stores a monotonically increasing set of <Term, TermInfo> pairs in a
> * Directory. Pairs are accessed either by Term or by ordinal position the
> @@ -29,7 +30,7 @@
> private String segment;
> private FieldInfos fieldInfos;
> - private ThreadLocal enumerators = new ThreadLocal();
> + private final Hashtable enumeratorsByThread = new Hashtable();
> private SegmentTermEnum origEnum;
> private long size;
> @@ -60,10 +61,10 @@
> }
> private SegmentTermEnum getEnum() {
> - SegmentTermEnum termEnum = (SegmentTermEnum)enumerators.get();
> + SegmentTermEnum termEnum = (SegmentTermEnum)enumeratorsByThread.get(Thread.currentThread());
> if (termEnum == null) {
> termEnum = terms();
> - enumerators.set(termEnum);
> + enumeratorsByThread.put(Thread.currentThread(), termEnum);
> }
> return termEnum;
> }
> @@ -195,5 +196,15 @@
> public SegmentTermEnum terms(Term term) throws IOException {
> get(term);
> return (SegmentTermEnum)getEnum().clone();
> + }
> +
> + /* some jvms might have trouble gc-ing enumeratorsByThread */
> + protected void finalize() throws Throwable {
> + try {
> + // make sure gc can clear up.
> + enumeratorsByThread.clear();
> + } finally {
> + super.finalize();
> + }
> }
> }
> TermInfosReader.java, full source:
> ======================================
> package org.apache.lucene.index;
> /**
> * Copyright 2004 The Apache Software Foundation
> *
> * Licensed under the Apache License, Version 2.0 (the "License");
> * you may not use this file except in compliance with the License.
> * You may obtain a copy of the License at
> *
> * http://www.apache.org/licenses/LICENSE-2.0
> *
> * Unless required by applicable law or agreed to in writing, software
> * distributed under the License is distributed on an "AS IS" BASIS,
> * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
> * See the License for the specific language governing permissions and
> * limitations under the License.
> */
> import java.io.IOException;
> import org.apache.lucene.store.Directory;
> import java.util.Hashtable;
> /** This stores a monotonically increasing set of <Term, TermInfo> pairs in a
> * Directory. Pairs are accessed either by Term or by ordinal position the
> * set. */
> final class TermInfosReader {
> private Directory directory;
> private String segment;
> private FieldInfos fieldInfos;
> private final Hashtable enumeratorsByThread = new Hashtable();
> private SegmentTermEnum origEnum;
> private long size;
> TermInfosReader(Directory dir, String seg, FieldInfos fis)
> throws IOException {
> directory = dir;
> segment = seg;
> fieldInfos = fis;
> origEnum = new SegmentTermEnum(directory.openFile(segment + ".tis"),
> fieldInfos, false);
> size = origEnum.size;
> readIndex();
> }
> public int getSkipInterval() {
> return origEnum.skipInterval;
> }
> final void close() throws IOException {
> if (origEnum != null)
> origEnum.close();
> }
> /** Returns the number of term/value pairs in the set. */
> final long size() {
> return size;
> }
> private SegmentTermEnum getEnum() {
> SegmentTermEnum termEnum = (SegmentTermEnum)enumeratorsByThread.get(Thread.currentThread());
> if (termEnum == null) {
> termEnum = terms();
> enumeratorsByThread.put(Thread.currentThread(), termEnum);
> }
> return termEnum;
> }
> Term[] indexTerms = null;
> TermInfo[] indexInfos;
> long[] indexPointers;
> private final void readIndex() throws IOException {
> SegmentTermEnum indexEnum =
> new SegmentTermEnum(directory.openFile(segment + ".tii"),
> fieldInfos, true);
> try {
> int indexSize = (int)indexEnum.size;
> indexTerms = new Term[indexSize];
> indexInfos = new TermInfo[indexSize];
> indexPointers = new long[indexSize];
> for (int i = 0; indexEnum.next(); i++) {
> indexTerms[i] = indexEnum.term();
> indexInfos[i] = indexEnum.termInfo();
> indexPointers[i] = indexEnum.indexPointer;
> }
> } finally {
> indexEnum.close();
> }
> }
> /** Returns the offset of the greatest index entry which is less than or equal to term.*/
> private final int getIndexOffset(Term term) throws IOException {
> int lo = 0; // binary search indexTerms[]
> int hi = indexTerms.length - 1;
> while (hi >= lo) {
> int mid = (lo + hi) >> 1;
> int delta = term.compareTo(indexTerms[mid]);
> if (delta < 0)
> hi = mid - 1;
> else if (delta > 0)
> lo = mid + 1;
> else
> return mid;
> }
> return hi;
> }
> private final void seekEnum(int indexOffset) throws IOException {
> getEnum().seek(indexPointers[indexOffset],
> (indexOffset * getEnum().indexInterval) - 1,
> indexTerms[indexOffset], indexInfos[indexOffset]);
> }
> /** Returns the TermInfo for a Term in the set, or null. */
> TermInfo get(Term term) throws IOException {
> if (size == 0) return null;
> // optimize sequential access: first try scanning cached enum w/o seeking
> SegmentTermEnum enumerator = getEnum();
> if (enumerator.term() != null // term is at or past current
> && ((enumerator.prev != null && term.compareTo(enumerator.prev) > 0)
> || term.compareTo(enumerator.term()) >= 0)) {
> int enumOffset = (int)(enumerator.position/enumerator.indexInterval)+1;
> if (indexTerms.length == enumOffset // but before end of block
> || term.compareTo(indexTerms[enumOffset]) < 0)
> return scanEnum(term); // no need to seek
> }
> // random-access: must seek
> seekEnum(getIndexOffset(term));
> return scanEnum(term);
> }
> /** Scans within block for matching term. */
> private final TermInfo scanEnum(Term term) throws IOException {
> SegmentTermEnum enumerator = getEnum();
> while (term.compareTo(enumerator.term()) > 0 && enumerator.next()) {}
> if (enumerator.term() != null && term.compareTo(enumerator.term()) == 0)
> return enumerator.termInfo();
> else
> return null;
> }
> /** Returns the nth term in the set. */
> final Term get(int position) throws IOException {
> if (size == 0) return null;
> SegmentTermEnum enumerator = getEnum();
> if (enumerator != null && enumerator.term() != null &&
> position >= enumerator.position &&
> position < (enumerator.position + enumerator.indexInterval))
> return scanEnum(position); // can avoid seek
> seekEnum(position / enumerator.indexInterval); // must seek
> return scanEnum(position);
> }
> private final Term scanEnum(int position) throws IOException {
> SegmentTermEnum enumerator = getEnum();
> while(enumerator.position < position)
> if (!enumerator.next())
> return null;
> return enumerator.term();
> }
> /** Returns the position of a Term in the set or -1. */
> final long getPosition(Term term) throws IOException {
> if (size == 0) return -1;
> int indexOffset = getIndexOffset(term);
> seekEnum(indexOffset);
> SegmentTermEnum enumerator = getEnum();
> while(term.compareTo(enumerator.term()) > 0 && enumerator.next()) {}
> if (term.compareTo(enumerator.term()) == 0)
> return enumerator.position;
> else
> return -1;
> }
> /** Returns an enumeration of all the Terms and TermInfos in the set. */
> public SegmentTermEnum terms() {
> return (SegmentTermEnum)origEnum.clone();
> }
> /** Returns an enumeration of terms starting at or after the named term. */
> public SegmentTermEnum terms(Term term) throws IOException {
> get(term);
> return (SegmentTermEnum)getEnum().clone();
> }
> /* some jvms might have trouble gc-ing enumeratorsByThread */
> protected void finalize() throws Throwable {
> try {
> // make sure gc can clear up.
> enumeratorsByThread.clear();
> } finally {
> super.finalize();
> }
> }
> }
--
This message is automatically generated by JIRA.
-
If you think it was sent incorrectly contact one of the administrators: http://issues.apache.org/jira/secure/Administrators.jspa
-
For more information on JIRA, see: http://www.atlassian.com/software/jira
---------------------------------------------------------------------
To unsubscribe, e-mail: java-dev-unsubscribe@lucene.apache.org
For additional commands, e-mail: java-dev-help@lucene.apache.org