Posted to dev@lucene.apache.org by Grant Ingersoll <gs...@apache.org> on 2007/12/17 20:01:23 UTC

Background Merges

I am running Lucene trunk with Solr and am getting the exception below  
when I call Solr's optimize.  I will see if I can isolate it to a test  
case, but thought I would throw it out there if anyone sees anything  
obvious.

In this case, I am adding documents sequentially and then at the end  
call Solr's optimize, which invokes Lucene's optimize.  The problem  
could be in Solr in that its notion of commit does not play nice with  
Lucene's new merge policy.  However, I am posting here b/c the signs  
point to an issue in Lucene.

Cheers,
Grant


Exception in thread "Thread-20" org.apache.lucene.index.MergePolicy$MergeException: java.io.IOException: read past EOF
        at org.apache.lucene.index.ConcurrentMergeScheduler$MergeThread.run(ConcurrentMergeScheduler.java:274)
Caused by: java.io.IOException: read past EOF
        at org.apache.lucene.store.BufferedIndexInput.refill(BufferedIndexInput.java:146)
        at org.apache.lucene.store.BufferedIndexInput.readByte(BufferedIndexInput.java:38)
        at org.apache.lucene.store.IndexInput.readVInt(IndexInput.java:76)
        at org.apache.lucene.index.FieldsReader.addFieldForMerge(FieldsReader.java:280)
        at org.apache.lucene.index.FieldsReader.doc(FieldsReader.java:167)
        at org.apache.lucene.index.SegmentReader.document(SegmentReader.java:659)
        at org.apache.lucene.index.SegmentMerger.mergeFields(SegmentMerger.java:300)
        at org.apache.lucene.index.SegmentMerger.merge(SegmentMerger.java:122)
        at org.apache.lucene.index.IndexWriter.mergeMiddle(IndexWriter.java:3050)
        at org.apache.lucene.index.IndexWriter.merge(IndexWriter.java:2792)
        at org.apache.lucene.index.ConcurrentMergeScheduler$MergeThread.run(ConcurrentMergeScheduler.java:240)
Dec 17, 2007 1:44:26 PM org.apache.solr.common.SolrException log
SEVERE: java.io.IOException: background merge hit exception: _3:C500 _4:C3 _l:C500 into _m [optimize]
        at org.apache.lucene.index.IndexWriter.optimize(IndexWriter.java:1744)
        at org.apache.lucene.index.IndexWriter.optimize(IndexWriter.java:1684)
        at org.apache.lucene.index.IndexWriter.optimize(IndexWriter.java:1664)
        at org.apache.solr.update.DirectUpdateHandler2.commit(DirectUpdateHandler2.java:544)
        at org.apache.solr.update.processor.RunUpdateProcessor.processCommit(RunUpdateProcessorFactory.java:85)
        at org.apache.solr.handler.RequestHandlerUtils.handleCommit(RequestHandlerUtils.java:102)
        at org.apache.solr.handler.XmlUpdateRequestHandler.handleRequestBody(XmlUpdateRequestHandler.java:113)
        at org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:121)
        at org.apache.solr.core.SolrCore.execute(SolrCore.java:875)
        at org.apache.solr.servlet.SolrDispatchFilter.execute(SolrDispatchFilter.java:283)
        at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:234)
        at org.mortbay.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1089)
...
Caused by: java.io.IOException: read past EOF
        at org.apache.lucene.store.BufferedIndexInput.refill(BufferedIndexInput.java:146)
        at org.apache.lucene.store.BufferedIndexInput.readByte(BufferedIndexInput.java:38)
        at org.apache.lucene.store.IndexInput.readVInt(IndexInput.java:76)
        at org.apache.lucene.index.FieldsReader.addFieldForMerge(FieldsReader.java:280)
        at org.apache.lucene.index.FieldsReader.doc(FieldsReader.java:167)
        at org.apache.lucene.index.SegmentReader.document(SegmentReader.java:659)
        at org.apache.lucene.index.SegmentMerger.mergeFields(SegmentMerger.java:300)
        at org.apache.lucene.index.SegmentMerger.merge(SegmentMerger.java:122)
        at org.apache.lucene.index.IndexWriter.mergeMiddle(IndexWriter.java:3050)
        at org.apache.lucene.index.IndexWriter.merge(IndexWriter.java:2792)
        at org.apache.lucene.index.ConcurrentMergeScheduler$MergeThread.run(ConcurrentMergeScheduler.java:240)

Dec 17, 2007 1:44:26 PM org.apache.solr.core.SolrCore execute
INFO: [null] /update optimize=true&wt=xml&waitFlush=true&waitSearcher=true&version=2.2 0 1626
Dec 17, 2007 1:44:26 PM org.apache.solr.common.SolrException log
SEVERE: java.io.IOException: background merge hit exception: _3:C500 _4:C3 _l:C500 into _m [optimize]
        at org.apache.lucene.index.IndexWriter.optimize(IndexWriter.java:1744)
        at org.apache.lucene.index.IndexWriter.optimize(IndexWriter.java:1684)
        at org.apache.lucene.index.IndexWriter.optimize(IndexWriter.java:1664)
        at org.apache.solr.update.DirectUpdateHandler2.commit(DirectUpdateHandler2.java:544)
        at org.apache.solr.update.processor.RunUpdateProcessor.processCommit(RunUpdateProcessorFactory.java:85)
        at org.apache.solr.handler.RequestHandlerUtils.handleCommit(RequestHandlerUtils.java:102)
        at org.apache.solr.handler.XmlUpdateRequestHandler.handleRequestBody(XmlUpdateRequestHandler.java:113)
        at org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:121)
        at org.apache.solr.core.SolrCore.execute(SolrCore.java:875)
        at org.apache.solr.servlet.SolrDispatchFilter.execute(SolrDispatchFilter.java:283)
        at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:234)
        at org.mortbay.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1089)
...
Caused by: java.io.IOException: read past EOF
        at org.apache.lucene.store.BufferedIndexInput.refill(BufferedIndexInput.java:146)
        at org.apache.lucene.store.BufferedIndexInput.readByte(BufferedIndexInput.java:38)
        at org.apache.lucene.store.IndexInput.readVInt(IndexInput.java:76)
        at org.apache.lucene.index.FieldsReader.addFieldForMerge(FieldsReader.java:280)
        at org.apache.lucene.index.FieldsReader.doc(FieldsReader.java:167)
        at org.apache.lucene.index.SegmentReader.document(SegmentReader.java:659)
        at org.apache.lucene.index.SegmentMerger.mergeFields(SegmentMerger.java:300)
        at org.apache.lucene.index.SegmentMerger.merge(SegmentMerger.java:122)
        at org.apache.lucene.index.IndexWriter.mergeMiddle(IndexWriter.java:3050)
        at org.apache.lucene.index.IndexWriter.merge(IndexWriter.java:2792)
        at org.apache.lucene.index.ConcurrentMergeScheduler$MergeThread.run(ConcurrentMergeScheduler.java:240)



Re: Background Merges

Posted by Grant Ingersoll <gs...@apache.org>.
I don't think I did, but I wasn't really thinking too much about it at  
the time.  Like I said, let's hold off on it; at least we have a  
record of it for now.  Sorry for the noise.


On Dec 18, 2007, at 1:30 PM, Yonik Seeley wrote:

> On Dec 18, 2007 1:09 PM, Grant Ingersoll <gs...@apache.org> wrote:
>> Based on the comment in the if condition, I am assuming the field
>> numbers are not identical in this clause, which would explain the  
>> fact
>> that the Fields info is being misinterpreted.
>
> Did you change the schema and then add some more data?
> Perhaps this bug is being tickled when a segment with a stored field
> is merged with one where the same field is unstored?
>
> -Yonik





Re: Background Merges

Posted by Yonik Seeley <yo...@apache.org>.
On Dec 18, 2007 1:09 PM, Grant Ingersoll <gs...@apache.org> wrote:
> Based on the comment in the if condition, I am assuming the field
> numbers are not identical in this clause, which would explain the fact
> that the Fields info is being misinterpreted.

Did you change the schema and then add some more data?
Perhaps this bug is being tickled when a segment with a stored field
is merged with one where the same field is unstored?

-Yonik



Re: Background Merges

Posted by Grant Ingersoll <gs...@apache.org>.
On Dec 18, 2007, at 2:22 PM, Michael McCandless wrote:

>
> Grant Ingersoll wrote:
>
>> The field that is causing the problem in the stack trace is neither  
>> binary nor compressed, nor is it even stored.
>
> This would also be possible with the one bug I found on hitting an  
> exception in DocumentsWriter.addDocument.
>
> Basically the bug can cause only a subset of the stored fields to be  
> added to the fdt file even though the vint header claimed more  
> stored fields were written.  Grant, you're really sure you saw no  
> exception in Solr's logs right?  Note that the exception would  
> corrupt the index but would then not be detected until that  
> corrupted segment gets merged, so it could have been in an earlier  
> batch of added docs, for example.
>

I am not 100% sure about any of it, other than that it still seems  
possible to me that it could happen, even if I can't reproduce it.

FWIW, I was indexing Wikipedia using the EnwikiDocMaker to get docs  
and send them to Solr.

-Grant




Re: Background Merges

Posted by Michael McCandless <lu...@mikemccandless.com>.
Suresh,

I just opened this Jira issue:

     https://issues.apache.org/jira/browse/LUCENE-1168

and attached a patch that fixes the issue.

If possible, can you confirm that it fixes your issue?  Thanks for  
discovering & raising this!

Mike

On Feb 7, 2008, at 6:18 AM, suresh guvvala wrote:

> I think I have a test case to reproduce the java.io.IOException: read
> past EOF exception while merging. The attached code generates this
> exception when executed.
>
> Suresh.
>
>
> On 12/19/07, Michael McCandless <lu...@mikemccandless.com> wrote:
>
> Grant Ingersoll wrote:
>
> > The field that is causing the problem in the stack trace is neither
> > binary nor compressed, nor is it even stored.
>
> This would also be possible with the one bug I found on hitting an
> exception in DocumentsWriter.addDocument.
>
> Basically the bug can cause only a subset of the stored fields to be
> added to the fdt file even though the vint header claimed more stored
> fields were written.  Grant, you're really sure you saw no exception
> in Solr's logs right?  Note that the exception would corrupt the
> index but would then not be detected until that corrupted segment
> gets merged, so it could have been in an earlier batch of added docs,
> for example.
>
> I've been testing various combinations of changing schema, turning
> on/off stored for the same field, interspersed deletions, empty stored
> fields, etc., and can't otherwise get the bug to come out.  It's a
> sneaky one!
>
> Mike
> <TestCase.java>


Re: Background Merges

Posted by Michael McCandless <lu...@mikemccandless.com>.
Awesome, thanks!!

I'll track it down...

Mike

suresh guvvala wrote:

> I think I have a test case to reproduce the java.io.IOException: read
> past EOF exception while merging. The attached code generates this
> exception when executed.
>
> Suresh.
>
>
> On 12/19/07, Michael McCandless <lu...@mikemccandless.com> wrote:
>
> Grant Ingersoll wrote:
>
> > The field that is causing the problem in the stack trace is neither
> > binary nor compressed, nor is it even stored.
>
> This would also be possible with the one bug I found on hitting an
> exception in DocumentsWriter.addDocument.
>
> Basically the bug can cause only a subset of the stored fields to be
> added to the fdt file even though the vint header claimed more stored
> fields were written.  Grant, you're really sure you saw no exception
> in Solr's logs right?  Note that the exception would corrupt the
> index but would then not be detected until that corrupted segment
> gets merged, so it could have been in an earlier batch of added docs,
> for example.
>
> I've been testing various combinations of changing schema, turning
> on/off stored for the same field, interspersed deletions, empty stored
> fields, etc., and can't otherwise get the bug to come out.  It's a
> sneaky one!
>
> Mike
> <TestCase.java>


Re: Background Merges

Posted by suresh guvvala <gu...@gmail.com>.
I think I have a test case to reproduce the java.io.IOException: read past EOF
exception while merging. The attached code generates this exception when
executed.

Suresh.


On 12/19/07, Michael McCandless <lu...@mikemccandless.com> wrote:
>
>
> Grant Ingersoll wrote:
>
> > The field that is causing the problem in the stack trace is neither
> > binary nor compressed, nor is it even stored.
>
> This would also be possible with the one bug I found on hitting an
> exception in DocumentsWriter.addDocument.
>
> Basically the bug can cause only a subset of the stored fields to be
> added to the fdt file even though the vint header claimed more stored
> fields were written.  Grant, you're really sure you saw no exception
> in Solr's logs right?  Note that the exception would corrupt the
> index but would then not be detected until that corrupted segment
> gets merged, so it could have been in an earlier batch of added docs,
> for example.
>
> I've been testing various combinations of changing schema, turning
> on/off stored for the same field, interspersed deletions, empty stored
> fields, etc., and can't otherwise get the bug to come out.  It's a
> sneaky one!
>
> Mike

Re: Background Merges

Posted by Michael McCandless <lu...@mikemccandless.com>.
Grant Ingersoll wrote:

> The field that is causing the problem in the stack trace is neither  
> binary nor compressed, nor is it even stored.

This would also be possible with the one bug I found on hitting an  
exception in DocumentsWriter.addDocument.

Basically the bug can cause only a subset of the stored fields to be  
added to the fdt file even though the vint header claimed more stored  
fields were written.  Grant, you're really sure you saw no exception  
in Solr's logs right?  Note that the exception would corrupt the  
index but would then not be detected until that corrupted segment  
gets merged, so it could have been in an earlier batch of added docs,  
for example.
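
To make the failure mode concrete, here is a minimal, simplified sketch  
(not Lucene's actual FieldsWriter/FieldsReader code): it assumes a  
per-document layout of a count header followed by that many field  
entries, and shows how a header that over-claims sends the reader off  
the end of the file, but only when the entry is actually read, i.e. at  
merge time:

import java.io.ByteArrayInputStream;
import java.io.ByteArrayOutputStream;
import java.io.DataInputStream;
import java.io.DataOutputStream;
import java.io.IOException;

public class FdtHeaderSketch {
  public static void main(String[] args) throws IOException {
    // Write a doc entry whose count header claims 3 fields but whose
    // body only contains 2 -- the shape of the corruption described above.
    ByteArrayOutputStream bytes = new ByteArrayOutputStream();
    DataOutputStream out = new DataOutputStream(bytes);
    out.writeByte(3);          // count header (a VInt <= 127 is one byte)
    out.writeUTF("field-1");
    out.writeUTF("field-2");   // third field never written
    out.close();

    // The "merge": read the header, then trust it while copying fields.
    DataInputStream in = new DataInputStream(
        new ByteArrayInputStream(bytes.toByteArray()));
    int numFields = in.readByte();
    for (int i = 0; i < numFields; i++) {
      // The third iteration hits EOFException -- the analogue of Lucene's
      // "read past EOF" from BufferedIndexInput -- long after the corrupt
      // write itself appeared to succeed.
      System.out.println(in.readUTF());
    }
  }
}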

I've been testing various combinations of changing schema, turning  
on/off stored for the same field, interspersed deletions, empty stored  
fields, etc., and can't otherwise get the bug to come out.  It's a  
sneaky one!

Mike



Re: Background Merges

Posted by Grant Ingersoll <gs...@apache.org>.
I think the issue is my fault, but I am not exactly sure how it  
happened.  I deleted my index and have not been able to reproduce the  
problem since.

However, here's what I can tell from some debugging I did before that:

The field that is causing the problem in the stack trace is neither  
binary nor compressed, nor is it even stored.  So, the fact that it is  
being merged (see the stack trace) is just wrong, since it isn't  
stored.  I start out with 6 fields, 2 of which are stored.  When I  
come into FieldsReader, it gets the correct number of Fields; however,  
they must be out of order from when I originally indexed, or something  
like that.  AFAICT, FieldsWriter is also correctly writing out the  
Fields.  Looking at SegmentMerger, we are in the else clause of:
if (matchingSegmentReader != null) {
  // We can optimize this case (doing a bulk
  // byte copy) since the field numbers are
  // identical
  int start = j;
  int numDocs = 0;
  do {
    j++;
    numDocs++;
  } while (j < maxDoc && !matchingSegmentReader.isDeleted(j) && numDocs < MAX_RAW_MERGE_DOCS);

  IndexInput stream = matchingFieldsReader.rawDocs(rawDocLengths, start, numDocs);
  fieldsWriter.addRawDocuments(stream, rawDocLengths, numDocs);
  docCount += numDocs;
} else {
  fieldsWriter.addDocument(reader.document(j, fieldSelectorMerge));   ///////  HERE
  j++;
  docCount++;
}

Based on the comment in the if condition, I am assuming the field  
numbers are not identical in this clause, which would explain the fact  
that the Fields info is being misinterpreted.
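
For what it's worth, here is a hedged sketch of how two segments can end  
up with different field name->number mappings, the condition that forces  
the slow per-document path flagged HERE above.  Field numbers in this  
era were assigned per segment in first-seen order, so this is only an  
illustration; the directory, analyzer, and field values are assumptions,  
not taken from the failing index:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.store.RAMDirectory;

public class FieldNumberingSketch {
  public static void main(String[] args) throws Exception {
    RAMDirectory dir = new RAMDirectory();
    StandardAnalyzer analyzer = new StandardAnalyzer();

    // Segment 1: "title" is encountered first, so it gets field number 0.
    IndexWriter writer = new IndexWriter(dir, analyzer, true);
    Document d1 = new Document();
    d1.add(new Field("title", "a", Field.Store.YES, Field.Index.TOKENIZED));
    d1.add(new Field("body", "b", Field.Store.NO, Field.Index.TOKENIZED));
    writer.addDocument(d1);
    writer.close();   // flush -> a segment with its own .fnm numbering

    // Segment 2: "body" is encountered first, so here it gets number 0.
    writer = new IndexWriter(dir, analyzer, false);
    Document d2 = new Document();
    d2.add(new Field("body", "c", Field.Store.NO, Field.Index.TOKENIZED));
    d2.add(new Field("title", "d", Field.Store.YES, Field.Index.TOKENIZED));
    writer.addDocument(d2);

    // Merging now cannot bulk-copy stored fields between these segments;
    // SegmentMerger has to take the per-document path instead.
    writer.optimize();
    writer.close();
  }
}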

I still wonder if there isn't a problem in that somehow the index got  
corrupted such that the Field numbering was off between various runs  
of the IndexWriter?  Does that even seem possible in the code?

I am just thinking out loud here, not sure if it even makes sense.  I  
think we can just put this on hold for now and see if it comes up  
again, since I can't reproduce it (and I forgot to save the mislabeled  
index).

-Grant


On Dec 18, 2007, at 7:27 AM, Grant Ingersoll wrote:

> No, there were no exceptions during indexing.  I am still trying
> to work up some test cases using open documents (i.e., Wikipedia).
>
> -Grant
>
> On Dec 18, 2007, at 6:09 AM, Michael McCandless wrote:
>
>>
>> Grant,
>>
>> Do you know whether you hit any exceptions while adding docs,  
>> before you hit those merge exceptions?
>>
>> I have found one case where an exception that runs back through  
>> DocumentsWriter (during addDocument()) can produce a corrupt fdt  
>> (stored field) file.  I have a test case that shows this, and a fix.
>>
>> Mike
>>
>> Grant Ingersoll wrote:
>>
>>> I will try to work up a test case that I can share and will double  
>>> check that I have all the right pieces in place.
>>>
>>> -Grant
>>>
>>> On Dec 17, 2007, at 2:50 PM, Michael McCandless wrote:
>>>
>>>>
>>>> Yonik Seeley wrote:
>>>>
>>>>> On Dec 17, 2007 2:15 PM, Michael McCandless
>>>>> <lucene@mikemccandless.com> wrote:
>>>>>>
>>>>>> Not good!
>>>>>>
>>>>>> It's almost certainly a bug with Lucene, I think, because Solr is
>>>>>> just a consumer of Lucene's API, which shouldn't ever cause  
>>>>>> something
>>>>>> like this.
>>>>>
>>>>> Yeah... a solr level commit should just translate into
>>>>> writer.close
>>>>> reader.open  // assuming there are "overwrites"
>>>>> delete duplicates via TermDocs
>>>>> reader.close
>>>>> writer.open
>>>>> writer.optimize
>>>>> writer.close
>>>>
>>>> Seems fine!
>>>>
>>>>>> Apparently, while merging stored fields, SegmentMerger tried to  
>>>>>> read
>>>>>> too far.
>>>>>
>>>>> The code to merge stored fields was recently optimized to do  
>>>>> bulk copy
>>>>> of contiguous fields, right?
>>>>
>>>> Yes, I'm wondering the same thing... though Grant's exception is  
>>>> on the un-optimized case, because the field name->number mapping  
>>>> differed for that segment.  I'll scrutinize that change some  
>>>> more...
>>>>
>>>> Mike

--------------------------
Grant Ingersoll
http://lucene.grantingersoll.com

Lucene Helpful Hints:
http://wiki.apache.org/lucene-java/BasicsOfPerformance
http://wiki.apache.org/lucene-java/LuceneFAQ






Re: Background Merges

Posted by Grant Ingersoll <gs...@apache.org>.
No, there were no exceptions during indexing.  I am still trying to  
work up some test cases using open documents (i.e., Wikipedia).

-Grant

On Dec 18, 2007, at 6:09 AM, Michael McCandless wrote:

>
> Grant,
>
> Do you know whether you hit any exceptions while adding docs, before  
> you hit those merge exceptions?
>
> I have found one case where an exception that runs back through  
> DocumentsWriter (during addDocument()) can produce a corrupt fdt  
> (stored field) file.  I have a test case that shows this, and a fix.
>
> Mike
>
> Grant Ingersoll wrote:
>
>> I will try to work up a test case that I can share and will double  
>> check that I have all the right pieces in place.
>>
>> -Grant
>>
>> On Dec 17, 2007, at 2:50 PM, Michael McCandless wrote:
>>
>>>
>>> Yonik Seeley wrote:
>>>
>>>> On Dec 17, 2007 2:15 PM, Michael McCandless
>>>> <lucene@mikemccandless.com> wrote:
>>>>>
>>>>> Not good!
>>>>>
>>>>> It's almost certainly a bug with Lucene, I think, because Solr is
>>>>> just a consumer of Lucene's API, which shouldn't ever cause  
>>>>> something
>>>>> like this.
>>>>
>>>> Yeah... a solr level commit should just translate into
>>>> writer.close
>>>> reader.open  // assuming there are "overwrites"
>>>> delete duplicates via TermDocs
>>>> reader.close
>>>> writer.open
>>>> writer.optimize
>>>> writer.close
>>>
>>> Seems fine!
>>>
>>>>> Apparently, while merging stored fields, SegmentMerger tried to  
>>>>> read
>>>>> too far.
>>>>
>>>> The code to merge stored fields was recently optimized to do bulk  
>>>> copy
>>>> of contiguous fields, right?
>>>
>>> Yes, I'm wondering the same thing... though Grant's exception is  
>>> on the un-optimized case, because the field name->number mapping  
>>> differed for that segment.  I'll scrutinize that change some more...
>>>
>>> Mike

--------------------------
Grant Ingersoll
http://lucene.grantingersoll.com

Lucene Helpful Hints:
http://wiki.apache.org/lucene-java/BasicsOfPerformance
http://wiki.apache.org/lucene-java/LuceneFAQ






Re: Background Merges

Posted by Michael McCandless <lu...@mikemccandless.com>.
Grant,

Do you know whether you hit any exceptions while adding docs, before  
you hit those merge exceptions?

I have found one case where an exception that runs back through  
DocumentsWriter (during addDocument()) can produce a corrupt fdt  
(stored field) file.  I have a test case that shows this, and a fix.

Mike

Grant Ingersoll wrote:

> I will try to work up a test case that I can share and will double  
> check that I have all the right pieces in place.
>
> -Grant
>
> On Dec 17, 2007, at 2:50 PM, Michael McCandless wrote:
>
>>
>> Yonik Seeley wrote:
>>
>>> On Dec 17, 2007 2:15 PM, Michael McCandless  
>>> <lu...@mikemccandless.com> wrote:
>>>>
>>>> Not good!
>>>>
>>>> It's almost certainly a bug with Lucene, I think, because Solr is
>>>> just a consumer of Lucene's API, which shouldn't ever cause  
>>>> something
>>>> like this.
>>>
>>> Yeah... a solr level commit should just translate into
>>> writer.close
>>> reader.open  // assuming there are "overwrites"
>>> delete duplicates via TermDocs
>>> reader.close
>>> writer.open
>>> writer.optimize
>>> writer.close
>>
>> Seems fine!
>>
>>>> Apparently, while merging stored fields, SegmentMerger tried to  
>>>> read
>>>> too far.
>>>
>>> The code to merge stored fields was recently optimized to do bulk  
>>> copy
>>> of contiguous fields, right?
>>
>> Yes, I'm wondering the same thing... though Grant's exception is  
>> on the un-optimized case, because the field name->number mapping  
>> differed for that segment.  I'll scrutinize that change some more...
>>
>> Mike




Re: Background Merges

Posted by Grant Ingersoll <gs...@apache.org>.
I will try to work up a test case that I can share and will double  
check that I have all the right pieces in place.

-Grant

On Dec 17, 2007, at 2:50 PM, Michael McCandless wrote:

>
> Yonik Seeley wrote:
>
>> On Dec 17, 2007 2:15 PM, Michael McCandless
>> <lucene@mikemccandless.com> wrote:
>>>
>>> Not good!
>>>
>>> It's almost certainly a bug with Lucene, I think, because Solr is
>>> just a consumer of Lucene's API, which shouldn't ever cause  
>>> something
>>> like this.
>>
>> Yeah... a solr level commit should just translate into
>> writer.close
>> reader.open  // assuming there are "overwrites"
>> delete duplicates via TermDocs
>> reader.close
>> writer.open
>> writer.optimize
>> writer.close
>
> Seems fine!
>
>>> Apparently, while merging stored fields, SegmentMerger tried to read
>>> too far.
>>
>> The code to merge stored fields was recently optimized to do bulk  
>> copy
>> of contiguous fields, right?
>
> Yes, I'm wondering the same thing... though Grant's exception is on  
> the un-optimized case, because the field name->number mapping  
> differed for that segment.  I'll scrutinize that change some more...
>
> Mike

--------------------------
Grant Ingersoll
http://lucene.grantingersoll.com

Lucene Helpful Hints:
http://wiki.apache.org/lucene-java/BasicsOfPerformance
http://wiki.apache.org/lucene-java/LuceneFAQ






Re: Background Merges

Posted by Michael McCandless <lu...@mikemccandless.com>.
Yonik Seeley wrote:

> On Dec 17, 2007 2:15 PM, Michael McCandless  
> <lu...@mikemccandless.com> wrote:
>>
>> Not good!
>>
>> It's almost certainly a bug with Lucene, I think, because Solr is
>> just a consumer of Lucene's API, which shouldn't ever cause something
>> like this.
>
> Yeah... a solr level commit should just translate into
> writer.close
> reader.open  // assuming there are "overwrites"
> delete duplicates via TermDocs
> reader.close
> writer.open
> writer.optimize
> writer.close

Seems fine!

>> Apparently, while merging stored fields, SegmentMerger tried to read
>> too far.
>
> The code to merge stored fields was recently optimized to do bulk copy
> of contiguous fields, right?

Yes, I'm wondering the same thing... though Grant's exception is on  
the un-optimized case, because the field name->number mapping  
differed for that segment.  I'll scrutinize that change some more...

Mike



Re: Background Merges

Posted by Yonik Seeley <yo...@apache.org>.
On Dec 17, 2007 2:15 PM, Michael McCandless <lu...@mikemccandless.com> wrote:
>
> Not good!
>
> It's almost certainly a bug with Lucene, I think, because Solr is
> just a consumer of Lucene's API, which shouldn't ever cause something
> like this.

Yeah... a solr level commit should just translate into
writer.close
reader.open  // assuming there are "overwrites"
delete duplicates via TermDocs
reader.close
writer.open
writer.optimize
writer.close
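
Concretely, a minimal sketch of that sequence against the Lucene API of  
the era might look like this; the Directory, analyzer, and "id"  
unique-key field are assumptions for illustration, and a real overwrite  
would re-add the newest version of each duplicated document after the  
deletes:

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.RAMDirectory;

public class SolrCommitSketch {
  // Hypothetical helper mirroring the steps above; "id" is the assumed
  // unique-key field and olderDuplicates the ids that were overwritten.
  static void commit(Directory dir, StandardAnalyzer analyzer,
                     String[] olderDuplicates) throws Exception {
    // writer.close: flush buffered docs so a reader can see them.  (In
    // Solr the writer is long-lived; opening one here just to close it
    // keeps the sketch self-contained.)
    IndexWriter writer = new IndexWriter(dir, analyzer, false);
    writer.close();

    // reader.open + delete duplicates via TermDocs: deleteDocuments()
    // walks the TermDocs for each unique-key term and marks all matches
    // deleted.
    IndexReader reader = IndexReader.open(dir);
    for (String id : olderDuplicates) {
      reader.deleteDocuments(new Term("id", id));
    }
    reader.close();

    // writer.open + optimize + close.
    writer = new IndexWriter(dir, analyzer, false);
    writer.optimize();
    writer.close();
  }

  public static void main(String[] args) throws Exception {
    Directory dir = new RAMDirectory();
    StandardAnalyzer analyzer = new StandardAnalyzer();
    new IndexWriter(dir, analyzer, true).close();   // create an empty index
    commit(dir, analyzer, new String[] { "doc-42" });
  }
}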

> Apparently, while merging stored fields, SegmentMerger tried to read
> too far.

The code to merge stored fields was recently optimized to do bulk copy
of contiguous fields, right?

-Yonik



Re: Background Merges

Posted by Michael McCandless <lu...@mikemccandless.com>.
Not good!

It's almost certainly a bug with Lucene, I think, because Solr is  
just a consumer of Lucene's API, which shouldn't ever cause something  
like this.

Apparently, while merging stored fields, SegmentMerger tried to read  
too far.

Is this easily repeatable?

Mike

Grant Ingersoll wrote:

> I am running Lucene trunk with Solr and am getting the exception  
> below when I call Solr's optimize.  I will see if I can isolate it  
> to a test case, but thought I would throw it out there if anyone  
> sees anything obvious.
>
> In this case, I am adding documents sequentially and then at the  
> end call Solr's optimize, which invokes Lucene's optimize.  The  
> problem could be in Solr in that its notion of commit does not
> play nice with Lucene's new merge policy.  However, I am posting  
> here b/c the signs point to an issue in Lucene.
>
> Cheers,
> Grant
>
>
> Exception in thread "Thread-20" org.apache.lucene.index.MergePolicy$MergeException: java.io.IOException: read past EOF
>         at org.apache.lucene.index.ConcurrentMergeScheduler$MergeThread.run(ConcurrentMergeScheduler.java:274)
> Caused by: java.io.IOException: read past EOF
>         at org.apache.lucene.store.BufferedIndexInput.refill(BufferedIndexInput.java:146)
>         at org.apache.lucene.store.BufferedIndexInput.readByte(BufferedIndexInput.java:38)
>         at org.apache.lucene.store.IndexInput.readVInt(IndexInput.java:76)
>         at org.apache.lucene.index.FieldsReader.addFieldForMerge(FieldsReader.java:280)
>         at org.apache.lucene.index.FieldsReader.doc(FieldsReader.java:167)
>         at org.apache.lucene.index.SegmentReader.document(SegmentReader.java:659)
>         at org.apache.lucene.index.SegmentMerger.mergeFields(SegmentMerger.java:300)
>         at org.apache.lucene.index.SegmentMerger.merge(SegmentMerger.java:122)
>         at org.apache.lucene.index.IndexWriter.mergeMiddle(IndexWriter.java:3050)
>         at org.apache.lucene.index.IndexWriter.merge(IndexWriter.java:2792)
>         at org.apache.lucene.index.ConcurrentMergeScheduler$MergeThread.run(ConcurrentMergeScheduler.java:240)
> Dec 17, 2007 1:44:26 PM org.apache.solr.common.SolrException log
> SEVERE: java.io.IOException: background merge hit exception: _3:C500 _4:C3 _l:C500 into _m [optimize]
>         at org.apache.lucene.index.IndexWriter.optimize(IndexWriter.java:1744)
>         at org.apache.lucene.index.IndexWriter.optimize(IndexWriter.java:1684)
>         at org.apache.lucene.index.IndexWriter.optimize(IndexWriter.java:1664)
>         at org.apache.solr.update.DirectUpdateHandler2.commit(DirectUpdateHandler2.java:544)
>         at org.apache.solr.update.processor.RunUpdateProcessor.processCommit(RunUpdateProcessorFactory.java:85)
>         at org.apache.solr.handler.RequestHandlerUtils.handleCommit(RequestHandlerUtils.java:102)
>         at org.apache.solr.handler.XmlUpdateRequestHandler.handleRequestBody(XmlUpdateRequestHandler.java:113)
>         at org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:121)
>         at org.apache.solr.core.SolrCore.execute(SolrCore.java:875)
>         at org.apache.solr.servlet.SolrDispatchFilter.execute(SolrDispatchFilter.java:283)
>         at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:234)
>         at org.mortbay.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1089)
> ...
> Caused by: java.io.IOException: read past EOF
>         at org.apache.lucene.store.BufferedIndexInput.refill(BufferedIndexInput.java:146)
>         at org.apache.lucene.store.BufferedIndexInput.readByte(BufferedIndexInput.java:38)
>         at org.apache.lucene.store.IndexInput.readVInt(IndexInput.java:76)
>         at org.apache.lucene.index.FieldsReader.addFieldForMerge(FieldsReader.java:280)
>         at org.apache.lucene.index.FieldsReader.doc(FieldsReader.java:167)
>         at org.apache.lucene.index.SegmentReader.document(SegmentReader.java:659)
>         at org.apache.lucene.index.SegmentMerger.mergeFields(SegmentMerger.java:300)
>         at org.apache.lucene.index.SegmentMerger.merge(SegmentMerger.java:122)
>         at org.apache.lucene.index.IndexWriter.mergeMiddle(IndexWriter.java:3050)
>         at org.apache.lucene.index.IndexWriter.merge(IndexWriter.java:2792)
>         at org.apache.lucene.index.ConcurrentMergeScheduler$MergeThread.run(ConcurrentMergeScheduler.java:240)
>
> Dec 17, 2007 1:44:26 PM org.apache.solr.core.SolrCore execute
> INFO: [null] /update optimize=true&wt=xml&waitFlush=true&waitSearcher=true&version=2.2 0 1626
> Dec 17, 2007 1:44:26 PM org.apache.solr.common.SolrException log
> SEVERE: java.io.IOException: background merge hit exception: _3:C500 _4:C3 _l:C500 into _m [optimize]
>         at org.apache.lucene.index.IndexWriter.optimize(IndexWriter.java:1744)
>         at org.apache.lucene.index.IndexWriter.optimize(IndexWriter.java:1684)
>         at org.apache.lucene.index.IndexWriter.optimize(IndexWriter.java:1664)
>         at org.apache.solr.update.DirectUpdateHandler2.commit(DirectUpdateHandler2.java:544)
>         at org.apache.solr.update.processor.RunUpdateProcessor.processCommit(RunUpdateProcessorFactory.java:85)
>         at org.apache.solr.handler.RequestHandlerUtils.handleCommit(RequestHandlerUtils.java:102)
>         at org.apache.solr.handler.XmlUpdateRequestHandler.handleRequestBody(XmlUpdateRequestHandler.java:113)
>         at org.apache.solr.handler.RequestHandlerBase.handleRequest(RequestHandlerBase.java:121)
>         at org.apache.solr.core.SolrCore.execute(SolrCore.java:875)
>         at org.apache.solr.servlet.SolrDispatchFilter.execute(SolrDispatchFilter.java:283)
>         at org.apache.solr.servlet.SolrDispatchFilter.doFilter(SolrDispatchFilter.java:234)
>         at org.mortbay.jetty.servlet.ServletHandler$CachedChain.doFilter(ServletHandler.java:1089)
> ...
> Caused by: java.io.IOException: read past EOF
>         at org.apache.lucene.store.BufferedIndexInput.refill(BufferedIndexInput.java:146)
>         at org.apache.lucene.store.BufferedIndexInput.readByte(BufferedIndexInput.java:38)
>         at org.apache.lucene.store.IndexInput.readVInt(IndexInput.java:76)
>         at org.apache.lucene.index.FieldsReader.addFieldForMerge(FieldsReader.java:280)
>         at org.apache.lucene.index.FieldsReader.doc(FieldsReader.java:167)
>         at org.apache.lucene.index.SegmentReader.document(SegmentReader.java:659)
>         at org.apache.lucene.index.SegmentMerger.mergeFields(SegmentMerger.java:300)
>         at org.apache.lucene.index.SegmentMerger.merge(SegmentMerger.java:122)
>         at org.apache.lucene.index.IndexWriter.mergeMiddle(IndexWriter.java:3050)
>         at org.apache.lucene.index.IndexWriter.merge(IndexWriter.java:2792)
>         at org.apache.lucene.index.ConcurrentMergeScheduler$MergeThread.run(ConcurrentMergeScheduler.java:240)

