You are viewing a plain text version of this content. The canonical link for it is here.
Posted to java-user@lucene.apache.org by Java Programmer <jp...@gmail.com> on 2006/12/14 13:04:26 UTC

Lucene change field values to wrong ones when indexing

Hello,
I have problem with my search code - i try to index some data with
searching simultanously. Everything goes fine till some number of data
are indexed then my fields are bugged.
Eg. I have field with title indexed as "Nowitzki führt "Mavs" zum
ersten Heimsieg" and inner id "15" (not doc id, just field called id).
At the end of indexing this field disappear, and some other values in
id field appear. I provide full listing of my program, without AXIS
part which is responsible for data transmitting. If you can watch my
code, maybe somewhere is wrong locking mechanism, or any other bug -
please help me if you can.

import java.io.IOException;
import java.io.InputStream;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Properties;

import javax.servlet.ServletContext;
import javax.servlet.http.HttpServlet;

import org.apache.axis.MessageContext;
import org.apache.axis.transport.http.HTTPConstants;
import org.apache.lucene.analysis.SimpleAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexModifier;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Hits;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.RangeQuery;
import org.apache.lucene.search.Sort;

public class SNewsSearch {
	
	private static SNewsSearch search = new SNewsSearch();
	
	private static IndexSearcher indexSearcher;
	
	private String indexLocation;
	
	private String adminLogin;
	
	private String adminPass;
	
	private SNewsSearch(){
		System.out.println("Constructor SNewsSearch");
		
		Properties props = new Properties();
		HttpServlet srv = (HttpServlet)
MessageContext.getCurrentContext().getProperty(HTTPConstants.MC_HTTP_SERVLET);
		ServletContext context = srv.getServletContext ();
		InputStream is = context.getResourceAsStream("/WEB-INF/lucene.properties");
		try {
			props.load(is);
			System.out.println("Using index: " + props.getProperty("index.location"));
			this.indexLocation = props.getProperty("index.location");
			this.adminLogin = props.getProperty("admin.login");
			this.adminPass = props.getProperty("admin.pass");
		} catch (IOException e) {
			e.printStackTrace();
		}
	}
	
	public static SNewsSearch getInstance(){
		return search;
	}
	
	public void setIndexLocation(String indexLocation) {
		this.indexLocation = indexLocation;
	}
	
	private static Object lock = new Object();
	
	public void addDocument(int id,String title, String body, int
pubDate) throws IOException{
		Document doc = new Document();
		doc.add(new Field("id",Integer.toString(id),Store.YES,Index.UN_TOKENIZED));
		doc.add(new Field("title",title,Store.NO,Index.TOKENIZED));
		doc.add(new Field("body",body,Store.NO,Index.TOKENIZED));
		doc.add(new Field("pubDate",Integer.toString(pubDate),Store.NO,Index.UN_TOKENIZED));
		synchronized(lock){
			IndexWriter indexWriter = new IndexWriter(indexLocation, new
SimpleAnalyzer(), false);
			indexWriter.addDocument(doc);
			indexWriter.close();
			if(indexSearcher!=null)
				indexSearcher.close();
			indexSearcher = null;
		}
	}
	
	public Map<String,Object> search(String query, int pubDate, int page,
int results, boolean reverse) throws IOException, ParseException{
		MultiFieldQueryParser queryparser = new MultiFieldQueryParser(new
String[]{"title","body"}, new SimpleAnalyzer());
		Query q; // = queryparser.parse(query);
		if(pubDate!=0){
			BooleanQuery bq = new BooleanQuery();
			bq.add(queryparser.parse(query),BooleanClause.Occur.MUST);
			bq.add(new RangeQuery(
					new Term("pubDate",Integer.toString(pubDate)),
					new Term("pubDate","99999999"),
					true),BooleanClause.Occur.MUST);
			q = bq;
		} else {
			q = queryparser.parse(query);
		}
		Hits hits;
		synchronized(lock){
			if(indexSearcher==null)
				indexSearcher = new IndexSearcher(indexLocation);
			
			if(reverse)
				hits = indexSearcher.search(q, new Sort("pubDate",true));
			else
				hits = indexSearcher.search(q);
		}

		List<Integer> output = new ArrayList<Integer>();
		int i = page*results;
		while(i<hits.length()){
			if(i >= (page+1)*results) break;
			Document doc = hits.doc(i);
			output.add(Integer.parseInt(doc.get("id")));
			i++;
		}
		Map<String,Object> searchResult = new HashMap<String,Object>();
		searchResult.put("numberOfResults", hits.length());
		searchResult.put("results", output);
		return searchResult;
	}

	public void deleteDocument(int id) throws IOException{
		synchronized(lock){
			IndexModifier indexModifier = new IndexModifier(indexLocation, new
SimpleAnalyzer(), false);
			indexModifier.deleteDocuments(new Term("id", Integer.toString(id)));
			indexModifier.close();
			if(indexSearcher!=null)
				indexSearcher.close();
			indexSearcher = null;
		}
	}
	
	public void setUp(String adminLogin, String adminPass) throws IOException {
		if(adminLogin.equals(this.adminLogin) && adminPass.equals(this.adminPass)){
			synchronized(lock){
				IndexWriter indexWriter = new IndexWriter(indexLocation, new
SimpleAnalyzer(), true);
				indexWriter.close();			
				if(indexSearcher!=null)
					indexSearcher.close();
				indexSearcher = null;
			}
		}
	}
}

Best regards,
Adrian

---------------------------------------------------------------------
To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
For additional commands, e-mail: java-user-help@lucene.apache.org


Re: Lucene change field values to wrong ones when indexing

Posted by Steven Rowe <sa...@syr.edu>.
Hi Adrian,

I don't see anything obviously wrong with your code.

Can you give more details about which field values are different from
what you expect?  I'm guessing it's the id field you're worried about,
but it's not clear from what you have written whether it's the title or
the id field which is corrupted.  A full example would be useful,
showing how you retrieve a result and how you know it contains the wrong
thing.

If you're not familiar with it, Luke is quite useful for directly
looking at Lucene index contents: http://www.getopt.org/luke/

Steve

Java Programmer wrote:
> Hello,
> I have problem with my search code - i try to index some data with
> searching simultanously. Everything goes fine till some number of data
> are indexed then my fields are bugged.
> Eg. I have field with title indexed as "Nowitzki führt "Mavs" zum
> ersten Heimsieg" and inner id "15" (not doc id, just field called id).
> At the end of indexing this field disappear, and some other values in
> id field appear. I provide full listing of my program, without AXIS
> part which is responsible for data transmitting. If you can watch my
> code, maybe somewhere is wrong locking mechanism, or any other bug -
> please help me if you can.
> 
> import java.io.IOException;
> import java.io.InputStream;
> import java.util.ArrayList;
> import java.util.HashMap;
> import java.util.List;
> import java.util.Map;
> import java.util.Properties;
> 
> import javax.servlet.ServletContext;
> import javax.servlet.http.HttpServlet;
> 
> import org.apache.axis.MessageContext;
> import org.apache.axis.transport.http.HTTPConstants;
> import org.apache.lucene.analysis.SimpleAnalyzer;
> import org.apache.lucene.document.Document;
> import org.apache.lucene.document.Field;
> import org.apache.lucene.document.Field.Index;
> import org.apache.lucene.document.Field.Store;
> import org.apache.lucene.index.IndexModifier;
> import org.apache.lucene.index.IndexWriter;
> import org.apache.lucene.index.Term;
> import org.apache.lucene.queryParser.MultiFieldQueryParser;
> import org.apache.lucene.queryParser.ParseException;
> import org.apache.lucene.search.BooleanClause;
> import org.apache.lucene.search.BooleanQuery;
> import org.apache.lucene.search.Hits;
> import org.apache.lucene.search.IndexSearcher;
> import org.apache.lucene.search.Query;
> import org.apache.lucene.search.RangeQuery;
> import org.apache.lucene.search.Sort;
> 
> public class SNewsSearch {
>     
>     private static SNewsSearch search = new SNewsSearch();
>     
>     private static IndexSearcher indexSearcher;
>     
>     private String indexLocation;
>     
>     private String adminLogin;
>     
>     private String adminPass;
>     
>     private SNewsSearch(){
>         System.out.println("Constructor SNewsSearch");
>        
>         Properties props = new Properties();
>         HttpServlet srv = (HttpServlet)
> MessageContext.getCurrentContext().getProperty(HTTPConstants.MC_HTTP_SERVLET);
> 
>         ServletContext context = srv.getServletContext ();
>         InputStream is =
> context.getResourceAsStream("/WEB-INF/lucene.properties");
>         try {
>             props.load(is);
>             System.out.println("Using index: " +
> props.getProperty("index.location"));
>             this.indexLocation = props.getProperty("index.location");
>             this.adminLogin = props.getProperty("admin.login");
>             this.adminPass = props.getProperty("admin.pass");
>         } catch (IOException e) {
>             e.printStackTrace();
>         }
>     }
>     
>     public static SNewsSearch getInstance(){
>         return search;
>     }
>     
>     public void setIndexLocation(String indexLocation) {
>         this.indexLocation = indexLocation;
>     }
>     
>     private static Object lock = new Object();
>     
>     public void addDocument(int id,String title, String body, int
> pubDate) throws IOException{
>         Document doc = new Document();
>         doc.add(new
> Field("id",Integer.toString(id),Store.YES,Index.UN_TOKENIZED));
>         doc.add(new Field("title",title,Store.NO,Index.TOKENIZED));
>         doc.add(new Field("body",body,Store.NO,Index.TOKENIZED));
>         doc.add(new
> Field("pubDate",Integer.toString(pubDate),Store.NO,Index.UN_TOKENIZED));
>         synchronized(lock){
>             IndexWriter indexWriter = new IndexWriter(indexLocation, new
> SimpleAnalyzer(), false);
>             indexWriter.addDocument(doc);
>             indexWriter.close();
>             if(indexSearcher!=null)
>                 indexSearcher.close();
>             indexSearcher = null;
>         }
>     }
>     
>     public Map<String,Object> search(String query, int pubDate, int page,
> int results, boolean reverse) throws IOException, ParseException{
>         MultiFieldQueryParser queryparser = new MultiFieldQueryParser(new
> String[]{"title","body"}, new SimpleAnalyzer());
>         Query q; // = queryparser.parse(query);
>         if(pubDate!=0){
>             BooleanQuery bq = new BooleanQuery();
>             bq.add(queryparser.parse(query),BooleanClause.Occur.MUST);
>             bq.add(new RangeQuery(
>                     new Term("pubDate",Integer.toString(pubDate)),
>                     new Term("pubDate","99999999"),
>                     true),BooleanClause.Occur.MUST);
>             q = bq;
>         } else {
>             q = queryparser.parse(query);
>         }
>         Hits hits;
>         synchronized(lock){
>             if(indexSearcher==null)
>                 indexSearcher = new IndexSearcher(indexLocation);
>            
>             if(reverse)
>                 hits = indexSearcher.search(q, new Sort("pubDate",true));
>             else
>                 hits = indexSearcher.search(q);
>         }
> 
>         List<Integer> output = new ArrayList<Integer>();
>         int i = page*results;
>         while(i<hits.length()){
>             if(i >= (page+1)*results) break;
>             Document doc = hits.doc(i);
>             output.add(Integer.parseInt(doc.get("id")));
>             i++;
>         }
>         Map<String,Object> searchResult = new HashMap<String,Object>();
>         searchResult.put("numberOfResults", hits.length());
>         searchResult.put("results", output);
>         return searchResult;
>     }
> 
>     public void deleteDocument(int id) throws IOException{
>         synchronized(lock){
>             IndexModifier indexModifier = new
> IndexModifier(indexLocation, new
> SimpleAnalyzer(), false);
>             indexModifier.deleteDocuments(new Term("id",
> Integer.toString(id)));
>             indexModifier.close();
>             if(indexSearcher!=null)
>                 indexSearcher.close();
>             indexSearcher = null;
>         }
>     }
>     
>     public void setUp(String adminLogin, String adminPass) throws
> IOException {
>         if(adminLogin.equals(this.adminLogin) &&
> adminPass.equals(this.adminPass)){
>             synchronized(lock){
>                 IndexWriter indexWriter = new IndexWriter(indexLocation,
> new
> SimpleAnalyzer(), true);
>                 indexWriter.close();           
>                 if(indexSearcher!=null)
>                     indexSearcher.close();
>                 indexSearcher = null;
>             }
>         }
>     }
> }
> 
> Best regards,
> Adrian
> 
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
> For additional commands, e-mail: java-user-help@lucene.apache.org
> 


---------------------------------------------------------------------
To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
For additional commands, e-mail: java-user-help@lucene.apache.org


Re: Lucene change field values to wrong ones when indexing

Posted by Doron Cohen <DO...@il.ibm.com>.
Two things I would check:

1) converting pubDate to String during indexing for later
date-range-filtering search results might not work well, because, e.g.,
string wise, "9" > "1000000". You could use Lucene's DateTools - there's an
example in TestDateFilter -
http://svn.apache.org/viewvc/lucene/java/trunk/src/test/org/apache/lucene/search/TestDateFilter.java?view=log

2) the synchronization on lock, protects from closing indexSearcher while
calling indexSearcher.search(). But it is not protecting the fetching of
results. So it is possible that an indexSearcher is closed (by add or
delete) before/while fetching those id fields. Even worse, if another
search arrives (if you have concurrent searches), the later search might
(re)open indexSearcher, and now, if there were deletes in between, you
would definitely fetch different docs because the internal docids have
changed - I think this would explain the change of id field. It would be
probably better to organize the synchronization better, so that an update
would only mark the need in refreshing the searcher, and would not close it
- the searching part would take care of that.

Hope I got this right...
Doron

"Java Programmer" <jp...@gmail.com> wrote:

> Hello,
> I have problem with my search code - i try to index some data with
> searching simultanously. Everything goes fine till some number of data
> are indexed then my fields are bugged.
> Eg. I have field with title indexed as "Nowitzki führt "Mavs" zum
> ersten Heimsieg" and inner id "15" (not doc id, just field called id).
> At the end of indexing this field disappear, and some other values in
> id field appear. I provide full listing of my program, without AXIS
> part which is responsible for data transmitting. If you can watch my
> code, maybe somewhere is wrong locking mechanism, or any other bug -
> please help me if you can.
>
> import java.io.IOException;
> import java.io.InputStream;
> import java.util.ArrayList;
> import java.util.HashMap;
> import java.util.List;
> import java.util.Map;
> import java.util.Properties;
>
> import javax.servlet.ServletContext;
> import javax.servlet.http.HttpServlet;
>
> import org.apache.axis.MessageContext;
> import org.apache.axis.transport.http.HTTPConstants;
> import org.apache.lucene.analysis.SimpleAnalyzer;
> import org.apache.lucene.document.Document;
> import org.apache.lucene.document.Field;
> import org.apache.lucene.document.Field.Index;
> import org.apache.lucene.document.Field.Store;
> import org.apache.lucene.index.IndexModifier;
> import org.apache.lucene.index.IndexWriter;
> import org.apache.lucene.index.Term;
> import org.apache.lucene.queryParser.MultiFieldQueryParser;
> import org.apache.lucene.queryParser.ParseException;
> import org.apache.lucene.search.BooleanClause;
> import org.apache.lucene.search.BooleanQuery;
> import org.apache.lucene.search.Hits;
> import org.apache.lucene.search.IndexSearcher;
> import org.apache.lucene.search.Query;
> import org.apache.lucene.search.RangeQuery;
> import org.apache.lucene.search.Sort;
>
> public class SNewsSearch {
>
>    private static SNewsSearch search = new SNewsSearch();
>
>    private static IndexSearcher indexSearcher;
>
>    private String indexLocation;
>
>    private String adminLogin;
>
>    private String adminPass;
>
>    private SNewsSearch(){
>       System.out.println("Constructor SNewsSearch");
>
>       Properties props = new Properties();
>       HttpServlet srv = (HttpServlet)
>
MessageContext.getCurrentContext().getProperty(HTTPConstants.MC_HTTP_SERVLET);

>       ServletContext context = srv.getServletContext ();
>       InputStream is = context.getResourceAsStream("/WEB-INF/lucene.
> properties");
>       try {
>          props.load(is);
>          System.out.println("Using index: " + props.
> getProperty("index.location"));
>          this.indexLocation = props.getProperty("index.location");
>          this.adminLogin = props.getProperty("admin.login");
>          this.adminPass = props.getProperty("admin.pass");
>       } catch (IOException e) {
>          e.printStackTrace();
>       }
>    }
>
>    public static SNewsSearch getInstance(){
>       return search;
>    }
>
>    public void setIndexLocation(String indexLocation) {
>       this.indexLocation = indexLocation;
>    }
>
>    private static Object lock = new Object();
>
>    public void addDocument(int id,String title, String body, int
> pubDate) throws IOException{
>       Document doc = new Document();
>       doc.add(new Field("id",Integer.toString(id),Store.YES,Index.
> UN_TOKENIZED));
>       doc.add(new Field("title",title,Store.NO,Index.TOKENIZED));
>       doc.add(new Field("body",body,Store.NO,Index.TOKENIZED));
>       doc.add(new Field("pubDate",Integer.toString(pubDate),Store.
> NO,Index.UN_TOKENIZED));
>       synchronized(lock){
>          IndexWriter indexWriter = new IndexWriter(indexLocation, new
> SimpleAnalyzer(), false);
>          indexWriter.addDocument(doc);
>          indexWriter.close();
>          if(indexSearcher!=null)
>             indexSearcher.close();
>          indexSearcher = null;
>       }
>    }
>
>    public Map<String,Object> search(String query, int pubDate, int page,
> int results, boolean reverse) throws IOException, ParseException{
>       MultiFieldQueryParser queryparser = new MultiFieldQueryParser(new
> String[]{"title","body"}, new SimpleAnalyzer());
>       Query q; // = queryparser.parse(query);
>       if(pubDate!=0){
>          BooleanQuery bq = new BooleanQuery();
>          bq.add(queryparser.parse(query),BooleanClause.Occur.MUST);
>          bq.add(new RangeQuery(
>                new Term("pubDate",Integer.toString(pubDate)),
>                new Term("pubDate","99999999"),
>                true),BooleanClause.Occur.MUST);
>          q = bq;
>       } else {
>          q = queryparser.parse(query);
>       }
>       Hits hits;
>       synchronized(lock){
>          if(indexSearcher==null)
>             indexSearcher = new IndexSearcher(indexLocation);
>
>          if(reverse)
>             hits = indexSearcher.search(q, new Sort("pubDate",true));
>          else
>             hits = indexSearcher.search(q);
>       }
>
>       List<Integer> output = new ArrayList<Integer>();
>       int i = page*results;
>       while(i<hits.length()){
>          if(i >= (page+1)*results) break;
>          Document doc = hits.doc(i);
>          output.add(Integer.parseInt(doc.get("id")));
>          i++;
>       }
>       Map<String,Object> searchResult = new HashMap<String,Object>();
>       searchResult.put("numberOfResults", hits.length());
>       searchResult.put("results", output);
>       return searchResult;
>    }
>
>    public void deleteDocument(int id) throws IOException{
>       synchronized(lock){
>          IndexModifier indexModifier = new IndexModifier(indexLocation,
new
> SimpleAnalyzer(), false);
>          indexModifier.deleteDocuments(new Term("id",
Integer.toString(id)));
>          indexModifier.close();
>          if(indexSearcher!=null)
>             indexSearcher.close();
>          indexSearcher = null;
>       }
>    }
>
>    public void setUp(String adminLogin, String adminPass) throws
IOException {
>       if(adminLogin.equals(this.adminLogin) && adminPass.
> equals(this.adminPass)){
>          synchronized(lock){
>             IndexWriter indexWriter = new IndexWriter(indexLocation, new
> SimpleAnalyzer(), true);
>             indexWriter.close();
>             if(indexSearcher!=null)
>                indexSearcher.close();
>             indexSearcher = null;
>          }
>       }
>    }
> }
>
> Best regards,
> Adrian
>
> ---------------------------------------------------------------------
> To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
> For additional commands, e-mail: java-user-help@lucene.apache.org
>


---------------------------------------------------------------------
To unsubscribe, e-mail: java-user-unsubscribe@lucene.apache.org
For additional commands, e-mail: java-user-help@lucene.apache.org