You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@lucenenet.apache.org by Seán McDonnell <se...@gmail.com> on 2019/02/19 20:50:50 UTC
No matches for TermQuery with exact search team
Hi all,
I have been working on my first project with Lucene.net and I have a
problem I have spent a couple of days working on (actually a week now).
I have an e-commerce system and I have an implementation that uses a
straightforward custom analyser:
public class CaseInsensitiveWhiteSpaceAnalyser : Analyzer{
protected override TokenStreamComponents CreateComponents(string
fieldName, TextReader reader)
{
var tokenizer = new WhitespaceTokenizer(LuceneVersion.LUCENE_48, reader);
var lowercaseFilter = new LowerCaseFilter(LuceneVersion.LUCENE_48, tokenizer);
return new TokenStreamComponents(tokenizer, lowercaseFilter);
}}
The index is constructed and items are added to the index like:
protected internal override void BuildIndex(){
Analyzer analyzer = new CaseInsensitiveWhiteSpaceAnalyser();
var writer = new IndexWriter(FSDirectory.Open(IndexPath), new
IndexWriterConfig(LuceneVersion.LUCENE_48, analyzer)
{
OpenMode = OpenMode.CREATE_OR_APPEND
});
var products = IndexItems;
var totalOrderCount = Convert.ToSingle(IndexItems.Sum(x => x.OrderCount));
foreach (var product in products)
{
AddDocToIndex(writer, product, totalOrderCount);
}
writer.Flush(true, true);
writer.Commit();
writer.Dispose();}
protected internal override void AddDocToIndex(IndexWriter writer,
ProductSearchDto product, float totalOrderCount){
var productOrderCountWeighting = Convert.ToSingle(product.OrderCount)
/ totalOrderCount * 100.0f;
var productIdField = new Field("ProductId", product.ProductId, new FieldType
{
IsStored = true,
IsIndexed = true,
IsTokenized = false
})
{
Boost = 10.0f * productOrderCountWeighting
};
var productNameField = new Field("ProductName", product.ProductName,
new FieldType
{
IsStored = true,
IsIndexed = true,
IsTokenized = true
});
productNameField.Boost = 8.0f * productOrderCountWeighting;
var productDescriptionField = new Field("Description",
!string.IsNullOrEmpty(product.Description) ?
product.Description.ToLower() : "", new FieldType
{
IsStored = true,
IsIndexed = false,
IsTokenized = false
});
var productLargeImageUrlField = new Field("LargeImageUrl",
product.LargeImageUrl, new FieldType
{
IsStored = true,
IsIndexed = false,
IsTokenized = false
});
var keywordFields = new List<Field>();
foreach (var keyword in product.ProductKeywords)
{
var keywordField = new Field("Keywords", keyword.Keyword, new FieldType
{
IsStored = true,
IsIndexed = true,
IsTokenized = false
});
keywordFields.Add(keywordField);
}
var doc = new Document();
doc.Fields.Add(productIdField);
doc.Fields.Add(productNameField);
doc.Fields.Add(productDescriptionField);
doc.Fields.Add(productLargeImageUrlField);
foreach (var keywordField in keywordFields)
{
doc.Fields.Add(keywordField);
}
if (writer.Config.OpenMode == OpenMode.CREATE)
writer.AddDocument(doc);
else
writer.UpdateDocument(new Term("ProductId", product.ProductId), doc);}
It applies 4 types of Lucene queries to a single search (Term, Wildcard,
Fuzzy and Keyword).
I am having a problem with only the TermQuery implementation:
internal class TermQueryHandler : QueryHandler{
private readonly IndexSearcher manager;
public TermQueryHandler(IndexSearcher manager) : base(manager)
{
this.manager = manager;
}
public override TopDocs HandleQuery(string searchTerm, string
categoryId, int? recordCount)
{
var searchTerms = searchTerm.Split(new string[] { " " },
StringSplitOptions.RemoveEmptyEntries);
var query = new BooleanQuery();
foreach (var search in searchTerms)
{
query.Add(new TermQuery(new Term("ProductName", search)), Occur.MUST);
query.Boost = 10.0f;
}
var results = manager.Search(query, recordCount ?? 10);
return results;
}}
If I search for an exact product name/term such as "*SMS - Request
International*" I am getting no matches in the TermQuery search.
This is the method that is the entry point in to the whole Lucene
implementation I have:
//Removed some of the noise at the start and end of this method to try
and make it easier to read.public override
IList<ProductSearchResponseDto> Search(string search, string
categoryId, int? recordCount){
if (!DirectoryReader.IndexExists(FSDirectory.Open(IndexPath)))
ConstructItemsIndex();
var docs = new List<ScoreDoc>();
search = QueryParser.Escape(search);
if (string.IsNullOrEmpty(categoryId) || categoryId.Equals("all",
StringComparison.OrdinalIgnoreCase))
{
categoryId = null;
}
var searchManager = new SearcherManager(FSDirectory.Open(IndexPath), null);
searchManager.MaybeRefreshBlocking();
var searcher = searchManager.Acquire();
var termHandler = new TermQueryHandler(searcher);
var fuzzyHandler = new FuzzyQueryHandler(searcher);
var wildcardHandler = new WildcardQueryHandler(searcher);
var keywordHandler = new KeywordQueryHandler(searcher);
var termResults = termHandler.HandleQuery(search, categoryId, recordCount);
docs.AddRange(termResults.ScoreDocs);
var wildcardResults = wildcardHandler.HandleQuery(search, categoryId,
recordCount);
docs.AddRange(wildcardResults.ScoreDocs);
var keywordResults = keywordHandler.HandleQuery(search, categoryId,
recordCount);
docs.AddRange(keywordResults.ScoreDocs);
if (docs.Count() == 0)
{
var fuzzyResults = fuzzyHandler.HandleQuery(search, categoryId, recordCount);
docs.AddRange(fuzzyResults.ScoreDocs);
}}
Any ideas would be great as I have tried (for days) and am getting no
closer I feel.
Many thanks if you made it this far.
Seán