You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by di...@apache.org on 2011/05/29 17:47:21 UTC
[Lucene.Net] svn commit: r1128893 - in
/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch:
FacetName.cs GroupName.cs Hits.cs HitsPerGroup.cs README.txt
SimpleFacetedSearch.cs SimpleFacetedSearch.csproj
Author: digy
Date: Sun May 29 15:47:21 2011
New Revision: 1128893
URL: http://svn.apache.org/viewvc?rev=1128893&view=rev
Log:
[LUCENENET-415]
* max-facet-count check added (default 2048)
* multi-threaded calculation of facets added
* HitCount is not a performance killer anymore (due to multithreading)
Added:
incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/FacetName.cs
Removed:
incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/GroupName.cs
Modified:
incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/Hits.cs
incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/HitsPerGroup.cs
incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/README.txt
incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/SimpleFacetedSearch.cs
incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/SimpleFacetedSearch.csproj
Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/FacetName.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/FacetName.cs?rev=1128893&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/FacetName.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/FacetName.cs Sun May 29 15:47:21 2011
@@ -0,0 +1,51 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Search
+{
+ public partial class SimpleFacetedSearch
+ {
+ public class FacetName // Read-only view over the array of name components that labels one facet.
+ {
+ string[] _Names; // Kept by reference; the array passed to the constructor is not copied.
+ internal FacetName(string[] names) // Internal: instances are created only from inside this assembly.
+ {
+ this._Names = names;
+ }
+
+ public string this[int i] // Returns the i-th name component; indexing is delegated directly to the underlying array.
+ {
+ get { return _Names[i]; }
+ }
+
+ public int Length // Number of name components held.
+ {
+ get { return _Names.Length; }
+ }
+
+ public override string ToString() // All components joined with "/" as the separator.
+ {
+ return String.Join("/", _Names);
+ }
+ }
+ }
+}
Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/Hits.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/Hits.cs?rev=1128893&r1=1128892&r2=1128893&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/Hits.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/Hits.cs Sun May 29 15:47:21 2011
@@ -27,7 +27,7 @@ namespace Lucene.Net.Search
public class Hits
{
long _TotalHitCount = -1;
- HitsPerGroup[] _HitsPerGroup;
+ HitsPerFacet[] _HitsPerGroup;
public long TotalHitCount
{
@@ -45,7 +45,7 @@ namespace Lucene.Net.Search
}
}
- public HitsPerGroup[] HitsPerGroup
+ public HitsPerFacet[] HitsPerFacet
{
get { return _HitsPerGroup; }
internal set { _HitsPerGroup = value; }
Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/HitsPerGroup.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/HitsPerGroup.cs?rev=1128893&r1=1128892&r2=1128893&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/HitsPerGroup.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/HitsPerGroup.cs Sun May 29 15:47:21 2011
@@ -33,42 +33,48 @@ namespace Lucene.Net.Search
{
public partial class SimpleFacetedSearch
{
- public class HitsPerGroup : IEnumerable<Document>, IEnumerator<Document>
+ public class HitsPerFacet : IEnumerable<Document>, IEnumerator<Document>
{
IndexReader _Reader;
- int _MaxDocPerGroup;
+ int _MaxDocPerFacet;
int _ItemsReturned = 0;
DocIdSetIterator _ResultIterator;
OpenBitSetDISI _ResultBitSet;
int _CurrentDocId;
+ DocIdSet _QueryDocidSet;
+ OpenBitSetDISI _GroupBitSet;
- GroupName _GroupName;
+ FacetName _FacetName;
long _HitCount = -1;
- internal HitsPerGroup(GroupName group, IndexReader reader, DocIdSet queryDocidSet, OpenBitSetDISI groupBitSet, int maxDocPerGroup)
+ internal HitsPerFacet(FacetName facetName, IndexReader reader, DocIdSet queryDocidSet, OpenBitSetDISI groupBitSet, int maxDocPerFacet)
{
- this._GroupName = group;
+ this._FacetName = facetName;
this._Reader = reader;
- this._MaxDocPerGroup = maxDocPerGroup;
+ this._MaxDocPerFacet = maxDocPerFacet;
+ this._QueryDocidSet = queryDocidSet;
+ this._GroupBitSet = groupBitSet;
+
+ }
- _ResultBitSet = new OpenBitSetDISI(queryDocidSet.Iterator(), _Reader.MaxDoc());
- _ResultBitSet.And(groupBitSet);
+ internal void Calculate()
+ {
+ _ResultBitSet = new OpenBitSetDISI(_QueryDocidSet.Iterator(), _Reader.MaxDoc());
+ _ResultBitSet.And(_GroupBitSet);
_ResultIterator = _ResultBitSet.Iterator();
+
+ _HitCount = _ResultBitSet.Cardinality();
}
- public GroupName Name
+ public FacetName Name
{
- get { return _GroupName; }
+ get { return _FacetName; }
}
public long HitCount
{
- get
- {
- if (_HitCount == -1) _HitCount = _ResultBitSet.Cardinality();
- return _HitCount;
- }
+ get{ return _HitCount; }
}
public Document Current
@@ -84,7 +90,7 @@ namespace Lucene.Net.Search
public bool MoveNext()
{
_CurrentDocId = _ResultIterator.NextDoc();
- return _CurrentDocId != DocIdSetIterator.NO_MORE_DOCS && ++_ItemsReturned <= _MaxDocPerGroup;
+ return _CurrentDocId != DocIdSetIterator.NO_MORE_DOCS && ++_ItemsReturned <= _MaxDocPerFacet;
}
public IEnumerator<Document> GetEnumerator()
@@ -107,7 +113,7 @@ namespace Lucene.Net.Search
}
- public HitsPerGroup Documents
+ public HitsPerFacet Documents
{
get { return this; }
}
Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/README.txt
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/README.txt?rev=1128893&r1=1128892&r2=1128893&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/README.txt (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/README.txt Sun May 29 15:47:21 2011
@@ -7,16 +7,21 @@ Sample Usage:
Query query = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, field, analyzer).Parse(searchString);
SimpleFacetedSearch.Hits hits = sfs.Search(query, 10);
-
- foreach (SimpleFacetedSearch.HitsPerGroup hpg in hits.HitsPerGroup)
+
+ long totalHits = hits.TotalHitCount;
+
+ foreach (SimpleFacetedSearch.HitsPerFacet hpf in hits.HitsPerFacet)
{
- SimpleFacetedSearch.GroupName name = hpg.Name;
- foreach (Document doc in hpg.Documents)
+ long hitCountPerFacet = hpf.HitCount;
+ SimpleFacetedSearch.FacetName name = hpf.Name;
+ //name[0]
+ //name[1]
+ //name.ToString()
+
+ foreach (Document doc in hpf.Documents)
{
........
}
}
-
-PS: Hits.TotalHitCount & HitsPerGroup.HitCount properties are costly operations. Try to avoid using them if possible.
Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/SimpleFacetedSearch.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/SimpleFacetedSearch.cs?rev=1128893&r1=1128892&r2=1128893&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/SimpleFacetedSearch.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/SimpleFacetedSearch.cs Sun May 29 15:47:21 2011
@@ -28,6 +28,7 @@ using Lucene.Net.Search;
using Lucene.Net.QueryParsers;
using Lucene.Net.Store;
using Lucene.Net.Util;
+using System.Threading;
/*
Suppose, we want a faceted search on fields f1 f2 f3,
@@ -58,9 +59,11 @@ namespace Lucene.Net.Search
public partial class SimpleFacetedSearch : IDisposable
{
public const int DefaultMaxDocPerGroup = 25;
+ public static int MAX_FACETS = 2048;
IndexReader _Reader;
List<KeyValuePair<List<string>, OpenBitSetDISI>> _Groups = new List<KeyValuePair<List<string>, OpenBitSetDISI>>();
+ Semaphore _Sync;
public SimpleFacetedSearch(IndexReader reader, string groupByField) : this(reader, new string[] { groupByField })
{
@@ -76,10 +79,13 @@ namespace Lucene.Net.Search
//f1 = A, B
//f2 = I, J
//f3 = 1, 2, 3
+ int maxFacets = 1;
List<List<string>> inputToCP = new List<List<string>>();
foreach (string field in groupByFields)
{
FieldValuesBitSets f = new FieldValuesBitSets(reader, field);
+ maxFacets *= f.FieldValueBitSetPair.Count;
+ if (maxFacets > MAX_FACETS) throw new Exception("Facet count exceeded " + MAX_FACETS);
fieldValuesBitSets.Add(f);
inputToCP.Add(f.FieldValueBitSetPair.Keys.ToList());
}
@@ -114,22 +120,39 @@ namespace Lucene.Net.Search
}
//Now _Groups has 7 rows (as <List<string>, BitSet> pairs)
+
+ if( _Groups.Count>0) _Sync = new Semaphore(_Groups.Count, _Groups.Count);
}
+
public Hits Search(Query query, int maxDocPerGroup = DefaultMaxDocPerGroup)
{
- List<HitsPerGroup> hitsPerGroup = new List<HitsPerGroup>();
+ List<HitsPerFacet> hitsPerGroup = new List<HitsPerFacet>();
DocIdSet queryDocidSet = new CachingWrapperFilter(new QueryWrapperFilter(query)).GetDocIdSet(_Reader);
-
+
for (int i = 0; i < _Groups.Count; i++)
{
- HitsPerGroup h = new HitsPerGroup(new GroupName(_Groups[i].Key.ToArray()), _Reader, queryDocidSet, _Groups[i].Value, maxDocPerGroup);
+ HitsPerFacet h = new HitsPerFacet(new FacetName(_Groups[i].Key.ToArray()), _Reader, queryDocidSet, _Groups[i].Value, maxDocPerGroup);
hitsPerGroup.Add(h);
+ _Sync.WaitOne();
+ ThreadPool.QueueUserWorkItem(
+ hpf =>
+ {
+ ((HitsPerFacet)hpf).Calculate();
+ _Sync.Release();
+ },
+ h
+ );
}
+ for (int i = 0; i < _Groups.Count; i++)
+ _Sync.WaitOne();
+
+ if (_Groups.Count > 0) _Sync.Release(_Groups.Count);
+
Hits hits = new Hits();
- hits.HitsPerGroup = hitsPerGroup.ToArray();
+ hits.HitsPerFacet = hitsPerGroup.ToArray();
return hits;
}
Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/SimpleFacetedSearch.csproj
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/SimpleFacetedSearch.csproj?rev=1128893&r1=1128892&r2=1128893&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/SimpleFacetedSearch.csproj (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/SimpleFacetedSearch.csproj Sun May 29 15:47:21 2011
@@ -42,7 +42,7 @@
<ItemGroup>
<Compile Include="Extensions.cs" />
<Compile Include="FieldValuesBitSets.cs" />
- <Compile Include="GroupName.cs" />
+ <Compile Include="FacetName.cs" />
<Compile Include="Hits.cs" />
<Compile Include="HitsPerGroup.cs" />
<Compile Include="Properties\AssemblyInfo.cs" />