You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucenenet.apache.org by di...@apache.org on 2011/05/29 17:47:21 UTC

[Lucene.Net] svn commit: r1128893 - in /incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch: FacetName.cs GroupName.cs Hits.cs HitsPerGroup.cs README.txt SimpleFacetedSearch.cs SimpleFacetedSearch.csproj

Author: digy
Date: Sun May 29 15:47:21 2011
New Revision: 1128893

URL: http://svn.apache.org/viewvc?rev=1128893&view=rev
Log:
[LUCENENET-415]
* max-facet-count check added (default 2048)
* multi-threaded calculation of facets added
* HitCount is not a performance killer anymore (due to multithreading)

Added:
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/FacetName.cs
Removed:
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/GroupName.cs
Modified:
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/Hits.cs
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/HitsPerGroup.cs
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/README.txt
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/SimpleFacetedSearch.cs
    incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/SimpleFacetedSearch.csproj

Added: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/FacetName.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/FacetName.cs?rev=1128893&view=auto
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/FacetName.cs (added)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/FacetName.cs Sun May 29 15:47:21 2011
@@ -0,0 +1,51 @@
+/* 
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ * 
+ * http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+using System;
+using System.Collections.Generic;
+using System.Linq;
+using System.Text;
+
+namespace Lucene.Net.Search
+{
+    public partial class SimpleFacetedSearch
+    {
+        public class FacetName
+        {
+            string[] _Names;
+            internal FacetName(string[] names)
+            {
+                this._Names = names;
+            }
+
+            public string this[int i]
+            {
+                get { return _Names[i]; }
+            }
+
+            public int Length
+            {
+                get { return _Names.Length; }
+            }
+
+            public override string ToString()
+            {
+                return String.Join("/", _Names);
+            }
+        }
+    }
+}

Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/Hits.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/Hits.cs?rev=1128893&r1=1128892&r2=1128893&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/Hits.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/Hits.cs Sun May 29 15:47:21 2011
@@ -27,7 +27,7 @@ namespace Lucene.Net.Search
         public class Hits
         {
             long _TotalHitCount = -1;
-            HitsPerGroup[] _HitsPerGroup;
+            HitsPerFacet[] _HitsPerGroup;
 
             public long TotalHitCount
             {
@@ -45,7 +45,7 @@ namespace Lucene.Net.Search
                 }
             }
 
-            public HitsPerGroup[] HitsPerGroup
+            public HitsPerFacet[] HitsPerFacet
             {
                 get { return _HitsPerGroup; }
                 internal set { _HitsPerGroup = value; }

Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/HitsPerGroup.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/HitsPerGroup.cs?rev=1128893&r1=1128892&r2=1128893&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/HitsPerGroup.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/HitsPerGroup.cs Sun May 29 15:47:21 2011
@@ -33,42 +33,48 @@ namespace Lucene.Net.Search
 {
     public partial class SimpleFacetedSearch
     {
-        public class HitsPerGroup : IEnumerable<Document>, IEnumerator<Document>
+        public class HitsPerFacet : IEnumerable<Document>, IEnumerator<Document>
         {
             IndexReader _Reader;
-            int _MaxDocPerGroup;
+            int _MaxDocPerFacet;
             int _ItemsReturned = 0;
             DocIdSetIterator _ResultIterator;
             OpenBitSetDISI _ResultBitSet;
             int _CurrentDocId;
+            DocIdSet _QueryDocidSet;
+            OpenBitSetDISI _GroupBitSet;
 
-            GroupName _GroupName;
+            FacetName _FacetName;
             long _HitCount = -1;
 
-            internal HitsPerGroup(GroupName group, IndexReader reader, DocIdSet queryDocidSet, OpenBitSetDISI groupBitSet, int maxDocPerGroup)
+            internal HitsPerFacet(FacetName facetName, IndexReader reader, DocIdSet queryDocidSet, OpenBitSetDISI groupBitSet, int maxDocPerFacet)
             {
-                this._GroupName = group;
+                this._FacetName = facetName;
                 this._Reader = reader;
-                this._MaxDocPerGroup = maxDocPerGroup;
+                this._MaxDocPerFacet = maxDocPerFacet;
+                this._QueryDocidSet = queryDocidSet;
+                this._GroupBitSet = groupBitSet;
+                
+            }
 
-                _ResultBitSet = new OpenBitSetDISI(queryDocidSet.Iterator(), _Reader.MaxDoc());
-                _ResultBitSet.And(groupBitSet);
+            internal void Calculate()
+            {
+                _ResultBitSet = new OpenBitSetDISI(_QueryDocidSet.Iterator(), _Reader.MaxDoc());
+                _ResultBitSet.And(_GroupBitSet);
 
                 _ResultIterator = _ResultBitSet.Iterator();
+
+                _HitCount = _ResultBitSet.Cardinality();
             }
 
-            public GroupName Name
+            public FacetName Name
             {
-                get { return _GroupName; }
+                get { return _FacetName; }
             }
 
             public long HitCount
             {
-                get
-                {
-                    if (_HitCount == -1) _HitCount = _ResultBitSet.Cardinality();
-                    return _HitCount;
-                }
+                get{ return _HitCount; }
             }
 
             public Document Current
@@ -84,7 +90,7 @@ namespace Lucene.Net.Search
             public bool MoveNext()
             {
                 _CurrentDocId = _ResultIterator.NextDoc();
-                return _CurrentDocId != DocIdSetIterator.NO_MORE_DOCS && ++_ItemsReturned <= _MaxDocPerGroup;
+                return _CurrentDocId != DocIdSetIterator.NO_MORE_DOCS && ++_ItemsReturned <= _MaxDocPerFacet;
             }
 
             public IEnumerator<Document> GetEnumerator()
@@ -107,7 +113,7 @@ namespace Lucene.Net.Search
 
             }
 
-            public HitsPerGroup Documents
+            public HitsPerFacet Documents
             {
                 get { return this; }
             }

Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/README.txt
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/README.txt?rev=1128893&r1=1128892&r2=1128893&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/README.txt (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/README.txt Sun May 29 15:47:21 2011
@@ -7,16 +7,21 @@ Sample Usage:
 
     Query query = new QueryParser(Lucene.Net.Util.Version.LUCENE_29, field, analyzer).Parse(searchString);
     SimpleFacetedSearch.Hits hits = sfs.Search(query, 10);
-       
-    foreach (SimpleFacetedSearch.HitsPerGroup hpg in hits.HitsPerGroup)
+    
+	long totalHits = hits.TotalHitCount;
+	
+    foreach (SimpleFacetedSearch.HitsPerFacet hpf in hits.HitsPerFacet)
     {
-        SimpleFacetedSearch.GroupName name = hpg.Name;
-        foreach (Document doc in hpg.Documents)
+		long hitCountPerFacet = hpf.HitCount;
+        SimpleFacetedSearch.FacetName name = hpf.Name;
+		//name[0] 
+		//name[1]
+		//name.ToString()
+		
+        foreach (Document doc in hpf.Documents)
         {
              ........
         }
     }
 
 
-
-PS: Hits.TotalHitCount & HitsPerGroup.HitCount properties are costly operations. Try to avoid using them if possible.

Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/SimpleFacetedSearch.cs
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/SimpleFacetedSearch.cs?rev=1128893&r1=1128892&r2=1128893&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/SimpleFacetedSearch.cs (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/SimpleFacetedSearch.cs Sun May 29 15:47:21 2011
@@ -28,6 +28,7 @@ using Lucene.Net.Search;
 using Lucene.Net.QueryParsers;
 using Lucene.Net.Store;
 using Lucene.Net.Util;
+using System.Threading;
 
 /*
  Suppose, we want a faceted search on fields f1 f2 f3, 
@@ -58,9 +59,11 @@ namespace Lucene.Net.Search
     public partial class SimpleFacetedSearch : IDisposable
     {
         public const int DefaultMaxDocPerGroup = 25;
+        public static int MAX_FACETS = 2048;
 
         IndexReader _Reader;
         List<KeyValuePair<List<string>, OpenBitSetDISI>> _Groups = new List<KeyValuePair<List<string>, OpenBitSetDISI>>();
+        Semaphore _Sync;
 
         public SimpleFacetedSearch(IndexReader reader, string groupByField) : this(reader, new string[] { groupByField })
         {
@@ -76,10 +79,13 @@ namespace Lucene.Net.Search
             //f1 = A, B
             //f2 = I, J
             //f3 = 1, 2, 3
+            int maxFacets = 1;
             List<List<string>> inputToCP = new List<List<string>>();
             foreach (string field in groupByFields)
             {
                 FieldValuesBitSets f = new FieldValuesBitSets(reader, field);
+                maxFacets *= f.FieldValueBitSetPair.Count;
+                if (maxFacets > MAX_FACETS) throw new Exception("Facet count exceeded " + MAX_FACETS);
                 fieldValuesBitSets.Add(f);
                 inputToCP.Add(f.FieldValueBitSetPair.Keys.ToList());
             }
@@ -114,22 +120,39 @@ namespace Lucene.Net.Search
             }
 
             //Now _Groups has 7 rows (as <List<string>, BitSet> pairs) 
+
+           if( _Groups.Count>0)  _Sync = new Semaphore(_Groups.Count, _Groups.Count);
         }
 
+        
         public Hits Search(Query query, int maxDocPerGroup = DefaultMaxDocPerGroup)
         {
-            List<HitsPerGroup> hitsPerGroup = new List<HitsPerGroup>();
+            List<HitsPerFacet> hitsPerGroup = new List<HitsPerFacet>();
 
             DocIdSet queryDocidSet = new CachingWrapperFilter(new QueryWrapperFilter(query)).GetDocIdSet(_Reader);
-
+                        
             for (int i = 0; i < _Groups.Count; i++)
             {
-                HitsPerGroup h = new HitsPerGroup(new GroupName(_Groups[i].Key.ToArray()), _Reader, queryDocidSet, _Groups[i].Value, maxDocPerGroup);
+                HitsPerFacet h = new HitsPerFacet(new FacetName(_Groups[i].Key.ToArray()), _Reader, queryDocidSet, _Groups[i].Value, maxDocPerGroup);
                 hitsPerGroup.Add(h);
+                _Sync.WaitOne();
+                ThreadPool.QueueUserWorkItem(
+                    hpf =>
+                    {
+                        ((HitsPerFacet)hpf).Calculate();
+                        _Sync.Release();
+                    },
+                    h
+                );
             }
 
+            for (int i = 0; i < _Groups.Count; i++)
+                _Sync.WaitOne();
+            
+            if (_Groups.Count > 0) _Sync.Release(_Groups.Count);
+                        
             Hits hits = new Hits();
-            hits.HitsPerGroup = hitsPerGroup.ToArray();
+            hits.HitsPerFacet = hitsPerGroup.ToArray();
 
             return hits;
         }

Modified: incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/SimpleFacetedSearch.csproj
URL: http://svn.apache.org/viewvc/incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/SimpleFacetedSearch.csproj?rev=1128893&r1=1128892&r2=1128893&view=diff
==============================================================================
--- incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/SimpleFacetedSearch.csproj (original)
+++ incubator/lucene.net/branches/Lucene.Net_2_9_4g/src/contrib/SimpleFacetedSearch/SimpleFacetedSearch.csproj Sun May 29 15:47:21 2011
@@ -42,7 +42,7 @@
   <ItemGroup>
     <Compile Include="Extensions.cs" />
     <Compile Include="FieldValuesBitSets.cs" />
-    <Compile Include="GroupName.cs" />
+    <Compile Include="FacetName.cs" />
     <Compile Include="Hits.cs" />
     <Compile Include="HitsPerGroup.cs" />
     <Compile Include="Properties\AssemblyInfo.cs" />