You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sh...@apache.org on 2013/05/29 15:09:41 UTC

svn commit: r1487471 - in /lucene/dev/branches/branch_4x: ./ lucene/ lucene/facet/ lucene/facet/src/java/org/apache/lucene/facet/search/ lucene/facet/src/test/org/apache/lucene/facet/search/

Author: shaie
Date: Wed May 29 13:09:41 2013
New Revision: 1487471

URL: http://svn.apache.org/r1487471
Log:
LUCENE-5022: Add FacetResult.mergeHierarchies

Added:
    lucene/dev/branches/branch_4x/lucene/facet/src/test/org/apache/lucene/facet/search/FacetResultTest.java
      - copied unchanged from r1487465, lucene/dev/trunk/lucene/facet/src/test/org/apache/lucene/facet/search/FacetResultTest.java
Modified:
    lucene/dev/branches/branch_4x/   (props changed)
    lucene/dev/branches/branch_4x/lucene/   (props changed)
    lucene/dev/branches/branch_4x/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_4x/lucene/facet/   (props changed)
    lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/search/FacetResult.java
    lucene/dev/branches/branch_4x/lucene/facet/src/test/org/apache/lucene/facet/search/FacetRequestTest.java

Modified: lucene/dev/branches/branch_4x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/CHANGES.txt?rev=1487471&r1=1487470&r2=1487471&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/lucene/CHANGES.txt Wed May 29 13:09:41 2013
@@ -129,6 +129,10 @@ New Features
 * LUCENE-4975: Added a new Replicator module which can replicate index 
   revisions between server and client. (Shai Erera, Mike McCandless)
 
+* LUCENE-5022: Added FacetResult.mergeHierarchies to merge multiple
+  FacetResult of the same dimension into a single one with the reconstructed
+  hierarchy. (Shai Erera)
+  
 Build
 
 * LUCENE-4987: Upgrade randomized testing to version 2.0.10: 

Modified: lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/search/FacetResult.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/search/FacetResult.java?rev=1487471&r1=1487470&r2=1487471&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/search/FacetResult.java (original)
+++ lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/search/FacetResult.java Wed May 29 13:09:41 2013
@@ -1,5 +1,16 @@
 package org.apache.lucene.facet.search;
 
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.lucene.facet.taxonomy.CategoryPath;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+import org.apache.lucene.util.CollectionUtil;
+
 /*
  * Licensed to the Apache Software Foundation (ASF) under one or more
  * contributor license agreements.  See the NOTICE file distributed with
@@ -24,6 +35,140 @@ package org.apache.lucene.facet.search;
  */
 public class FacetResult {
   
+  private static FacetResultNode addIfNotExist(Map<CategoryPath, FacetResultNode> nodes, FacetResultNode node) {
+    FacetResultNode n = nodes.get(node.label);
+    if (n == null) {
+      nodes.put(node.label, node);
+      n = node;
+    }
+    return n;
+  }
+
+  /**
+   * A utility for merging multiple {@link FacetResult} of the same
+   * (hierarchical) dimension into a single {@link FacetResult}, to reconstruct
+   * the hierarchy. The results are merged according to the following rules:
+   * <ul>
+   * <li>If two results share the same dimension (first component in their
+   * {@link CategoryPath}), they are merged.
+   * <li>If a result is missing ancestors in the other results, e.g. A/B/C but
+   * no corresponding A or A/B, these nodes are 'filled' with their label,
+   * ordinal and value (obtained from the respective {@link FacetArrays}).
+   * <li>If a result does not share a dimension with other results, it is
+   * returned as is.
+   * </ul>
+   * <p>
+   * <b>NOTE:</b> the returned results are not guaranteed to be in the same
+   * order of the input ones.
+   * 
+   * @param results
+   *          the results to merge
+   * @param taxoReader
+   *          the {@link TaxonomyReader} to use when creating missing ancestor
+   *          nodes
+   * @param dimArrays
+   *          a mapping from a dimension to the respective {@link FacetArrays}
+   *          from which to pull the nodes values
+   */
+  public static List<FacetResult> mergeHierarchies(List<FacetResult> results, TaxonomyReader taxoReader,
+      Map<String, FacetArrays> dimArrays) throws IOException {
+    final Map<String, List<FacetResult>> dims = new HashMap<String,List<FacetResult>>();
+    for (FacetResult fr : results) {
+      String dim = fr.getFacetRequest().categoryPath.components[0];
+      List<FacetResult> frs = dims.get(dim);
+      if (frs == null) {
+        frs = new ArrayList<FacetResult>();
+        dims.put(dim, frs);
+      }
+      frs.add(fr);
+    }
+
+    final List<FacetResult> res = new ArrayList<FacetResult>();
+    for (List<FacetResult> frs : dims.values()) {
+      FacetResult mergedResult = frs.get(0);
+      if (frs.size() > 1) {
+        CollectionUtil.introSort(frs, new Comparator<FacetResult>() {
+          @Override
+          public int compare(FacetResult fr1, FacetResult fr2) {
+            return fr1.getFacetRequest().categoryPath.compareTo(fr2.getFacetRequest().categoryPath);
+          }
+        });
+        Map<CategoryPath, FacetResultNode> mergedNodes = new HashMap<CategoryPath,FacetResultNode>();
+        FacetArrays arrays = dimArrays != null ? dimArrays.get(frs.get(0).getFacetRequest().categoryPath.components[0]) : null;
+        for (FacetResult fr : frs) {
+          FacetResultNode frn = fr.getFacetResultNode();
+          FacetResultNode merged = mergedNodes.get(frn.label);
+          if (merged == null) {
+            CategoryPath parent = frn.label.subpath(frn.label.length - 1);
+            FacetResultNode childNode = frn;
+            FacetResultNode parentNode = null;
+            while (parent.length > 0 && (parentNode = mergedNodes.get(parent)) == null) {
+              int parentOrd = taxoReader.getOrdinal(parent);
+              double parentValue = arrays != null ? fr.getFacetRequest().getValueOf(arrays, parentOrd) : -1;
+              parentNode = new FacetResultNode(parentOrd, parentValue);
+              parentNode.label = parent;
+              parentNode.subResults = new ArrayList<FacetResultNode>();
+              parentNode.subResults.add(childNode);
+              mergedNodes.put(parent, parentNode);
+              childNode = parentNode;
+              parent = parent.subpath(parent.length - 1);
+            }
+
+            // at least one parent was added, so link the final (existing)
+            // parent with the child
+            if (parent.length > 0) {
+              if (!(parentNode.subResults instanceof ArrayList)) {
+                parentNode.subResults = new ArrayList<FacetResultNode>(parentNode.subResults);
+              }
+              parentNode.subResults.add(childNode);
+            }
+
+            // for missing FRNs, add new ones with label and value=-1
+            // first time encountered this label, add it and all its children to
+            // the map.
+            mergedNodes.put(frn.label, frn);
+            for (FacetResultNode child : frn.subResults) {
+              addIfNotExist(mergedNodes, child);
+            }
+          } else {
+            if (!(merged.subResults instanceof ArrayList)) {
+              merged.subResults = new ArrayList<FacetResultNode>(merged.subResults);
+            }
+            for (FacetResultNode sub : frn.subResults) {
+              // make sure sub wasn't already added
+              sub = addIfNotExist(mergedNodes, sub);
+              if (!merged.subResults.contains(sub)) {
+                merged.subResults.add(sub);
+              }
+            }
+          }
+        }
+        
+        // find the 'first' node to put on the FacetResult root
+        CategoryPath min = null;
+        for (CategoryPath cp : mergedNodes.keySet()) {
+          if (min == null || cp.compareTo(min) < 0) {
+            min = cp;
+          }
+        }
+        FacetRequest dummy = new FacetRequest(min, frs.get(0).getFacetRequest().numResults) {
+          @Override
+          public double getValueOf(FacetArrays arrays, int idx) {
+            throw new UnsupportedOperationException("not supported by this request");
+          }
+          
+          @Override
+          public FacetArraysSource getFacetArraysSource() {
+            throw new UnsupportedOperationException("not supported by this request");
+          }
+        };
+        mergedResult = new FacetResult(dummy, mergedNodes.get(min), -1);
+      }
+      res.add(mergedResult);
+    }
+    return res;
+  }
+
   private final FacetRequest facetRequest;
   private final FacetResultNode rootNode;
   private final int numValidDescendants;

Modified: lucene/dev/branches/branch_4x/lucene/facet/src/test/org/apache/lucene/facet/search/FacetRequestTest.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/facet/src/test/org/apache/lucene/facet/search/FacetRequestTest.java?rev=1487471&r1=1487470&r2=1487471&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/facet/src/test/org/apache/lucene/facet/search/FacetRequestTest.java (original)
+++ lucene/dev/branches/branch_4x/lucene/facet/src/test/org/apache/lucene/facet/search/FacetRequestTest.java Wed May 29 13:09:41 2013
@@ -23,7 +23,7 @@ import org.junit.Test;
  */
 
 public class FacetRequestTest extends FacetTestCase {
-
+  
   @Test(expected=IllegalArgumentException.class)
   public void testIllegalNumResults() throws Exception {
     assertNotNull(new CountFacetRequest(new CategoryPath("a", "b"), 0));
@@ -33,7 +33,7 @@ public class FacetRequestTest extends Fa
   public void testIllegalCategoryPath() throws Exception {
     assertNotNull(new CountFacetRequest(null, 1));
   }
-
+  
   @Test
   public void testHashAndEquals() {
     CountFacetRequest fr1 = new CountFacetRequest(new CategoryPath("a"), 8);