You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@lucene.apache.org by sh...@apache.org on 2013/04/04 22:50:22 UTC

svn commit: r1464743 - in /lucene/dev/branches/branch_4x: ./ lucene/ lucene/facet/ lucene/facet/src/java/org/apache/lucene/facet/taxonomy/ lucene/facet/src/java/org/apache/lucene/facet/util/ lucene/facet/src/test/org/apache/lucene/facet/taxonomy/direct...

Author: shaie
Date: Thu Apr  4 20:50:22 2013
New Revision: 1464743

URL: http://svn.apache.org/r1464743
Log:
LUCENE-4897: add a sugar API for traversing categories

Modified:
    lucene/dev/branches/branch_4x/   (props changed)
    lucene/dev/branches/branch_4x/lucene/   (props changed)
    lucene/dev/branches/branch_4x/lucene/CHANGES.txt   (contents, props changed)
    lucene/dev/branches/branch_4x/lucene/facet/   (props changed)
    lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java
    lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/util/PrintTaxonomyStats.java
    lucene/dev/branches/branch_4x/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java

Modified: lucene/dev/branches/branch_4x/lucene/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/CHANGES.txt?rev=1464743&r1=1464742&r2=1464743&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/CHANGES.txt (original)
+++ lucene/dev/branches/branch_4x/lucene/CHANGES.txt Thu Apr  4 20:50:22 2013
@@ -135,6 +135,9 @@ New Features
 * LUCENE-4905: Made the maxPassages parameter per-field in PostingsHighlighter.
   (Robert Muir)
 
+* LUCENE-4897: Added TaxonomyReader.getChildren for traversing a category's 
+  children. (Shai Erera)
+
 Optimizations
 
 * LUCENE-4839: SorterTemplate.merge can now be overridden in order to replace

Modified: lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java?rev=1464743&r1=1464742&r2=1464743&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java (original)
+++ lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/taxonomy/TaxonomyReader.java Thu Apr  4 20:50:22 2013
@@ -65,6 +65,31 @@ import org.apache.lucene.store.AlreadyCl
  */
 public abstract class TaxonomyReader implements Closeable {
   
+  /** An iterator over a category's children. */
+  public static class ChildrenIterator {
+    
+    private final int[] siblings;
+    private int child;
+    
+    ChildrenIterator(int child, int[] siblings) {
+      this.siblings = siblings;
+      this.child = child;
+    }
+
+    /**
+     * Return the next child ordinal, or {@link TaxonomyReader#INVALID_ORDINAL}
+     * if no more children.
+     */
+    public int next() {
+      int res = child;
+      if (child != TaxonomyReader.INVALID_ORDINAL) {
+        child = siblings[child];
+      }
+      return res;
+    }
+    
+  }
+  
   /**
    * The root category (the category with the empty path) always has the ordinal
    * 0, to which we give a name ROOT_ORDINAL. {@link #getOrdinal(CategoryPath)}
@@ -167,6 +192,13 @@ public abstract class TaxonomyReader imp
    */
   public abstract ParallelTaxonomyArrays getParallelTaxonomyArrays() throws IOException;
   
+  /** Returns an iterator over the children of the given ordinal. */
+  public ChildrenIterator getChildren(final int ordinal) throws IOException {
+    ParallelTaxonomyArrays arrays = getParallelTaxonomyArrays();
+    int child = ordinal >= 0 ? arrays.children()[ordinal] : INVALID_ORDINAL;
+    return new ChildrenIterator(child, arrays.siblings());
+  }
+  
   /**
    * Retrieve user committed data.
    * 

Modified: lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/util/PrintTaxonomyStats.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/util/PrintTaxonomyStats.java?rev=1464743&r1=1464742&r2=1464743&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/util/PrintTaxonomyStats.java (original)
+++ lucene/dev/branches/branch_4x/lucene/facet/src/java/org/apache/lucene/facet/util/PrintTaxonomyStats.java Thu Apr  4 20:50:22 2013
@@ -22,8 +22,8 @@ import java.io.IOException;
 import java.io.PrintStream;
 
 import org.apache.lucene.facet.taxonomy.CategoryPath;
-import org.apache.lucene.facet.taxonomy.ParallelTaxonomyArrays;
 import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader.ChildrenIterator;
 import org.apache.lucene.facet.taxonomy.directory.DirectoryTaxonomyReader;
 import org.apache.lucene.store.Directory;
 import org.apache.lucene.store.FSDirectory;
@@ -55,45 +55,40 @@ public class PrintTaxonomyStats {
   }
 
   public static void printStats(TaxonomyReader r, PrintStream out, boolean printTree) throws IOException {
-    ParallelTaxonomyArrays arrays = r.getParallelTaxonomyArrays();
-    //int[] parents = arrays.parents();
-    int[] children = arrays.children();
-    int[] siblings = arrays.siblings();
     out.println(r.getSize() + " total categories.");
 
-    int childOrd = children[TaxonomyReader.ROOT_ORDINAL];
-    while(childOrd != -1) {
-      CategoryPath cp = r.getPath(childOrd);
-      int childOrd2 = children[childOrd];
+    ChildrenIterator it = r.getChildren(TaxonomyReader.ROOT_ORDINAL);
+    int child;
+    while ((child = it.next()) != TaxonomyReader.INVALID_ORDINAL) {
+      ChildrenIterator chilrenIt = r.getChildren(child);
       int numImmediateChildren = 0;
-      while(childOrd2 != -1) {
+      while (chilrenIt.next() != TaxonomyReader.INVALID_ORDINAL) {
         numImmediateChildren++;
-        childOrd2 = siblings[childOrd2];
       }
-      out.println("/" + cp + ": " + numImmediateChildren + " immediate children; " + (1+countAllChildren(r, childOrd, children, siblings)) + " total categories");
+      CategoryPath cp = r.getPath(child);
+      out.println("/" + cp + ": " + numImmediateChildren + " immediate children; " + (1+countAllChildren(r, child)) + " total categories");
       if (printTree) {
-        printAllChildren(out, r, childOrd, children, siblings, "  ", 1);
+        printAllChildren(out, r, child, "  ", 1);
       }
-      childOrd = siblings[childOrd];
     }
   }
 
-  private static int countAllChildren(TaxonomyReader r, int ord, int[] children, int[] siblings) throws IOException {
-    int childOrd = children[ord];
+  private static int countAllChildren(TaxonomyReader r, int ord) throws IOException {
     int count = 0;
-    while(childOrd != -1) {
-      count += 1+countAllChildren(r, childOrd, children, siblings);
-      childOrd = siblings[childOrd];
+    ChildrenIterator it = r.getChildren(ord);
+    int child;
+    while ((child = it.next()) != TaxonomyReader.INVALID_ORDINAL) {
+      count += 1 + countAllChildren(r, child);
     }
     return count;
   }
 
-  private static void printAllChildren(PrintStream out, TaxonomyReader r, int ord, int[] children, int[] siblings, String indent, int depth) throws IOException {
-    int childOrd = children[ord];
-    while(childOrd != -1) {
-      out.println(indent + "/" + r.getPath(childOrd).components[depth]);
-      printAllChildren(out, r, childOrd, children, siblings, indent + "  ", depth+1);
-      childOrd = siblings[childOrd];
+  private static void printAllChildren(PrintStream out, TaxonomyReader r, int ord, String indent, int depth) throws IOException {
+    ChildrenIterator it = r.getChildren(ord);
+    int child;
+    while ((child = it.next()) != TaxonomyReader.INVALID_ORDINAL) {
+      out.println(indent + "/" + r.getPath(child).components[depth]);
+      printAllChildren(out, r, child, indent + "  ", depth+1);
     }
   }
 }

Modified: lucene/dev/branches/branch_4x/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java
URL: http://svn.apache.org/viewvc/lucene/dev/branches/branch_4x/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java?rev=1464743&r1=1464742&r2=1464743&view=diff
==============================================================================
--- lucene/dev/branches/branch_4x/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java (original)
+++ lucene/dev/branches/branch_4x/lucene/facet/src/test/org/apache/lucene/facet/taxonomy/directory/TestDirectoryTaxonomyReader.java Thu Apr  4 20:50:22 2013
@@ -1,12 +1,16 @@
 package org.apache.lucene.facet.taxonomy.directory;
 
 import java.io.IOException;
+import java.util.Arrays;
+import java.util.HashSet;
 import java.util.Random;
+import java.util.Set;
 
 import org.apache.lucene.analysis.MockAnalyzer;
 import org.apache.lucene.facet.FacetTestCase;
 import org.apache.lucene.facet.taxonomy.CategoryPath;
 import org.apache.lucene.facet.taxonomy.TaxonomyReader;
+import org.apache.lucene.facet.taxonomy.TaxonomyReader.ChildrenIterator;
 import org.apache.lucene.facet.taxonomy.TaxonomyWriter;
 import org.apache.lucene.index.IndexWriter;
 import org.apache.lucene.index.IndexWriterConfig;
@@ -462,4 +466,68 @@ public class TestDirectoryTaxonomyReader
     src.close();
   }
 
+  @Test
+  public void testGetChildren() throws Exception {
+    Directory dir = newDirectory();
+    DirectoryTaxonomyWriter taxoWriter = new DirectoryTaxonomyWriter(dir);
+    int numCategories = atLeast(10);
+    int numA = 0, numB = 0;
+    Random random = random();
+    for (int i = 0; i < numCategories; i++) {
+      if (random.nextBoolean()) {
+        taxoWriter.addCategory(new CategoryPath("a", Integer.toString(i)));
+        ++numA;
+      } else {
+        taxoWriter.addCategory(new CategoryPath("b", Integer.toString(i)));
+        ++numB;
+      }
+    }
+    // add category with no children
+    taxoWriter.addCategory(new CategoryPath("c"));
+    taxoWriter.close();
+    
+    DirectoryTaxonomyReader taxoReader = new DirectoryTaxonomyReader(dir);
+
+    // non existing category
+    ChildrenIterator it = taxoReader.getChildren(taxoReader.getOrdinal(new CategoryPath("invalid")));
+    assertEquals(TaxonomyReader.INVALID_ORDINAL, it.next());
+
+    // a category with no children
+    it = taxoReader.getChildren(taxoReader.getOrdinal(new CategoryPath("c")));
+    assertEquals(TaxonomyReader.INVALID_ORDINAL, it.next());
+
+    // arbitrary negative ordinal
+    it = taxoReader.getChildren(-2);
+    assertEquals(TaxonomyReader.INVALID_ORDINAL, it.next());
+
+    // root's children
+    Set<String> roots = new HashSet<String>(Arrays.asList("a", "b", "c"));
+    it = taxoReader.getChildren(TaxonomyReader.ROOT_ORDINAL);
+    while (!roots.isEmpty()) {
+      CategoryPath root = taxoReader.getPath(it.next());
+      assertEquals(1, root.length);
+      assertTrue(roots.remove(root.components[0]));
+    }
+    assertEquals(TaxonomyReader.INVALID_ORDINAL, it.next());
+    
+    for (int i = 0; i < 2; i++) {
+      CategoryPath cp = i == 0 ? new CategoryPath("a") : new CategoryPath("b");
+      int ordinal = taxoReader.getOrdinal(cp);
+      it = taxoReader.getChildren(ordinal);
+      int numChildren = 0;
+      int child;
+      while ((child = it.next()) != TaxonomyReader.INVALID_ORDINAL) {
+        CategoryPath path = taxoReader.getPath(child);
+        assertEquals(2, path.length);
+        assertEquals(path.components[0], i == 0 ? "a" : "b");
+        ++numChildren;
+      }
+      int expected = i == 0 ? numA : numB;
+      assertEquals("invalid num children", expected, numChildren);
+    }
+    taxoReader.close();
+    
+    dir.close();
+  }
+  
 }