You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2014/03/26 11:52:52 UTC

svn commit: r1581783 - in /opennlp/sandbox/nlp-utils/src: main/java/org/apache/opennlp/utils/cfg/ test/java/org/apache/opennlp/utils/cfg/

Author: joern
Date: Wed Mar 26 10:52:52 2014
New Revision: 1581783

URL: http://svn.apache.org/r1581783
Log:
OPENNLP-666 Support for strict expansion of non-terminal rules in CFGs. Thanks to Tommaso Teofili for providing the patch.

Modified:
    opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/CFGBuilder.java
    opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ContextFreeGrammar.java
    opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/Rule.java
    opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/cfg/ContextFreeGrammarTest.java

Modified: opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/CFGBuilder.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/CFGBuilder.java?rev=1581783&r1=1581782&r2=1581783&view=diff
==============================================================================
--- opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/CFGBuilder.java (original)
+++ opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/CFGBuilder.java Wed Mar 26 10:52:52 2014
@@ -18,33 +18,34 @@
  */
 package org.apache.opennlp.utils.cfg;
 
-import java.util.Set;
+import java.util.Collection;
 
 /**
  * A builder for {@link ContextFreeGrammar}s
  */
 public class CFGBuilder {
 
-    private Set<String> nonTerminalSymbols;
-    private Set<String> terminalSymbols;
-    private Set<Rule> rules;
+    private Collection<String> nonTerminalSymbols;
+    private Collection<String> terminalSymbols;
+    private Collection<Rule> rules;
     private String startSymbol;
+    private boolean randomExpansion;
 
     public static CFGBuilder createCFG() {
         return new CFGBuilder();
     }
 
-    public CFGBuilder withTerminals(Set<String> terminalSymbols) {
+    public CFGBuilder withTerminals(Collection<String> terminalSymbols) {
         this.terminalSymbols = terminalSymbols;
         return this;
     }
 
-    public CFGBuilder withNonTerminals(Set<String> nonTerminalSymbols) {
+    public CFGBuilder withNonTerminals(Collection<String> nonTerminalSymbols) {
         this.nonTerminalSymbols = nonTerminalSymbols;
         return this;
     }
 
-    public CFGBuilder withRules(Set<Rule> rules) {
+    public CFGBuilder withRules(Collection<Rule> rules) {
         this.rules = rules;
         return this;
     }
@@ -54,7 +55,13 @@ public class CFGBuilder {
         return this;
     }
 
+    public CFGBuilder withRandomExpansion(boolean randomExpansion) {
+        this.randomExpansion = randomExpansion;
+        return this;
+    }
+
     public ContextFreeGrammar build() {
-        return new ContextFreeGrammar(nonTerminalSymbols, terminalSymbols, rules, startSymbol);
+        assert nonTerminalSymbols != null && terminalSymbols != null && rules != null && startSymbol != null;
+        return new ContextFreeGrammar(nonTerminalSymbols, terminalSymbols, rules, startSymbol, randomExpansion);
     }
 }

Modified: opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ContextFreeGrammar.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ContextFreeGrammar.java?rev=1581783&r1=1581782&r2=1581783&view=diff
==============================================================================
--- opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ContextFreeGrammar.java (original)
+++ opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ContextFreeGrammar.java Wed Mar 26 10:52:52 2014
@@ -28,20 +28,25 @@ import java.util.Set;
  * A context free grammar
  */
 public class ContextFreeGrammar {
-  private Set<String> nonTerminalSymbols;
-  private Set<String> terminalSymbols;
-  private Set<Rule> rules;
-  private String startSymbol;
+  private final Collection<String> nonTerminalSymbols;
+  private final Collection<String> terminalSymbols;
+  private final Collection<Rule> rules;
+  private final String startSymbol;
+  private final boolean randomExpansion;
 
-  public ContextFreeGrammar(Set<String> nonTerminalSymbols, Set<String> terminalSymbols, Set<Rule> rules, String startSymbol) {
+  public ContextFreeGrammar(Collection<String> nonTerminalSymbols, Collection<String> terminalSymbols, Collection<Rule> rules, String startSymbol, boolean randomExpansion) {
     assert nonTerminalSymbols.contains(startSymbol) : "start symbol doesn't belong to non-terminal symbols set";
 
     this.nonTerminalSymbols = nonTerminalSymbols;
     this.terminalSymbols = terminalSymbols;
     this.rules = rules;
     this.startSymbol = startSymbol;
+    this.randomExpansion = randomExpansion;
   }
 
+  public ContextFreeGrammar(Collection<String> nonTerminalSymbols, Collection<String> terminalSymbols, Collection<Rule> rules, String startSymbol) {
+    this(nonTerminalSymbols, terminalSymbols, rules, startSymbol, false);
+  }
 
   public String[] leftMostDerivation(String... words) {
     ArrayList<String> expansion = new ArrayList<String>(words.length);
@@ -81,6 +86,9 @@ public class ContextFreeGrammar {
     ArrayList<Rule> possibleRules = new ArrayList<Rule>();
     for (Rule r : rules) {
       if (word.equals(r.getEntry())) {
+        if (randomExpansion) {
+          return r;
+        }
         possibleRules.add(r);
       }
     }

Modified: opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/Rule.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/Rule.java?rev=1581783&r1=1581782&r2=1581783&view=diff
==============================================================================
--- opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/Rule.java (original)
+++ opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/Rule.java Wed Mar 26 10:52:52 2014
@@ -18,6 +18,8 @@
  */
 package org.apache.opennlp.utils.cfg;
 
+import java.util.Arrays;
+
 /**
  * A rule for context free grammars
  */
@@ -40,6 +42,27 @@ public class Rule implements Comparable<
 
   @Override
   public int compareTo(Rule o) {
-    return entry.compareTo(o.getEntry());
+      int c = entry.compareTo(o.getEntry());
+      return c != 0 ? c : Arrays.toString(expansion).compareTo(Arrays.toString(o.getExpansion()));
+  }
+
+  @Override
+  public boolean equals(Object o) {
+    if (this == o) return true;
+    if (o == null || getClass() != o.getClass()) return false;
+
+    Rule rule = (Rule) o;
+
+    if (entry != null ? !entry.equals(rule.entry) : rule.entry != null) return false;
+    if (!Arrays.equals(expansion, rule.expansion)) return false;
+
+    return true;
+  }
+
+  @Override
+  public int hashCode() {
+    int result = entry != null ? entry.hashCode() : 0;
+    result = 31 * result + (expansion != null ? Arrays.hashCode(expansion) : 0);
+    return result;
   }
 }

Modified: opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/cfg/ContextFreeGrammarTest.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/cfg/ContextFreeGrammarTest.java?rev=1581783&r1=1581782&r2=1581783&view=diff
==============================================================================
--- opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/cfg/ContextFreeGrammarTest.java (original)
+++ opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/cfg/ContextFreeGrammarTest.java Wed Mar 26 10:52:52 2014
@@ -19,9 +19,13 @@
 package org.apache.opennlp.utils.cfg;
 
 import java.util.Arrays;
+import java.util.Collection;
 import java.util.HashSet;
+import java.util.LinkedList;
 import java.util.Set;
+import java.util.TreeSet;
 import org.junit.Before;
+import org.junit.BeforeClass;
 import org.junit.Test;
 
 import static org.junit.Assert.assertNotNull;
@@ -32,13 +36,14 @@ import static org.junit.Assert.assertTru
  */
 public class ContextFreeGrammarTest {
 
-  private ContextFreeGrammar contextFreeGrammar;
-  private Set<String> terminals;
-
-  @Before
-  public void setUp() throws Exception {
-
-    Set<String> nonTerminals = new HashSet<String>(); // PoS + Parse tags
+  private static Collection<String> terminals;
+  private static Collection<String> nonTerminals; // PoS + Parse tags
+  private static String startSymbol;
+  private static Collection<Rule> rules;
+
+  @BeforeClass
+  public static void setUp() throws Exception {
+    nonTerminals = new LinkedList<String>();
     nonTerminals.add("S");
     nonTerminals.add("NP");
     nonTerminals.add("VP");
@@ -53,22 +58,22 @@ public class ContextFreeGrammarTest {
     nonTerminals.add("DJ");
     nonTerminals.add("P");
 
-    String startSymbol = "S";
+    startSymbol = "S";
 
-    terminals = new HashSet<String>();
-    terminals.add("sleeps");
+    terminals = new LinkedList<String>();
+    terminals.add("works");
     terminals.add("saw");
     terminals.add("man");
     terminals.add("woman");
-    terminals.add("telescope");
+    terminals.add("car");
     terminals.add("the");
     terminals.add("with");
     terminals.add("in");
-    terminals.add("tommaso");
-    terminals.add("simone");
-    terminals.add("joao");
-    terminals.add("tigro");
-    terminals.add("michele");
+    terminals.add("joe");
+    terminals.add("john");
+    terminals.add("sam");
+    terminals.add("michael");
+    terminals.add("michelle");
     terminals.add("scarlett");
     terminals.add("and");
     terminals.add("but");
@@ -76,7 +81,7 @@ public class ContextFreeGrammarTest {
     terminals.add("of");
     terminals.add("for");
 
-    Set<Rule> rules = new HashSet<Rule>();
+    rules = new LinkedList<Rule>();
     rules.add(new Rule("S", "NP", "VP"));
     rules.add(new Rule("P", "S", "CJ", "S"));
     rules.add(new Rule("P", "S", "DJ", "S"));
@@ -87,31 +92,37 @@ public class ContextFreeGrammarTest {
     rules.add(new Rule("NP", "NP", "PP"));
     rules.add(new Rule("NP", "NNP"));
     rules.add(new Rule("PP", "IN", "NP"));
-    rules.add(new Rule("Vi", "sleeps"));
+    rules.add(new Rule("Vi", "works"));
     rules.add(new Rule("Vt", "saw"));
     rules.add(new Rule("NN", "man"));
     rules.add(new Rule("NN", "woman"));
-    rules.add(new Rule("NN", "telescope"));
+    rules.add(new Rule("NN", "car"));
     rules.add(new Rule("DT", "the"));
     rules.add(new Rule("IN", "with"));
     rules.add(new Rule("IN", "in"));
     rules.add(new Rule("IN", "for"));
     rules.add(new Rule("IN", "of"));
-    rules.add(new Rule("NNP", "tommaso"));
-    rules.add(new Rule("NNP", "simone"));
-    rules.add(new Rule("NNP", "joao"));
-    rules.add(new Rule("NNP", "tigro"));
-    rules.add(new Rule("NNP", "michele"));
+    rules.add(new Rule("NNP", "joe"));
+    rules.add(new Rule("NNP", "john"));
+    rules.add(new Rule("NNP", "sam"));
+    rules.add(new Rule("NNP", "michael"));
+    rules.add(new Rule("NNP", "michelle"));
     rules.add(new Rule("NNP", "scarlett"));
     rules.add(new Rule("CJ", "and"));
     rules.add(new Rule("DJ", "but"));
     rules.add(new Rule("DJ", "while"));
+  }
 
-    contextFreeGrammar = new ContextFreeGrammar(nonTerminals, terminals, rules, startSymbol);
+  @Test
+  public void testSingleSentenceExpansion() throws Exception {
+    ContextFreeGrammar contextFreeGrammar = new ContextFreeGrammar(nonTerminals, terminals, rules, startSymbol);
+    String[] expansion = contextFreeGrammar.leftMostDerivation("S");
+    checkExpansion(expansion);
   }
 
   @Test
-  public void testSingleExpansion() throws Exception {
+  public void testSingleSentenceRandomExpansion() throws Exception {
+    ContextFreeGrammar contextFreeGrammar = new ContextFreeGrammar(nonTerminals, terminals, rules, startSymbol, true);
     String[] expansion = contextFreeGrammar.leftMostDerivation("S");
     checkExpansion(expansion);
   }
@@ -119,6 +130,17 @@ public class ContextFreeGrammarTest {
 
   @Test
   public void testMultipleSentencesExpansion() throws Exception {
+    ContextFreeGrammar contextFreeGrammar = new ContextFreeGrammar(nonTerminals, terminals, rules, startSymbol);
+    String[] expansion = contextFreeGrammar.leftMostDerivation("S", "CJ", "S");
+    checkExpansion(expansion);
+
+    expansion = contextFreeGrammar.leftMostDerivation("S", "DJ", "S", "CJ", "P");
+    checkExpansion(expansion);
+  }
+
+  @Test
+  public void testMultipleSentencesRandomExpansion() throws Exception {
+    ContextFreeGrammar contextFreeGrammar = new ContextFreeGrammar(nonTerminals, terminals, rules, startSymbol, true);
     String[] expansion = contextFreeGrammar.leftMostDerivation("S", "CJ", "S");
     checkExpansion(expansion);
 
@@ -132,6 +154,5 @@ public class ContextFreeGrammarTest {
     for (String t : expansion) {
       assertTrue("term " + t + " is not a terminal symbol", terminals.contains(t));
     }
-    System.err.println(Arrays.toString(expansion));
   }
 }