You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2014/03/26 11:52:52 UTC
svn commit: r1581783 - in /opennlp/sandbox/nlp-utils/src:
main/java/org/apache/opennlp/utils/cfg/
test/java/org/apache/opennlp/utils/cfg/
Author: joern
Date: Wed Mar 26 10:52:52 2014
New Revision: 1581783
URL: http://svn.apache.org/r1581783
Log:
OPENNLP-666 Support for strict CFG non-terminal rule expansion. Thanks to Tommaso Teofili for providing a patch.
Modified:
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/CFGBuilder.java
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ContextFreeGrammar.java
opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/Rule.java
opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/cfg/ContextFreeGrammarTest.java
Modified: opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/CFGBuilder.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/CFGBuilder.java?rev=1581783&r1=1581782&r2=1581783&view=diff
==============================================================================
--- opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/CFGBuilder.java (original)
+++ opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/CFGBuilder.java Wed Mar 26 10:52:52 2014
@@ -18,33 +18,34 @@
*/
package org.apache.opennlp.utils.cfg;
-import java.util.Set;
+import java.util.Collection;
/**
* A builder for {@link ContextFreeGrammar}s
*/
public class CFGBuilder {
- private Set<String> nonTerminalSymbols;
- private Set<String> terminalSymbols;
- private Set<Rule> rules;
+ private Collection<String> nonTerminalSymbols;
+ private Collection<String> terminalSymbols;
+ private Collection<Rule> rules;
private String startSymbol;
+ private boolean randomExpansion;
public static CFGBuilder createCFG() {
return new CFGBuilder();
}
- public CFGBuilder withTerminals(Set<String> terminalSymbols) {
+ public CFGBuilder withTerminals(Collection<String> terminalSymbols) {
this.terminalSymbols = terminalSymbols;
return this;
}
- public CFGBuilder withNonTerminals(Set<String> nonTerminalSymbols) {
+ public CFGBuilder withNonTerminals(Collection<String> nonTerminalSymbols) {
this.nonTerminalSymbols = nonTerminalSymbols;
return this;
}
- public CFGBuilder withRules(Set<Rule> rules) {
+ public CFGBuilder withRules(Collection<Rule> rules) {
this.rules = rules;
return this;
}
@@ -54,7 +55,13 @@ public class CFGBuilder {
return this;
}
+ public CFGBuilder withRandomExpansion(boolean randomExpansion) {
+ this.randomExpansion = randomExpansion;
+ return this;
+ }
+
public ContextFreeGrammar build() {
- return new ContextFreeGrammar(nonTerminalSymbols, terminalSymbols, rules, startSymbol);
+ assert nonTerminalSymbols != null && terminalSymbols != null && rules != null && startSymbol != null;
+ return new ContextFreeGrammar(nonTerminalSymbols, terminalSymbols, rules, startSymbol, randomExpansion);
}
}
Modified: opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ContextFreeGrammar.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ContextFreeGrammar.java?rev=1581783&r1=1581782&r2=1581783&view=diff
==============================================================================
--- opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ContextFreeGrammar.java (original)
+++ opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/ContextFreeGrammar.java Wed Mar 26 10:52:52 2014
@@ -28,20 +28,25 @@ import java.util.Set;
* A context free grammar
*/
public class ContextFreeGrammar {
- private Set<String> nonTerminalSymbols;
- private Set<String> terminalSymbols;
- private Set<Rule> rules;
- private String startSymbol;
+ private final Collection<String> nonTerminalSymbols;
+ private final Collection<String> terminalSymbols;
+ private final Collection<Rule> rules;
+ private final String startSymbol;
+ private final boolean randomExpansion;
- public ContextFreeGrammar(Set<String> nonTerminalSymbols, Set<String> terminalSymbols, Set<Rule> rules, String startSymbol) {
+ public ContextFreeGrammar(Collection<String> nonTerminalSymbols, Collection<String> terminalSymbols, Collection<Rule> rules, String startSymbol, boolean randomExpansion) {
assert nonTerminalSymbols.contains(startSymbol) : "start symbol doesn't belong to non-terminal symbols set";
this.nonTerminalSymbols = nonTerminalSymbols;
this.terminalSymbols = terminalSymbols;
this.rules = rules;
this.startSymbol = startSymbol;
+ this.randomExpansion = randomExpansion;
}
+ public ContextFreeGrammar(Collection<String> nonTerminalSymbols, Collection<String> terminalSymbols, Collection<Rule> rules, String startSymbol) {
+ this(nonTerminalSymbols, terminalSymbols, rules, startSymbol, false);
+ }
public String[] leftMostDerivation(String... words) {
ArrayList<String> expansion = new ArrayList<String>(words.length);
@@ -81,6 +86,9 @@ public class ContextFreeGrammar {
ArrayList<Rule> possibleRules = new ArrayList<Rule>();
for (Rule r : rules) {
if (word.equals(r.getEntry())) {
+ if (randomExpansion) {
+ return r;
+ }
possibleRules.add(r);
}
}
Modified: opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/Rule.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/Rule.java?rev=1581783&r1=1581782&r2=1581783&view=diff
==============================================================================
--- opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/Rule.java (original)
+++ opennlp/sandbox/nlp-utils/src/main/java/org/apache/opennlp/utils/cfg/Rule.java Wed Mar 26 10:52:52 2014
@@ -18,6 +18,8 @@
*/
package org.apache.opennlp.utils.cfg;
+import java.util.Arrays;
+
/**
* A rule for context free grammars
*/
@@ -40,6 +42,27 @@ public class Rule implements Comparable<
@Override
public int compareTo(Rule o) {
- return entry.compareTo(o.getEntry());
+ int c = entry.compareTo(o.getEntry());
+ return c != 0 ? c : Arrays.toString(expansion).compareTo(Arrays.toString(o.getExpansion()));
+ }
+
+ @Override
+ public boolean equals(Object o) {
+ if (this == o) return true;
+ if (o == null || getClass() != o.getClass()) return false;
+
+ Rule rule = (Rule) o;
+
+ if (entry != null ? !entry.equals(rule.entry) : rule.entry != null) return false;
+ if (!Arrays.equals(expansion, rule.expansion)) return false;
+
+ return true;
+ }
+
+ @Override
+ public int hashCode() {
+ int result = entry != null ? entry.hashCode() : 0;
+ result = 31 * result + (expansion != null ? Arrays.hashCode(expansion) : 0);
+ return result;
}
}
Modified: opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/cfg/ContextFreeGrammarTest.java
URL: http://svn.apache.org/viewvc/opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/cfg/ContextFreeGrammarTest.java?rev=1581783&r1=1581782&r2=1581783&view=diff
==============================================================================
--- opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/cfg/ContextFreeGrammarTest.java (original)
+++ opennlp/sandbox/nlp-utils/src/test/java/org/apache/opennlp/utils/cfg/ContextFreeGrammarTest.java Wed Mar 26 10:52:52 2014
@@ -19,9 +19,13 @@
package org.apache.opennlp.utils.cfg;
import java.util.Arrays;
+import java.util.Collection;
import java.util.HashSet;
+import java.util.LinkedList;
import java.util.Set;
+import java.util.TreeSet;
import org.junit.Before;
+import org.junit.BeforeClass;
import org.junit.Test;
import static org.junit.Assert.assertNotNull;
@@ -32,13 +36,14 @@ import static org.junit.Assert.assertTru
*/
public class ContextFreeGrammarTest {
- private ContextFreeGrammar contextFreeGrammar;
- private Set<String> terminals;
-
- @Before
- public void setUp() throws Exception {
-
- Set<String> nonTerminals = new HashSet<String>(); // PoS + Parse tags
+ private static Collection<String> terminals;
+ private static Collection<String> nonTerminals; // PoS + Parse tags
+ private static String startSymbol;
+ private static Collection<Rule> rules;
+
+ @BeforeClass
+ public static void setUp() throws Exception {
+ nonTerminals = new LinkedList<String>();
nonTerminals.add("S");
nonTerminals.add("NP");
nonTerminals.add("VP");
@@ -53,22 +58,22 @@ public class ContextFreeGrammarTest {
nonTerminals.add("DJ");
nonTerminals.add("P");
- String startSymbol = "S";
+ startSymbol = "S";
- terminals = new HashSet<String>();
- terminals.add("sleeps");
+ terminals = new LinkedList<String>();
+ terminals.add("works");
terminals.add("saw");
terminals.add("man");
terminals.add("woman");
- terminals.add("telescope");
+ terminals.add("car");
terminals.add("the");
terminals.add("with");
terminals.add("in");
- terminals.add("tommaso");
- terminals.add("simone");
- terminals.add("joao");
- terminals.add("tigro");
- terminals.add("michele");
+ terminals.add("joe");
+ terminals.add("john");
+ terminals.add("sam");
+ terminals.add("michael");
+ terminals.add("michelle");
terminals.add("scarlett");
terminals.add("and");
terminals.add("but");
@@ -76,7 +81,7 @@ public class ContextFreeGrammarTest {
terminals.add("of");
terminals.add("for");
- Set<Rule> rules = new HashSet<Rule>();
+ rules = new LinkedList<Rule>();
rules.add(new Rule("S", "NP", "VP"));
rules.add(new Rule("P", "S", "CJ", "S"));
rules.add(new Rule("P", "S", "DJ", "S"));
@@ -87,31 +92,37 @@ public class ContextFreeGrammarTest {
rules.add(new Rule("NP", "NP", "PP"));
rules.add(new Rule("NP", "NNP"));
rules.add(new Rule("PP", "IN", "NP"));
- rules.add(new Rule("Vi", "sleeps"));
+ rules.add(new Rule("Vi", "works"));
rules.add(new Rule("Vt", "saw"));
rules.add(new Rule("NN", "man"));
rules.add(new Rule("NN", "woman"));
- rules.add(new Rule("NN", "telescope"));
+ rules.add(new Rule("NN", "car"));
rules.add(new Rule("DT", "the"));
rules.add(new Rule("IN", "with"));
rules.add(new Rule("IN", "in"));
rules.add(new Rule("IN", "for"));
rules.add(new Rule("IN", "of"));
- rules.add(new Rule("NNP", "tommaso"));
- rules.add(new Rule("NNP", "simone"));
- rules.add(new Rule("NNP", "joao"));
- rules.add(new Rule("NNP", "tigro"));
- rules.add(new Rule("NNP", "michele"));
+ rules.add(new Rule("NNP", "joe"));
+ rules.add(new Rule("NNP", "john"));
+ rules.add(new Rule("NNP", "sam"));
+ rules.add(new Rule("NNP", "michael"));
+ rules.add(new Rule("NNP", "michelle"));
rules.add(new Rule("NNP", "scarlett"));
rules.add(new Rule("CJ", "and"));
rules.add(new Rule("DJ", "but"));
rules.add(new Rule("DJ", "while"));
+ }
- contextFreeGrammar = new ContextFreeGrammar(nonTerminals, terminals, rules, startSymbol);
+ @Test
+ public void testSingleSentenceExpansion() throws Exception {
+ ContextFreeGrammar contextFreeGrammar = new ContextFreeGrammar(nonTerminals, terminals, rules, startSymbol);
+ String[] expansion = contextFreeGrammar.leftMostDerivation("S");
+ checkExpansion(expansion);
}
@Test
- public void testSingleExpansion() throws Exception {
+ public void testSingleSentenceRandomExpansion() throws Exception {
+ ContextFreeGrammar contextFreeGrammar = new ContextFreeGrammar(nonTerminals, terminals, rules, startSymbol, true);
String[] expansion = contextFreeGrammar.leftMostDerivation("S");
checkExpansion(expansion);
}
@@ -119,6 +130,17 @@ public class ContextFreeGrammarTest {
@Test
public void testMultipleSentencesExpansion() throws Exception {
+ ContextFreeGrammar contextFreeGrammar = new ContextFreeGrammar(nonTerminals, terminals, rules, startSymbol);
+ String[] expansion = contextFreeGrammar.leftMostDerivation("S", "CJ", "S");
+ checkExpansion(expansion);
+
+ expansion = contextFreeGrammar.leftMostDerivation("S", "DJ", "S", "CJ", "P");
+ checkExpansion(expansion);
+ }
+
+ @Test
+ public void testMultipleSentencesRandomExpansion() throws Exception {
+ ContextFreeGrammar contextFreeGrammar = new ContextFreeGrammar(nonTerminals, terminals, rules, startSymbol, true);
String[] expansion = contextFreeGrammar.leftMostDerivation("S", "CJ", "S");
checkExpansion(expansion);
@@ -132,6 +154,5 @@ public class ContextFreeGrammarTest {
for (String t : expansion) {
assertTrue("term " + t + " is not a terminal symbol", terminals.contains(t));
}
- System.err.println(Arrays.toString(expansion));
}
}