You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2017/04/20 10:41:34 UTC

[47/50] [abbrv] opennlp git commit: OPENNLP-1015: Add tests for DataIndexers

OPENNLP-1015: Add tests for DataIndexers

Closes #152


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/4ba2a8b9
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/4ba2a8b9
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/4ba2a8b9

Branch: refs/heads/parser_regression
Commit: 4ba2a8b9745d669b4b6de645eded912e65813ed8
Parents: 0fb11cd
Author: koji <ko...@rondhuit.com>
Authored: Fri Apr 7 21:50:02 2017 +0900
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Thu Apr 20 12:40:25 2017 +0200

----------------------------------------------------------------------
 .../tools/ml/model/OnePassDataIndexerTest.java  |  64 ++++++++++
 .../model/OnePassRealValueDataIndexerTest.java  | 116 +++++++++++++++++++
 .../ml/model/SimpleEventStreamBuilder.java      |  76 ++++++++++++
 .../tools/ml/model/TwoPassDataIndexerTest.java  |  64 ++++++++++
 4 files changed, 320 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/4ba2a8b9/opennlp-tools/src/test/java/opennlp/tools/ml/model/OnePassDataIndexerTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/ml/model/OnePassDataIndexerTest.java b/opennlp-tools/src/test/java/opennlp/tools/ml/model/OnePassDataIndexerTest.java
new file mode 100644
index 0000000..e629e7a
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/ml/model/OnePassDataIndexerTest.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.ml.model;
+
+import java.io.IOException;
+import java.util.Collections;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.TrainingParameters;
+
+/** Checks what {@link OnePassDataIndexer} produces for a small, hand-written event stream. */
+public class OnePassDataIndexerTest {
+  @Test
+  public void testIndex() throws IOException {
+    // One event per token of: He belongs to <START:org> Apache Software Foundation <END> .
+    ObjectStream<Event> events = new SimpleEventStreamBuilder()
+        .add("other/w=he n1w=belongs n2w=to po=other pow=other,He powf=other,ic ppo=other")
+        .add("other/w=belongs p1w=he n1w=to n2w=apache po=other pow=other,belongs powf=other,lc ppo=other")
+        .add("other/w=to p1w=belongs p2w=he n1w=apache n2w=software po=other pow=other,to"
+            + " powf=other,lc ppo=other")
+        .add("org-start/w=apache p1w=to p2w=belongs n1w=software n2w=foundation po=other pow=other,Apache"
+            + " powf=other,ic ppo=other")
+        .add("org-cont/w=software p1w=apache p2w=to n1w=foundation n2w=. po=org-start"
+            + " pow=org-start,Software powf=org-start,ic ppo=other")
+        .add("org-cont/w=foundation p1w=software p2w=apache n1w=. po=org-cont pow=org-cont,Foundation"
+            + " powf=org-cont,ic ppo=org-start")
+        .add("other/w=. p1w=foundation p2w=software po=org-cont pow=org-cont,. powf=org-cont,other"
+            + " ppo=org-cont")
+        .build();
+
+    DataIndexer indexer = new OnePassDataIndexer();
+    indexer.init(new TrainingParameters(Collections.emptyMap()), null);
+    indexer.index(events);
+    Assert.assertEquals(3, indexer.getContexts().length);
+    for (int[] context : indexer.getContexts()) {
+      Assert.assertArrayEquals(new int[]{0}, context);
+    }
+    Assert.assertNull(indexer.getValues()); // none of the events above has a ";value" part
+    Assert.assertArrayEquals(new String[]{"other", "org-start", "org-cont"}, indexer.getOutcomeLabels());
+    Assert.assertArrayEquals(new int[]{0, 1, 2}, indexer.getOutcomeList());
+    Assert.assertArrayEquals(new String[]{"ppo=other"}, indexer.getPredLabels());
+    Assert.assertArrayEquals(new int[]{5}, indexer.getPredCounts()); // "ppo=other" is in 5 of 7 events
+    Assert.assertEquals(5, indexer.getNumEvents());
+    Assert.assertArrayEquals(new int[]{3, 1, 1}, indexer.getNumTimesEventsSeen());
+  }
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/4ba2a8b9/opennlp-tools/src/test/java/opennlp/tools/ml/model/OnePassRealValueDataIndexerTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/ml/model/OnePassRealValueDataIndexerTest.java b/opennlp-tools/src/test/java/opennlp/tools/ml/model/OnePassRealValueDataIndexerTest.java
new file mode 100644
index 0000000..ab9eda3
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/ml/model/OnePassRealValueDataIndexerTest.java
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.ml.model;
+
+import java.io.IOException;
+import java.util.Collections;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.TrainingParameters;
+
+/** Tests {@link OnePassRealValueDataIndexer} on one sentence, without and with context values. */
+public class OnePassRealValueDataIndexerTest {
+
+  private DataIndexer indexer;
+
+  @Before
+  public void setUp() throws Exception {
+    indexer = new OnePassRealValueDataIndexer();
+    indexer.init(new TrainingParameters(Collections.emptyMap()), null); // no explicit settings
+  }
+
+  @Test
+  public void testIndex() throws IOException {
+    // One event per token of: He belongs to <START:org> Apache Software Foundation <END> .
+    ObjectStream<Event> eventStream = new SimpleEventStreamBuilder()
+        .add("other/w=he n1w=belongs n2w=to po=other pow=other,He powf=other,ic ppo=other")
+        .add("other/w=belongs p1w=he n1w=to n2w=apache po=other pow=other,belongs powf=other,lc ppo=other")
+        .add("other/w=to p1w=belongs p2w=he n1w=apache n2w=software po=other pow=other,to" +
+                    " powf=other,lc ppo=other")
+        .add("org-start/w=apache p1w=to p2w=belongs n1w=software n2w=foundation po=other pow=other,Apache" +
+                    " powf=other,ic ppo=other")
+        .add("org-cont/w=software p1w=apache p2w=to n1w=foundation n2w=. po=org-start" +
+                " pow=org-start,Software powf=org-start,ic ppo=other")
+        .add("org-cont/w=foundation p1w=software p2w=apache n1w=. po=org-cont pow=org-cont,Foundation" +
+                    " powf=org-cont,ic ppo=org-start")
+        .add("other/w=. p1w=foundation p2w=software po=org-cont pow=org-cont,. powf=org-cont,other" +
+                    " ppo=org-cont")
+        .build();
+
+    indexer.index(eventStream);
+    Assert.assertEquals(3, indexer.getContexts().length);
+    Assert.assertArrayEquals(new int[]{0}, indexer.getContexts()[0]);
+    Assert.assertArrayEquals(new int[]{0}, indexer.getContexts()[1]);
+    Assert.assertArrayEquals(new int[]{0}, indexer.getContexts()[2]);
+    Assert.assertEquals(3, indexer.getValues().length);
+    Assert.assertNull(indexer.getValues()[0]); // none of the events above has a ";value" part
+    Assert.assertNull(indexer.getValues()[1]);
+    Assert.assertNull(indexer.getValues()[2]);
+    Assert.assertEquals(5, indexer.getNumEvents());
+    Assert.assertArrayEquals(new int[]{0, 1, 2}, indexer.getOutcomeList());
+    Assert.assertArrayEquals(new int[]{3, 1, 1}, indexer.getNumTimesEventsSeen());
+    Assert.assertArrayEquals(new String[]{"ppo=other"}, indexer.getPredLabels());
+    Assert.assertArrayEquals(new String[]{"other", "org-start", "org-cont"}, indexer.getOutcomeLabels());
+    Assert.assertArrayEquals(new int[]{5}, indexer.getPredCounts()); // "ppo=other" is in 5 of 7 events
+  }
+
+  @Test
+  public void testIndexValues() throws IOException {
+    // He belongs to <START:org> Apache Software Foundation <END> . (each context has a ";value")
+    ObjectStream<Event> eventStream = new SimpleEventStreamBuilder()
+        .add("other/w=he;0.1 n1w=belongs;0.2 n2w=to;0.1 po=other;0.1" +
+                " pow=other,He;0.1 powf=other,ic;0.1 ppo=other;0.1")
+        .add("other/w=belongs;0.1 p1w=he;0.2 n1w=to;0.1 n2w=apache;0.1" +
+                " po=other;0.1 pow=other,belongs;0.1 powf=other,lc;0.1 ppo=other;0.1")
+        .add("other/w=to;0.1 p1w=belongs;0.2 p2w=he;0.1 n1w=apache;0.1" +
+                " n2w=software;0.1 po=other;0.1 pow=other,to;0.1 powf=other,lc;0.1 ppo=other;0.1")
+        .add("org-start/w=apache;0.1 p1w=to;0.2 p2w=belongs;0.1 n1w=software;0.1 n2w=foundation;0.1" +
+                " po=other;0.1 pow=other,Apache;0.1 powf=other,ic;0.1 ppo=other;0.1")
+        .add("org-cont/w=software;0.1 p1w=apache;0.2 p2w=to;0.1 n1w=foundation;0.1" +
+                " n2w=.;0.1 po=org-start;0.1 pow=org-start,Software;0.1 powf=org-start,ic;0.1 ppo=other;0.1")
+        .add("org-cont/w=foundation;0.1 p1w=software;0.2 p2w=apache;0.1 n1w=.;0.1 po=org-cont;0.1" +
+                " pow=org-cont,Foundation;0.1 powf=org-cont,ic;0.1 ppo=org-start;0.1")
+        .add("other/w=.;0.1 p1w=foundation;0.1 p2w=software;0.1 po=org-cont;0.1 pow=org-cont,.;0.1" +
+                " powf=org-cont,other;0.1 ppo=org-cont;0.1")
+        .build();
+
+    indexer.index(eventStream);
+    Assert.assertEquals(3, indexer.getContexts().length);
+    Assert.assertArrayEquals(new int[]{0}, indexer.getContexts()[0]);
+    Assert.assertArrayEquals(new int[]{0}, indexer.getContexts()[1]);
+    Assert.assertArrayEquals(new int[]{0}, indexer.getContexts()[2]);
+    Assert.assertEquals(3, indexer.getValues().length);
+    final float delta = 0.001F; // tolerance for comparing the parsed float values
+    Assert.assertArrayEquals(new float[]{0.1F, 0.2F, 0.1F, 0.1F, 0.1F, 0.1F, 0.1F},
+            indexer.getValues()[0], delta);
+    Assert.assertArrayEquals(new float[]{0.1F, 0.2F, 0.1F, 0.1F, 0.1F, 0.1F, 0.1F, 0.1F, 0.1F},
+            indexer.getValues()[1], delta);
+    Assert.assertArrayEquals(new float[]{0.1F, 0.2F, 0.1F, 0.1F, 0.1F, 0.1F, 0.1F, 0.1F, 0.1F},
+            indexer.getValues()[2], delta);
+    Assert.assertEquals(5, indexer.getNumEvents());
+    Assert.assertArrayEquals(new int[]{0, 1, 2}, indexer.getOutcomeList());
+    Assert.assertArrayEquals(new int[]{3, 1, 1}, indexer.getNumTimesEventsSeen());
+    Assert.assertArrayEquals(new String[]{"ppo=other"}, indexer.getPredLabels());
+    Assert.assertArrayEquals(new String[]{"other", "org-start", "org-cont"}, indexer.getOutcomeLabels());
+    Assert.assertArrayEquals(new int[]{5}, indexer.getPredCounts()); // "ppo=other" is in 5 of 7 events
+  }
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/4ba2a8b9/opennlp-tools/src/test/java/opennlp/tools/ml/model/SimpleEventStreamBuilder.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/ml/model/SimpleEventStreamBuilder.java b/opennlp-tools/src/test/java/opennlp/tools/ml/model/SimpleEventStreamBuilder.java
new file mode 100644
index 0000000..49fa242
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/ml/model/SimpleEventStreamBuilder.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.ml.model;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import opennlp.tools.util.ObjectStream;
+
+public class SimpleEventStreamBuilder {
+
+  private final List<Event> eventList = new ArrayList<>();
+  private int pos = 0;
+
+  /*
+   * the format of event should look like:
+   * without values) other/w=he n1w=belongs n2w=to po=other pow=other,He powf=other,ic
+   * with values) other/w=he;0.5 n1w=belongs;0.4 n2w=to;0.3 po=other;0.5 pow=other,He;0.25 powf=other,ic;0.5
+   */
+  public SimpleEventStreamBuilder add(String event) {
+    String[] ss = event.split("/");
+    if (ss.length != 2) {
+      throw new RuntimeException(String.format("format error of the event \"%s\"", event));
+    }
+
+    // look for context (and values)
+    String[] cvPairs = ss[1].split("\\s+");
+    if (cvPairs[0].contains(";")) { // has values?
+      String[] context = new String[cvPairs.length];
+      float[] values = new float[cvPairs.length];
+      for (int i = 0; i < cvPairs.length; i++) {
+        String[] pair = cvPairs[i].split(";");
+        if (pair.length != 2) {
+          throw new RuntimeException(String.format("format error of the event \"%s\". "
+                       + "\"%s\" doesn't have value", event, pair));
+        }
+        context[i] = pair[0];
+        values[i] = Float.parseFloat(pair[1]);
+      }
+      eventList.add(new Event(ss[0], context, values));
+    }
+    else {
+      eventList.add(new Event(ss[0], cvPairs));
+    }
+
+    return this;
+  }
+
+  public ObjectStream<Event> build() {
+    return new ObjectStream<Event>() {
+      @Override
+      public Event read() throws IOException {
+        if (eventList.size() <= pos) {
+          return null;
+        }
+        return eventList.get(pos++);
+      }
+    };
+  }
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/4ba2a8b9/opennlp-tools/src/test/java/opennlp/tools/ml/model/TwoPassDataIndexerTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/ml/model/TwoPassDataIndexerTest.java b/opennlp-tools/src/test/java/opennlp/tools/ml/model/TwoPassDataIndexerTest.java
new file mode 100644
index 0000000..c246936
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/ml/model/TwoPassDataIndexerTest.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.ml.model;
+
+import java.io.IOException;
+import java.util.Collections;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.TrainingParameters;
+
+/** Checks what {@link TwoPassDataIndexer} produces for a small, hand-written event stream. */
+public class TwoPassDataIndexerTest {
+  @Test
+  public void testIndex() throws IOException {
+    // One event per token of: He belongs to <START:org> Apache Software Foundation <END> .
+    ObjectStream<Event> events = new SimpleEventStreamBuilder()
+        .add("other/w=he n1w=belongs n2w=to po=other pow=other,He powf=other,ic ppo=other")
+        .add("other/w=belongs p1w=he n1w=to n2w=apache po=other pow=other,belongs powf=other,lc ppo=other")
+        .add("other/w=to p1w=belongs p2w=he n1w=apache n2w=software po=other pow=other,to"
+            + " powf=other,lc ppo=other")
+        .add("org-start/w=apache p1w=to p2w=belongs n1w=software n2w=foundation po=other pow=other,Apache"
+            + " powf=other,ic ppo=other")
+        .add("org-cont/w=software p1w=apache p2w=to n1w=foundation n2w=. po=org-start"
+            + " pow=org-start,Software powf=org-start,ic ppo=other")
+        .add("org-cont/w=foundation p1w=software p2w=apache n1w=. po=org-cont pow=org-cont,Foundation"
+            + " powf=org-cont,ic ppo=org-start")
+        .add("other/w=. p1w=foundation p2w=software po=org-cont pow=org-cont,. powf=org-cont,other"
+            + " ppo=org-cont")
+        .build();
+
+    DataIndexer indexer = new TwoPassDataIndexer();
+    indexer.init(new TrainingParameters(Collections.emptyMap()), null);
+    indexer.index(events);
+    Assert.assertEquals(3, indexer.getContexts().length);
+    for (int[] context : indexer.getContexts()) {
+      Assert.assertArrayEquals(new int[]{0}, context);
+    }
+    Assert.assertNull(indexer.getValues()); // none of the events above has a ";value" part
+    Assert.assertArrayEquals(new String[]{"other", "org-start", "org-cont"}, indexer.getOutcomeLabels());
+    Assert.assertArrayEquals(new int[]{0, 1, 2}, indexer.getOutcomeList());
+    Assert.assertArrayEquals(new String[]{"ppo=other"}, indexer.getPredLabels());
+    Assert.assertArrayEquals(new int[]{5}, indexer.getPredCounts()); // "ppo=other" is in 5 of 7 events
+    Assert.assertEquals(5, indexer.getNumEvents());
+    Assert.assertArrayEquals(new int[]{3, 1, 1}, indexer.getNumTimesEventsSeen());
+  }
+}