You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2017/04/20 10:41:34 UTC
[47/50] [abbrv] opennlp git commit: OPENNLP-1015: Add tests for
DataIndexers
OPENNLP-1015: Add tests for DataIndexers
Closes #152
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/4ba2a8b9
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/4ba2a8b9
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/4ba2a8b9
Branch: refs/heads/parser_regression
Commit: 4ba2a8b9745d669b4b6de645eded912e65813ed8
Parents: 0fb11cd
Author: koji <ko...@rondhuit.com>
Authored: Fri Apr 7 21:50:02 2017 +0900
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Thu Apr 20 12:40:25 2017 +0200
----------------------------------------------------------------------
.../tools/ml/model/OnePassDataIndexerTest.java | 64 ++++++++++
.../model/OnePassRealValueDataIndexerTest.java | 116 +++++++++++++++++++
.../ml/model/SimpleEventStreamBuilder.java | 76 ++++++++++++
.../tools/ml/model/TwoPassDataIndexerTest.java | 64 ++++++++++
4 files changed, 320 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/opennlp/blob/4ba2a8b9/opennlp-tools/src/test/java/opennlp/tools/ml/model/OnePassDataIndexerTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/ml/model/OnePassDataIndexerTest.java b/opennlp-tools/src/test/java/opennlp/tools/ml/model/OnePassDataIndexerTest.java
new file mode 100644
index 0000000..e629e7a
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/ml/model/OnePassDataIndexerTest.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.ml.model;
+
+import java.io.IOException;
+import java.util.Collections;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.TrainingParameters;
+
+public class OnePassDataIndexerTest {
+ // Checks the index OnePassDataIndexer builds from a small hand-written event stream.
+ @Test
+ public void testIndex() throws IOException {
+ // One "outcome/predicates" event per token of: He belongs to <START:org> Apache Software Foundation <END> .
+ ObjectStream<Event> eventStream = new SimpleEventStreamBuilder()
+ .add("other/w=he n1w=belongs n2w=to po=other pow=other,He powf=other,ic ppo=other")
+ .add("other/w=belongs p1w=he n1w=to n2w=apache po=other pow=other,belongs powf=other,lc ppo=other")
+ .add("other/w=to p1w=belongs p2w=he n1w=apache n2w=software po=other pow=other,to" +
+ " powf=other,lc ppo=other")
+ .add("org-start/w=apache p1w=to p2w=belongs n1w=software n2w=foundation po=other pow=other,Apache" +
+ " powf=other,ic ppo=other")
+ .add("org-cont/w=software p1w=apache p2w=to n1w=foundation n2w=. po=org-start" +
+ " pow=org-start,Software powf=org-start,ic ppo=other")
+ .add("org-cont/w=foundation p1w=software p2w=apache n1w=. po=org-cont pow=org-cont,Foundation" +
+ " powf=org-cont,ic ppo=org-start")
+ .add("other/w=. p1w=foundation p2w=software po=org-cont pow=org-cont,. powf=org-cont,other" +
+ " ppo=org-cont")
+ .build();
+ // Index with an empty parameter map, then check the resulting contexts, counts and labels.
+ DataIndexer indexer = new OnePassDataIndexer();
+ indexer.init(new TrainingParameters(Collections.emptyMap()), null);
+ indexer.index(eventStream);
+ Assert.assertEquals(3, indexer.getContexts().length); // one context row per expected outcome entry
+ Assert.assertArrayEquals(new int[]{0}, indexer.getContexts()[0]);
+ Assert.assertArrayEquals(new int[]{0}, indexer.getContexts()[1]);
+ Assert.assertArrayEquals(new int[]{0}, indexer.getContexts()[2]);
+ Assert.assertNull(indexer.getValues()); // the events above carry no ";value" suffixes
+ Assert.assertEquals(5, indexer.getNumEvents()); // 7 were added; presumably the 2 without "ppo=other" are dropped - confirm
+ Assert.assertArrayEquals(new int[]{0, 1, 2}, indexer.getOutcomeList());
+ Assert.assertArrayEquals(new int[]{3, 1, 1}, indexer.getNumTimesEventsSeen()); // sums to the 5 counted events
+ Assert.assertArrayEquals(new String[]{"ppo=other"}, indexer.getPredLabels()); // the only predicate expected to remain
+ Assert.assertArrayEquals(new String[]{"other", "org-start", "org-cont"}, indexer.getOutcomeLabels());
+ Assert.assertArrayEquals(new int[]{5}, indexer.getPredCounts());
+ }
+}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/4ba2a8b9/opennlp-tools/src/test/java/opennlp/tools/ml/model/OnePassRealValueDataIndexerTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/ml/model/OnePassRealValueDataIndexerTest.java b/opennlp-tools/src/test/java/opennlp/tools/ml/model/OnePassRealValueDataIndexerTest.java
new file mode 100644
index 0000000..ab9eda3
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/ml/model/OnePassRealValueDataIndexerTest.java
@@ -0,0 +1,116 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.ml.model;
+
+import java.io.IOException;
+import java.util.Collections;
+
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.TrainingParameters;
+
+public class OnePassRealValueDataIndexerTest {
+ // Checks OnePassRealValueDataIndexer on hand-written events, without and with real values.
+ DataIndexer indexer; // re-created before each test by setUp()
+
+ @Before
+ public void setUp() throws Exception {
+ indexer = new OnePassRealValueDataIndexer();
+ indexer.init(new TrainingParameters(Collections.emptyMap()), null);
+ }
+
+ @Test
+ public void testIndex() throws IOException {
+ // One "outcome/predicates" event per token of: He belongs to <START:org> Apache Software Foundation <END> .
+ ObjectStream<Event> eventStream = new SimpleEventStreamBuilder()
+ .add("other/w=he n1w=belongs n2w=to po=other pow=other,He powf=other,ic ppo=other")
+ .add("other/w=belongs p1w=he n1w=to n2w=apache po=other pow=other,belongs powf=other,lc ppo=other")
+ .add("other/w=to p1w=belongs p2w=he n1w=apache n2w=software po=other pow=other,to" +
+ " powf=other,lc ppo=other")
+ .add("org-start/w=apache p1w=to p2w=belongs n1w=software n2w=foundation po=other pow=other,Apache" +
+ " powf=other,ic ppo=other")
+ .add("org-cont/w=software p1w=apache p2w=to n1w=foundation n2w=. po=org-start" +
+ " pow=org-start,Software powf=org-start,ic ppo=other")
+ .add("org-cont/w=foundation p1w=software p2w=apache n1w=. po=org-cont pow=org-cont,Foundation" +
+ " powf=org-cont,ic ppo=org-start")
+ .add("other/w=. p1w=foundation p2w=software po=org-cont pow=org-cont,. powf=org-cont,other" +
+ " ppo=org-cont")
+ .build();
+
+ indexer.index(eventStream);
+ Assert.assertEquals(3, indexer.getContexts().length);
+ Assert.assertArrayEquals(new int[]{0}, indexer.getContexts()[0]);
+ Assert.assertArrayEquals(new int[]{0}, indexer.getContexts()[1]);
+ Assert.assertArrayEquals(new int[]{0}, indexer.getContexts()[2]);
+ Assert.assertEquals(3, indexer.getValues().length); // a values row exists per entry ...
+ Assert.assertNull(indexer.getValues()[0]); // ... but each row is null because no ";value" suffixes were given
+ Assert.assertNull(indexer.getValues()[1]);
+ Assert.assertNull(indexer.getValues()[2]);
+ Assert.assertEquals(5, indexer.getNumEvents()); // 7 were added; presumably the 2 without "ppo=other" are dropped - confirm
+ Assert.assertArrayEquals(new int[]{0, 1, 2}, indexer.getOutcomeList());
+ Assert.assertArrayEquals(new int[]{3, 1, 1}, indexer.getNumTimesEventsSeen());
+ Assert.assertArrayEquals(new String[]{"ppo=other"}, indexer.getPredLabels());
+ Assert.assertArrayEquals(new String[]{"other", "org-start", "org-cont"}, indexer.getOutcomeLabels());
+ Assert.assertArrayEquals(new int[]{5}, indexer.getPredCounts());
+ }
+
+ @Test
+ public void testIndexValues() throws IOException {
+ // Same sentence as testIndex, but every predicate carries a real value written as "name;value".
+ ObjectStream<Event> eventStream = new SimpleEventStreamBuilder()
+ .add("other/w=he;0.1 n1w=belongs;0.2 n2w=to;0.1 po=other;0.1" +
+ " pow=other,He;0.1 powf=other,ic;0.1 ppo=other;0.1")
+ .add("other/w=belongs;0.1 p1w=he;0.2 n1w=to;0.1 n2w=apache;0.1" +
+ " po=other;0.1 pow=other,belongs;0.1 powf=other,lc;0.1 ppo=other;0.1")
+ .add("other/w=to;0.1 p1w=belongs;0.2 p2w=he;0.1 n1w=apache;0.1" +
+ " n2w=software;0.1 po=other;0.1 pow=other,to;0.1 powf=other,lc;0.1 ppo=other;0.1")
+ .add("org-start/w=apache;0.1 p1w=to;0.2 p2w=belongs;0.1 n1w=software;0.1 n2w=foundation;0.1" +
+ " po=other;0.1 pow=other,Apache;0.1 powf=other,ic;0.1 ppo=other;0.1")
+ .add("org-cont/w=software;0.1 p1w=apache;0.2 p2w=to;0.1 n1w=foundation;0.1" +
+ " n2w=.;0.1 po=org-start;0.1 pow=org-start,Software;0.1 powf=org-start,ic;0.1 ppo=other;0.1")
+ .add("org-cont/w=foundation;0.1 p1w=software;0.2 p2w=apache;0.1 n1w=.;0.1 po=org-cont;0.1" +
+ " pow=org-cont,Foundation;0.1 powf=org-cont,ic;0.1 ppo=org-start;0.1")
+ .add("other/w=.;0.1 p1w=foundation;0.1 p2w=software;0.1 po=org-cont;0.1 pow=org-cont,.;0.1" +
+ " powf=org-cont,other;0.1 ppo=org-cont;0.1")
+ .build();
+
+ indexer.index(eventStream);
+ // Contexts, counts and labels are asserted exactly as in testIndex; only the value rows differ.
+ Assert.assertEquals(3, indexer.getContexts().length);
+ Assert.assertArrayEquals(new int[]{0}, indexer.getContexts()[0]);
+ Assert.assertArrayEquals(new int[]{0}, indexer.getContexts()[1]);
+ Assert.assertArrayEquals(new int[]{0}, indexer.getContexts()[2]);
+ Assert.assertEquals(3, indexer.getValues().length);
+ final float delta = 0.001F; // tolerance for the float comparisons below
+ Assert.assertArrayEquals(new float[]{0.1F, 0.2F, 0.1F, 0.1F, 0.1F, 0.1F, 0.1F},
+ indexer.getValues()[0], delta);
+ Assert.assertArrayEquals(new float[]{0.1F, 0.2F, 0.1F, 0.1F, 0.1F, 0.1F, 0.1F, 0.1F, 0.1F},
+ indexer.getValues()[1], delta);
+ Assert.assertArrayEquals(new float[]{0.1F, 0.2F, 0.1F, 0.1F, 0.1F, 0.1F, 0.1F, 0.1F, 0.1F},
+ indexer.getValues()[2], delta);
+ Assert.assertEquals(5, indexer.getNumEvents());
+ Assert.assertArrayEquals(new int[]{0, 1, 2}, indexer.getOutcomeList());
+ Assert.assertArrayEquals(new int[]{3, 1, 1}, indexer.getNumTimesEventsSeen());
+ Assert.assertArrayEquals(new String[]{"ppo=other"}, indexer.getPredLabels());
+ Assert.assertArrayEquals(new String[]{"other", "org-start", "org-cont"}, indexer.getOutcomeLabels());
+ Assert.assertArrayEquals(new int[]{5}, indexer.getPredCounts());
+ }
+}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/4ba2a8b9/opennlp-tools/src/test/java/opennlp/tools/ml/model/SimpleEventStreamBuilder.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/ml/model/SimpleEventStreamBuilder.java b/opennlp-tools/src/test/java/opennlp/tools/ml/model/SimpleEventStreamBuilder.java
new file mode 100644
index 0000000..49fa242
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/ml/model/SimpleEventStreamBuilder.java
@@ -0,0 +1,76 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.ml.model;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.List;
+
+import opennlp.tools.util.ObjectStream;
+
+public class SimpleEventStreamBuilder {
+ // Test helper: parses textual event descriptions and serves them as an ObjectStream.
+ private final List<Event> eventList = new ArrayList<>();
+
+ /**
+ * Parses "outcome/predicates" and appends the event; throws RuntimeException on a malformed event.
+ * without values) other/w=he n1w=belongs n2w=to po=other pow=other,He powf=other,ic
+ * with values) other/w=he;0.5 n1w=belongs;0.4 n2w=to;0.3 po=other;0.5 pow=other,He;0.25 powf=other,ic;0.5
+ */
+ public SimpleEventStreamBuilder add(String event) {
+ String[] ss = event.split("/");
+ if (ss.length != 2) {
+ throw new RuntimeException(String.format("format error of the event \"%s\"", event));
+ }
+
+ // look for context (and values); the first predicate alone decides whether values are expected
+ String[] cvPairs = ss[1].split("\\s+");
+ if (cvPairs[0].contains(";")) { // has values?
+ String[] context = new String[cvPairs.length];
+ float[] values = new float[cvPairs.length];
+ for (int i = 0; i < cvPairs.length; i++) {
+ String[] pair = cvPairs[i].split(";");
+ if (pair.length != 2) {
+ throw new RuntimeException(String.format("format error of the event \"%s\". "
+ + "\"%s\" doesn't have value", event, cvPairs[i])); // report the offending token, not the array
+ }
+ context[i] = pair[0];
+ values[i] = Float.parseFloat(pair[1]);
+ }
+ eventList.add(new Event(ss[0], context, values));
+ }
+ else {
+ eventList.add(new Event(ss[0], cvPairs));
+ }
+
+ return this;
+ }
+ /** Returns a forward-only stream over the added events; read() yields null once they are exhausted. */
+ public ObjectStream<Event> build() {
+ return new ObjectStream<Event>() {
+ private int pos = 0; // per-stream cursor, so each built stream starts at the first event
+ @Override
+ public Event read() throws IOException {
+ if (eventList.size() <= pos) {
+ return null;
+ }
+ return eventList.get(pos++);
+ }
+ };
+ }
+}
http://git-wip-us.apache.org/repos/asf/opennlp/blob/4ba2a8b9/opennlp-tools/src/test/java/opennlp/tools/ml/model/TwoPassDataIndexerTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/ml/model/TwoPassDataIndexerTest.java b/opennlp-tools/src/test/java/opennlp/tools/ml/model/TwoPassDataIndexerTest.java
new file mode 100644
index 0000000..c246936
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/ml/model/TwoPassDataIndexerTest.java
@@ -0,0 +1,64 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.ml.model;
+
+import java.io.IOException;
+import java.util.Collections;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.TrainingParameters;
+
+public class TwoPassDataIndexerTest {
+ // Checks the index TwoPassDataIndexer builds from a small hand-written event stream.
+ @Test
+ public void testIndex() throws IOException {
+ // One "outcome/predicates" event per token of: He belongs to <START:org> Apache Software Foundation <END> .
+ ObjectStream<Event> eventStream = new SimpleEventStreamBuilder()
+ .add("other/w=he n1w=belongs n2w=to po=other pow=other,He powf=other,ic ppo=other")
+ .add("other/w=belongs p1w=he n1w=to n2w=apache po=other pow=other,belongs powf=other,lc ppo=other")
+ .add("other/w=to p1w=belongs p2w=he n1w=apache n2w=software po=other pow=other,to" +
+ " powf=other,lc ppo=other")
+ .add("org-start/w=apache p1w=to p2w=belongs n1w=software n2w=foundation po=other pow=other,Apache" +
+ " powf=other,ic ppo=other")
+ .add("org-cont/w=software p1w=apache p2w=to n1w=foundation n2w=. po=org-start" +
+ " pow=org-start,Software powf=org-start,ic ppo=other")
+ .add("org-cont/w=foundation p1w=software p2w=apache n1w=. po=org-cont pow=org-cont,Foundation" +
+ " powf=org-cont,ic ppo=org-start")
+ .add("other/w=. p1w=foundation p2w=software po=org-cont pow=org-cont,. powf=org-cont,other" +
+ " ppo=org-cont")
+ .build();
+ // Index with an empty parameter map, then check the resulting contexts, counts and labels.
+ DataIndexer indexer = new TwoPassDataIndexer();
+ indexer.init(new TrainingParameters(Collections.emptyMap()), null);
+ indexer.index(eventStream);
+ Assert.assertEquals(3, indexer.getContexts().length); // one context row per expected outcome entry
+ Assert.assertArrayEquals(new int[]{0}, indexer.getContexts()[0]);
+ Assert.assertArrayEquals(new int[]{0}, indexer.getContexts()[1]);
+ Assert.assertArrayEquals(new int[]{0}, indexer.getContexts()[2]);
+ Assert.assertNull(indexer.getValues()); // the events above carry no ";value" suffixes
+ Assert.assertEquals(5, indexer.getNumEvents()); // 7 were added; presumably the 2 without "ppo=other" are dropped - confirm
+ Assert.assertArrayEquals(new int[]{0, 1, 2}, indexer.getOutcomeList());
+ Assert.assertArrayEquals(new int[]{3, 1, 1}, indexer.getNumTimesEventsSeen()); // sums to the 5 counted events
+ Assert.assertArrayEquals(new String[]{"ppo=other"}, indexer.getPredLabels()); // the only predicate expected to remain
+ Assert.assertArrayEquals(new String[]{"other", "org-start", "org-cont"}, indexer.getOutcomeLabels());
+ Assert.assertArrayEquals(new int[]{5}, indexer.getPredCounts());
+ }
+}