You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by co...@apache.org on 2016/11/09 21:10:59 UTC
[05/16] opennlp git commit: OPENNLP-622 Fixed PosTaggerFactory and restored test.
OPENNLP-622 Fixed PosTaggerFactory and restored test.
Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/3ceb5540
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/3ceb5540
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/3ceb5540
Branch: refs/heads/trunk
Commit: 3ceb5540ced842875c010bb81169afcb544f203e
Parents: 1314887
Author: William Colen <co...@apache.org>
Authored: Fri Jul 8 03:52:14 2016 +0000
Committer: William Colen <co...@apache.org>
Committed: Fri Jul 8 03:52:14 2016 +0000
----------------------------------------------------------------------
.../tagdict/MorfologikPOSTaggerFactory.java | 46 +++--
.../tagdict/POSTaggerFactoryTest.java | 192 ++++++++-----------
2 files changed, 106 insertions(+), 132 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/opennlp/blob/3ceb5540/src/main/java/opennlp/morfologik/tagdict/MorfologikPOSTaggerFactory.java
----------------------------------------------------------------------
diff --git a/src/main/java/opennlp/morfologik/tagdict/MorfologikPOSTaggerFactory.java b/src/main/java/opennlp/morfologik/tagdict/MorfologikPOSTaggerFactory.java
index 723b1ce..dcb6554 100644
--- a/src/main/java/opennlp/morfologik/tagdict/MorfologikPOSTaggerFactory.java
+++ b/src/main/java/opennlp/morfologik/tagdict/MorfologikPOSTaggerFactory.java
@@ -26,9 +26,11 @@ import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.nio.file.Files;
+import java.nio.file.Path;
import java.nio.file.Paths;
import java.util.Map;
+import morfologik.stemming.DictionaryMetadata;
import opennlp.tools.dictionary.Dictionary;
import opennlp.tools.postag.POSTaggerFactory;
import opennlp.tools.postag.TagDictionary;
@@ -53,23 +55,27 @@ public class MorfologikPOSTaggerFactory extends POSTaggerFactory {
public MorfologikPOSTaggerFactory() {
}
-
- /**
- * Creates a new {@link POSTaggerFactory} that uses the a Morfologik based {@link TagDictionary}.
- *
- * @param ngramDictionary a ngramDictionary
- * @param morfologikDictionary a Morfologik dictionary
- * @param morfologikDictionaryMetadata the dictionary metadata
- * @throws IOException invalid Morfologik dictionary
- */
- public MorfologikPOSTaggerFactory(Dictionary ngramDictionary,
- byte[] morfologikDictionary, byte[] morfologikDictionaryMetadata) throws IOException {
- super(ngramDictionary, null);
- this.dictData = morfologikDictionary;
- this.dictInfo = morfologikDictionaryMetadata;
+
+ public TagDictionary createTagDictionary(File dictionary)
+ throws InvalidFormatException, FileNotFoundException, IOException {
+
+ if(!dictionary.canRead()) {
+ throw new FileNotFoundException("Could not read dictionary: " + dictionary.getAbsolutePath());
+ }
+
+ Path dictionaryMeta = DictionaryMetadata.getExpectedMetadataLocation(dictionary.toPath());
+
+ if(dictionaryMeta == null || !dictionaryMeta.toFile().canRead()) {
+ throw new FileNotFoundException("Could not read dictionary metadata: " + dictionaryMeta.getFileName());
+ }
+
+ this.dictData = Files.readAllBytes(dictionary.toPath());
+ this.dictInfo = Files.readAllBytes(dictionaryMeta);
+
+ return createMorfologikDictionary(dictData, dictInfo);
- this.dict = createMorfologikDictionary(dictData, dictInfo);
}
+
@Override
protected void init(Dictionary ngramDictionary, TagDictionary posDictionary) {
@@ -130,8 +136,7 @@ public class MorfologikPOSTaggerFactory extends POSTaggerFactory {
@Override
public void setTagDictionary(TagDictionary dictionary) {
- throw new UnsupportedOperationException(
- "Morfologik POS Tagger factory does not support this operation");
+ this.dict = dictionary;
}
@Override
@@ -141,13 +146,6 @@ public class MorfologikPOSTaggerFactory extends POSTaggerFactory {
}
@Override
- public TagDictionary createTagDictionary(File dictionary)
- throws InvalidFormatException, FileNotFoundException, IOException {
- throw new UnsupportedOperationException(
- "Morfologik POS Tagger factory does not support this operation");
- }
-
- @Override
public TagDictionary createTagDictionary(InputStream in)
throws InvalidFormatException, IOException {
throw new UnsupportedOperationException(
http://git-wip-us.apache.org/repos/asf/opennlp/blob/3ceb5540/src/test/java/opennlp/morfologik/tagdict/POSTaggerFactoryTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/opennlp/morfologik/tagdict/POSTaggerFactoryTest.java b/src/test/java/opennlp/morfologik/tagdict/POSTaggerFactoryTest.java
index 6c6814b..9233979 100644
--- a/src/test/java/opennlp/morfologik/tagdict/POSTaggerFactoryTest.java
+++ b/src/test/java/opennlp/morfologik/tagdict/POSTaggerFactoryTest.java
@@ -1,108 +1,84 @@
-///*
-// * Licensed to the Apache Software Foundation (ASF) under one or more
-// * contributor license agreements. See the NOTICE file distributed with
-// * this work for additional information regarding copyright ownership.
-// * The ASF licenses this file to You under the Apache License, Version 2.0
-// * (the "License"); you may not use this file except in compliance with
-// * the License. You may obtain a copy of the License at
-// *
-// * http://www.apache.org/licenses/LICENSE-2.0
-// *
-// * Unless required by applicable law or agreed to in writing, software
-// * distributed under the License is distributed on an "AS IS" BASIS,
-// * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// * See the License for the specific language governing permissions and
-// * limitations under the License.
-// */
-//
-//package opennlp.morfologik.tagdict;
-//
-//import static org.junit.Assert.assertTrue;
-//
-//import java.io.ByteArrayInputStream;
-//import java.io.ByteArrayOutputStream;
-//import java.io.File;
-//import java.io.IOException;
-//import java.io.InputStream;
-//import java.io.InputStreamReader;
-//import java.nio.charset.Charset;
-//import java.nio.file.Files;
-//import java.nio.file.Path;
-//import java.nio.file.Paths;
-//
-//import morfologik.stemming.DictionaryMetadata;
-//import morfologik.stemming.EncoderType;
-//import opennlp.morfologik.builder.MorfologikDictionayBuilder;
-//import opennlp.morfologik.builder.POSDictionayBuilderTest;
-//import opennlp.tools.dictionary.Dictionary;
-//import opennlp.tools.postag.DefaultPOSSequenceValidator;
-//import opennlp.tools.postag.POSContextGenerator;
-//import opennlp.tools.postag.POSDictionary;
-//import opennlp.tools.postag.POSModel;
-//import opennlp.tools.postag.POSSample;
-//import opennlp.tools.postag.POSTaggerFactory;
-//import opennlp.tools.postag.POSTaggerME;
-//import opennlp.tools.postag.WordTagSampleStream;
-//import opennlp.tools.util.BaseToolFactory;
-//import opennlp.tools.util.InvalidFormatException;
-//import opennlp.tools.util.ObjectStream;
-//import opennlp.tools.util.TrainingParameters;
-//import opennlp.tools.util.model.ModelType;
-//
-//import org.junit.Test;
-//
-///**
-// * Tests for the {@link POSTaggerFactory} class.
-// */
-//public class POSTaggerFactoryTest {
-//
-// private static ObjectStream<POSSample> createSampleStream()
-// throws IOException {
-// InputStream in = POSTaggerFactoryTest.class.getClassLoader()
-// .getResourceAsStream("AnnotatedSentences.txt");
-//
-// return new WordTagSampleStream((new InputStreamReader(in)));
-// }
-//
-// static POSModel trainPOSModel(ModelType type, POSTaggerFactory factory)
-// throws IOException {
-// return POSTaggerME.train("en", createSampleStream(),
-// TrainingParameters.defaultParams(), factory);
-// }
-//
-// @Test
-// public void testPOSTaggerWithCustomFactory() throws Exception {
-//
-// MorfologikDictionayBuilder builder = new MorfologikDictionayBuilder();
-// File dictInFile = new File(POSDictionayBuilderTest.class.getResource(
-// "/dictionaryWithLemma.txt").getFile());
-//
-// File dictOutFile = File.createTempFile(
-// POSDictionayBuilderTest.class.getName(), ".dict");
-//
-// builder.build(dictInFile, dictOutFile, Charset.forName("UTF-8"), "+",
-// EncoderType.PREFIX);
-//
-// Path dictPath = dictOutFile.toPath();
-// Path metaPath = DictionaryMetadata.getExpectedMetadataLocation(dictPath);
-//
-// byte[] dic = Files.readAllBytes(dictPath);
-// byte[] meta = Files.readAllBytes(metaPath);
-//
-// POSModel posModel = trainPOSModel(ModelType.MAXENT,
-// new MorfologikPOSTaggerFactory(null, dic, meta));
-//
-// POSTaggerFactory factory = posModel.getFactory();
-// assertTrue(factory.getTagDictionary() instanceof MorfologikPOSTaggerFactory);
-//
-// ByteArrayOutputStream out = new ByteArrayOutputStream();
-// posModel.serialize(out);
-// ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
-//
-// POSModel fromSerialized = new POSModel(in);
-//
-// factory = fromSerialized.getFactory();
-// assertTrue(factory.getTagDictionary() instanceof MorfologikPOSTaggerFactory);
-// }
-//
-//}
\ No newline at end of file
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.morfologik.tagdict;
+
+import static org.junit.Assert.assertTrue;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.nio.file.Path;
+
+import opennlp.morfologik.builder.POSDictionayBuilderTest;
+import opennlp.tools.postag.POSModel;
+import opennlp.tools.postag.POSSample;
+import opennlp.tools.postag.POSTaggerFactory;
+import opennlp.tools.postag.POSTaggerME;
+import opennlp.tools.postag.TagDictionary;
+import opennlp.tools.postag.WordTagSampleStream;
+import opennlp.tools.util.ObjectStream;
+import opennlp.tools.util.TrainingParameters;
+import opennlp.tools.util.model.ModelType;
+
+import org.junit.Test;
+
+/**
+ * Tests for the {@link POSTaggerFactory} class.
+ */
+public class POSTaggerFactoryTest {
+
+ private static ObjectStream<POSSample> createSampleStream()
+ throws IOException {
+ InputStream in = POSTaggerFactoryTest.class.getClassLoader()
+ .getResourceAsStream("AnnotatedSentences.txt");
+
+ return new WordTagSampleStream((new InputStreamReader(in)));
+ }
+
+ static POSModel trainPOSModel(ModelType type, POSTaggerFactory factory)
+ throws IOException {
+ return POSTaggerME.train("en", createSampleStream(),
+ TrainingParameters.defaultParams(), factory);
+ }
+
+ @Test
+ public void testPOSTaggerWithCustomFactory() throws Exception {
+
+ Path dictionary = POSDictionayBuilderTest.createMorfologikDictionary();
+ POSTaggerFactory inFactory = new MorfologikPOSTaggerFactory();
+ TagDictionary inDict = inFactory.createTagDictionary(dictionary.toFile());
+ inFactory.setTagDictionary(inDict);
+
+ POSModel posModel = trainPOSModel(ModelType.MAXENT, inFactory);
+
+ POSTaggerFactory factory = posModel.getFactory();
+ assertTrue(factory.getTagDictionary() instanceof MorfologikTagDictionary);
+
+ ByteArrayOutputStream out = new ByteArrayOutputStream();
+ posModel.serialize(out);
+ ByteArrayInputStream in = new ByteArrayInputStream(out.toByteArray());
+
+ POSModel fromSerialized = new POSModel(in);
+
+ factory = fromSerialized.getFactory();
+ assertTrue(factory.getTagDictionary() instanceof MorfologikTagDictionary);
+ }
+
+}
\ No newline at end of file