You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by jo...@apache.org on 2017/04/20 10:40:52 UTC

[05/50] [abbrv] opennlp git commit: OPENNLP-964: Ignore LICENSE, NOTICE and README files in the model

OPENNLP-964: Ignore LICENSE, NOTICE and README files in the model


Project: http://git-wip-us.apache.org/repos/asf/opennlp/repo
Commit: http://git-wip-us.apache.org/repos/asf/opennlp/commit/639b9cf9
Tree: http://git-wip-us.apache.org/repos/asf/opennlp/tree/639b9cf9
Diff: http://git-wip-us.apache.org/repos/asf/opennlp/diff/639b9cf9

Branch: refs/heads/parser_regression
Commit: 639b9cf90a45ccb3df7fabf1d37787afe5742869
Parents: ef8fd53
Author: Jörn Kottmann <jo...@apache.org>
Authored: Thu Feb 2 19:13:02 2017 +0100
Committer: Jörn Kottmann <jo...@apache.org>
Committed: Thu Apr 20 12:40:18 2017 +0200

----------------------------------------------------------------------
 .../tagdict/MorfologikPOSTaggerFactory.java     | 15 +------
 .../tools/namefind/TokenNameFinderModel.java    | 14 +-----
 .../opennlp/tools/util/model/BaseModel.java     |  2 +
 .../tools/util/model/ByteArraySerializer.java   | 33 ++++++++++++++
 .../util/model/ByteArraySerializerTest.java     | 45 ++++++++++++++++++++
 5 files changed, 82 insertions(+), 27 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/opennlp/blob/639b9cf9/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikPOSTaggerFactory.java
----------------------------------------------------------------------
diff --git a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikPOSTaggerFactory.java b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikPOSTaggerFactory.java
index 370b4d0..592ef7d 100644
--- a/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikPOSTaggerFactory.java
+++ b/opennlp-morfologik-addon/src/main/java/opennlp/morfologik/tagdict/MorfologikPOSTaggerFactory.java
@@ -22,7 +22,6 @@ import java.io.File;
 import java.io.FileNotFoundException;
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.OutputStream;
 import java.nio.file.Files;
 import java.nio.file.Path;
 import java.util.Map;
@@ -33,7 +32,7 @@ import opennlp.tools.dictionary.Dictionary;
 import opennlp.tools.postag.POSTaggerFactory;
 import opennlp.tools.postag.TagDictionary;
 import opennlp.tools.util.model.ArtifactSerializer;
-import opennlp.tools.util.model.ModelUtil;
+import opennlp.tools.util.model.ByteArraySerializer;
 
 public class MorfologikPOSTaggerFactory extends POSTaggerFactory {
 
@@ -150,16 +149,4 @@ public class MorfologikPOSTaggerFactory extends POSTaggerFactory {
             info));
     return new MorfologikTagDictionary(dict);
   }
-
-  static class ByteArraySerializer implements ArtifactSerializer<byte[]> {
-
-    public byte[] create(InputStream in) throws IOException {
-      return ModelUtil.read(in);
-    }
-
-    public void serialize(byte[] artifact, OutputStream out) throws IOException {
-      out.write(artifact);
-    }
-  }
-
 }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/639b9cf9/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java
index 05a3615..09eefc5 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/namefind/TokenNameFinderModel.java
@@ -21,7 +21,6 @@ package opennlp.tools.namefind;
 import java.io.File;
 import java.io.IOException;
 import java.io.InputStream;
-import java.io.OutputStream;
 import java.net.URL;
 import java.util.Map;
 import java.util.Properties;
@@ -36,7 +35,7 @@ import opennlp.tools.util.featuregen.BrownCluster;
 import opennlp.tools.util.featuregen.WordClusterDictionary;
 import opennlp.tools.util.model.ArtifactSerializer;
 import opennlp.tools.util.model.BaseModel;
-import opennlp.tools.util.model.ModelUtil;
+import opennlp.tools.util.model.ByteArraySerializer;
 
 /**
  * The {@link TokenNameFinderModel} is the model used
@@ -53,17 +52,6 @@ public class TokenNameFinderModel extends BaseModel {
     }
   }
 
-  private static class ByteArraySerializer implements ArtifactSerializer<byte[]> {
-
-    public byte[] create(InputStream in) throws IOException {
-      return ModelUtil.read(in);
-    }
-
-    public void serialize(byte[] artifact, OutputStream out) throws IOException {
-      out.write(artifact);
-    }
-  }
-
   private static final String COMPONENT_NAME = "NameFinderME";
   private static final String MAXENT_MODEL_ENTRY_NAME = "nameFinder.model";
 

http://git-wip-us.apache.org/repos/asf/opennlp/blob/639b9cf9/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java b/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java
index 062c787..20acd9d 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/model/BaseModel.java
@@ -351,6 +351,8 @@ public abstract class BaseModel implements ArtifactProvider, Serializable {
     GenericModelSerializer.register(serializers);
     PropertiesSerializer.register(serializers);
     DictionarySerializer.register(serializers);
+    serializers.put("txt", new ByteArraySerializer());
+    serializers.put("html", new ByteArraySerializer());
 
     return serializers;
   }

http://git-wip-us.apache.org/repos/asf/opennlp/blob/639b9cf9/opennlp-tools/src/main/java/opennlp/tools/util/model/ByteArraySerializer.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/model/ByteArraySerializer.java b/opennlp-tools/src/main/java/opennlp/tools/util/model/ByteArraySerializer.java
new file mode 100644
index 0000000..aa123c4
--- /dev/null
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/model/ByteArraySerializer.java
@@ -0,0 +1,33 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.model;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.OutputStream;
+
+public class ByteArraySerializer implements ArtifactSerializer<byte[]> {
+
+  public byte[] create(InputStream in) throws IOException {
+    return ModelUtil.read(in);
+  }
+
+  public void serialize(byte[] artifact, OutputStream out) throws IOException {
+    out.write(artifact);
+  }
+}

http://git-wip-us.apache.org/repos/asf/opennlp/blob/639b9cf9/opennlp-tools/src/test/java/opennlp/tools/util/model/ByteArraySerializerTest.java
----------------------------------------------------------------------
diff --git a/opennlp-tools/src/test/java/opennlp/tools/util/model/ByteArraySerializerTest.java b/opennlp-tools/src/test/java/opennlp/tools/util/model/ByteArraySerializerTest.java
new file mode 100644
index 0000000..a0d7a35
--- /dev/null
+++ b/opennlp-tools/src/test/java/opennlp/tools/util/model/ByteArraySerializerTest.java
@@ -0,0 +1,45 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.util.model;
+
+import java.io.ByteArrayInputStream;
+import java.io.ByteArrayOutputStream;
+import java.io.IOException;
+import java.util.Arrays;
+import java.util.Random;
+
+import org.junit.Assert;
+import org.junit.Test;
+
+public class ByteArraySerializerTest {
+
+  @Test
+  public void testSerialization() throws IOException {
+
+    byte[] b = new byte[1024];
+    new Random(23).nextBytes(b);
+
+    ByteArraySerializer serializer = new ByteArraySerializer();
+
+    ByteArrayOutputStream bOut = new ByteArrayOutputStream();
+    serializer.serialize(Arrays.copyOf(b, b.length), bOut) ;
+
+    Assert.assertArrayEquals(b, bOut.toByteArray());
+    Assert.assertArrayEquals(b, serializer.create(new ByteArrayInputStream(b)));
+  }
+}