You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by co...@apache.org on 2017/08/31 12:22:19 UTC

[opennlp] branch master updated: OPENNLP-1122: Leipzig sample should allow skip initial entries

This is an automated email from the ASF dual-hosted git repository.

colen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/opennlp.git


The following commit(s) were added to refs/heads/master by this push:
     new d3f0ee5  OPENNLP-1122: Leipzig sample should allow skip initial entries
d3f0ee5 is described below

commit d3f0ee5e0928122b41ee25e2b4ab09bdca5bd00e
Author: William D C M SILVA <co...@apache.org>
AuthorDate: Sat Aug 19 14:44:04 2017 -0300

    OPENNLP-1122: Leipzig sample should allow skip initial entries
---
 .../LeipzigLanguageSampleStreamFactory.java        | 12 ++++-
 .../tools/formats/leipzig/SampleSkipStream.java    | 55 ++++++++++++++++++++++
 2 files changed, 65 insertions(+), 2 deletions(-)

diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/leipzig/LeipzigLanguageSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/leipzig/LeipzigLanguageSampleStreamFactory.java
index f7fbc08..968d00d 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/leipzig/LeipzigLanguageSampleStreamFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/leipzig/LeipzigLanguageSampleStreamFactory.java
@@ -21,6 +21,7 @@ import java.io.File;
 import java.io.IOException;
 
 import opennlp.tools.cmdline.ArgumentParser;
+import opennlp.tools.cmdline.ArgumentParser.OptionalParameter;
 import opennlp.tools.cmdline.ArgumentParser.ParameterDescription;
 import opennlp.tools.cmdline.StreamFactoryRegistry;
 import opennlp.tools.cmdline.TerminateToolException;
@@ -47,6 +48,11 @@ public class LeipzigLanguageSampleStreamFactory
     @ParameterDescription(valueName = "samplesPerLanguage",
         description = "number of samples per language")
     String getSamplesPerLanguage();
+
+    @ParameterDescription(valueName = "samplesToSkip",
+        description = "number of samples to skip before returning")
+    @OptionalParameter(defaultValue = "0")
+    String getSamplesToSkip();
   }
 
   protected <P> LeipzigLanguageSampleStreamFactory(Class<P> params) {
@@ -64,9 +70,11 @@ public class LeipzigLanguageSampleStreamFactory
     File sentencesFileDir = params.getSentencesDir();
 
     try {
-      return new SampleShuffleStream(new LeipzigLanguageSampleStream(sentencesFileDir,
+      return new SampleSkipStream(new SampleShuffleStream(
+          new LeipzigLanguageSampleStream(sentencesFileDir,
           Integer.parseInt(params.getSentencesPerSample()),
-          Integer.parseInt(params.getSamplesPerLanguage())));
+          Integer.parseInt(params.getSamplesPerLanguage()) + Integer.parseInt(params.getSamplesToSkip()))),
+          Integer.parseInt(params.getSamplesToSkip()));
     } catch (IOException e) {
       throw new TerminateToolException(-1, "IO error while opening sample data.", e);
     }
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/leipzig/SampleSkipStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/leipzig/SampleSkipStream.java
new file mode 100644
index 0000000..1347275
--- /dev/null
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/leipzig/SampleSkipStream.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.formats.leipzig;
+
+import java.io.IOException;
+
+import opennlp.tools.util.ObjectStream;
+
+class SampleSkipStream<T> implements ObjectStream<T> {
+  // Skips the first samplesToSkip entries of the wrapped stream, at construction and after reset().
+  // NOTE(review): close() is not overridden here; confirm the wrapped stream gets closed elsewhere.
+  private final ObjectStream<T> samples;
+  private final int samplesToSkip;
+  // Stores both arguments, then immediately consumes the leading entries of the wrapped stream.
+  SampleSkipStream(ObjectStream<T> samples, int samplesToSkip) throws IOException {
+    this.samples = samples;
+    this.samplesToSkip = samplesToSkip;
+    // Skip eagerly so that the first read() already returns an entry after the skipped ones.
+    skipSamples();
+  }
+  // Passes the call through; whatever the wrapped stream returns is returned unchanged.
+  @Override
+  public T read() throws IOException {
+    return samples.read();
+  }
+  // Calls reset() on the wrapped stream, then skips the leading entries again.
+  @Override
+  public void reset() throws IOException, UnsupportedOperationException {
+    this.samples.reset();
+    skipSamples();
+  }
+  // Reads and discards up to samplesToSkip entries, stopping early on the first null read.
+  private void skipSamples() throws IOException {
+    int i = 0;
+    // Only non-null reads are counted; a samplesToSkip of zero or less skips nothing.
+    while (i < samplesToSkip && (samples.read()) != null) {
+      i++;
+    }
+  }
+}

-- 
To stop receiving notification emails like this one, please contact
commits@opennlp.apache.org.