You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@opennlp.apache.org by co...@apache.org on 2017/08/31 12:22:19 UTC
[opennlp] branch master updated: OPENNLP-1122: Leipzig sample should allow skip initial entries
This is an automated email from the ASF dual-hosted git repository.
colen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/opennlp.git
The following commit(s) were added to refs/heads/master by this push:
new d3f0ee5 OPENNLP-1122: Leipzig sample should allow skip initial entries
d3f0ee5 is described below
commit d3f0ee5e0928122b41ee25e2b4ab09bdca5bd00e
Author: William D C M SILVA <co...@apache.org>
AuthorDate: Sat Aug 19 14:44:04 2017 -0300
OPENNLP-1122: Leipzig sample should allow skip initial entries
---
.../LeipzigLanguageSampleStreamFactory.java | 12 ++++-
.../tools/formats/leipzig/SampleSkipStream.java | 55 ++++++++++++++++++++++
2 files changed, 65 insertions(+), 2 deletions(-)
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/leipzig/LeipzigLanguageSampleStreamFactory.java b/opennlp-tools/src/main/java/opennlp/tools/formats/leipzig/LeipzigLanguageSampleStreamFactory.java
index f7fbc08..968d00d 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/leipzig/LeipzigLanguageSampleStreamFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/leipzig/LeipzigLanguageSampleStreamFactory.java
@@ -21,6 +21,7 @@ import java.io.File;
import java.io.IOException;
import opennlp.tools.cmdline.ArgumentParser;
+import opennlp.tools.cmdline.ArgumentParser.OptionalParameter;
import opennlp.tools.cmdline.ArgumentParser.ParameterDescription;
import opennlp.tools.cmdline.StreamFactoryRegistry;
import opennlp.tools.cmdline.TerminateToolException;
@@ -47,6 +48,11 @@ public class LeipzigLanguageSampleStreamFactory
@ParameterDescription(valueName = "samplesPerLanguage",
description = "number of samples per language")
String getSamplesPerLanguage();
+
+ @ParameterDescription(valueName = "samplesToSkip",
+ description = "number of samples to skip before returning")
+ @OptionalParameter(defaultValue = "0")
+ String getSamplesToSkip();
}
protected <P> LeipzigLanguageSampleStreamFactory(Class<P> params) {
@@ -64,9 +70,11 @@ public class LeipzigLanguageSampleStreamFactory
File sentencesFileDir = params.getSentencesDir();
try {
- return new SampleShuffleStream(new LeipzigLanguageSampleStream(sentencesFileDir,
+ return new SampleSkipStream(new SampleShuffleStream(
+ new LeipzigLanguageSampleStream(sentencesFileDir,
Integer.parseInt(params.getSentencesPerSample()),
- Integer.parseInt(params.getSamplesPerLanguage())));
+ Integer.parseInt(params.getSamplesPerLanguage()) + Integer.parseInt(params.getSamplesToSkip()))),
+ Integer.parseInt(params.getSamplesToSkip()));
} catch (IOException e) {
throw new TerminateToolException(-1, "IO error while opening sample data.", e);
}
diff --git a/opennlp-tools/src/main/java/opennlp/tools/formats/leipzig/SampleSkipStream.java b/opennlp-tools/src/main/java/opennlp/tools/formats/leipzig/SampleSkipStream.java
new file mode 100644
index 0000000..1347275
--- /dev/null
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/leipzig/SampleSkipStream.java
@@ -0,0 +1,55 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.formats.leipzig;
+
+import java.io.IOException;
+
+import opennlp.tools.util.ObjectStream;
+
+class SampleSkipStream<T> implements ObjectStream<T> {
+
+
+ private final ObjectStream<T> samples;
+ private final int samplesToSkip;
+
+ SampleSkipStream(ObjectStream<T> samples, int samplesToSkip) throws IOException {
+ this.samples = samples;
+ this.samplesToSkip = samplesToSkip;
+
+ skipSamples();
+ }
+
+ @Override
+ public T read() throws IOException {
+ return samples.read();
+ }
+
+ @Override
+ public void reset() throws IOException, UnsupportedOperationException {
+ this.samples.reset();
+ skipSamples();
+ }
+
+ private void skipSamples() throws IOException {
+ int i = 0;
+
+ while (i < samplesToSkip && (samples.read()) != null) {
+ i++;
+ }
+ }
+}
--
To stop receiving notification emails like this one, please contact
['"commits@opennlp.apache.org" <co...@opennlp.apache.org>'].