You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by to...@apache.org on 2017/03/07 13:16:32 UTC
incubator-joshua git commit: JOSHUA-331 - berkeleyLM bin/gz files
generated from plain text one
Repository: incubator-joshua
Updated Branches:
refs/heads/master dd681a10a -> 66c5aad5a
JOSHUA-331 - berkeleyLM bin/gz files generated from plain text one
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/66c5aad5
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/66c5aad5
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/66c5aad5
Branch: refs/heads/master
Commit: 66c5aad5acf84f24990136558f882a5c33ec0114
Parents: dd681a1
Author: Tommaso Teofili <to...@apache.org>
Authored: Tue Mar 7 14:16:18 2017 +0100
Committer: Tommaso Teofili <to...@apache.org>
Committed: Tue Mar 7 14:16:18 2017 +0100
----------------------------------------------------------------------
pom.xml | 6 +++
.../lm/berkeley_lm/LMGrammarBerkeleyTest.java | 52 ++++++++++++++++---
src/test/resources/berkeley_lm/lm.berkeleylm | Bin 4310 -> 0 bytes
src/test/resources/berkeley_lm/lm.berkeleylm.gz | Bin 1796 -> 0 bytes
src/test/resources/berkeley_lm/lm.gz | Bin 168 -> 0 bytes
5 files changed, 52 insertions(+), 6 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/66c5aad5/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index a04a194..a5824db 100644
--- a/pom.xml
+++ b/pom.xml
@@ -466,6 +466,12 @@
<version>2.0.52-beta</version>
<scope>test</scope>
</dependency>
+ <dependency>
+ <groupId>commons-io</groupId>
+ <artifactId>commons-io</artifactId>
+ <version>2.5</version>
+ <scope>test</scope>
+ </dependency>
</dependencies>
<profiles>
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/66c5aad5/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeleyTest.java
----------------------------------------------------------------------
diff --git a/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeleyTest.java b/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeleyTest.java
index 32c0762..aec6215 100644
--- a/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeleyTest.java
+++ b/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeleyTest.java
@@ -18,11 +18,23 @@
*/
package org.apache.joshua.decoder.ff.lm.berkeley_lm;
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.FileOutputStream;
+import java.io.IOException;
+import java.nio.file.FileAlreadyExistsException;
+import java.nio.file.Files;
+import java.nio.file.Paths;
+import java.util.zip.GZIPOutputStream;
+
+import edu.berkeley.nlp.lm.io.MakeLmBinaryFromArpa;
+import org.apache.commons.io.IOUtils;
import org.apache.joshua.decoder.Decoder;
import org.apache.joshua.decoder.JoshuaConfiguration;
import org.apache.joshua.decoder.Translation;
import org.apache.joshua.decoder.segment_file.Sentence;
import org.testng.annotations.AfterMethod;
+import org.testng.annotations.BeforeClass;
import org.testng.annotations.DataProvider;
import org.testng.annotations.Test;
@@ -39,15 +51,43 @@ public class LMGrammarBerkeleyTest {
private static final String EXPECTED_OUTPUT_WITH_OOV = "tm_glue_0=2.000 lm_0=-7.153 lm_0_oov=0.000\n";
private static final String[] OPTIONS = "-v 0 -output-format %f".split(" ");
+ private static final String lmFile = "src/test/resources/berkeley_lm/lm";
+ private static final String compressedLmFile = "target/lm.gz";
+ private static final String lmFileBin = "target/lm.berkeleylm";
+ private static final String compressedLmFileBin = "target/lm.berkeleylm.gz";
+
private JoshuaConfiguration joshuaConfig;
private Decoder decoder;
+ /**
+  * Generates the derived language model files (gzip of the plain text LM, the Berkeley
+  * binary LM, and the gzip of that binary) from the plain text LM before any test runs.
+  *
+  * @throws Exception if reading the source LM, building the binary LM, or writing any
+  *         of the derived files fails
+  */
+ @BeforeClass
+ public static void before() throws Exception {
+   // generate lm.gz; try-with-resources guarantees the source handle is released
+   try (FileInputStream lmFileStream = new FileInputStream(new File(lmFile))) {
+     compress(lmFileStream, compressedLmFile);
+   }
+
+   // generate lm.berkeleylm
+   MakeLmBinaryFromArpa.main(new String[] { lmFile, lmFileBin });
+
+   // generate lm.berkeleylm.gz; must run after the binary LM has been written above
+   try (FileInputStream lmFileBinStream = new FileInputStream(new File(lmFileBin))) {
+     compress(lmFileBinStream, compressedLmFileBin);
+   }
+ }
+
+ /**
+  * Gzips the content of {@code lmFileStream} into a new file at {@code target}.
+  * If {@code target} already exists nothing is written. The input stream is always
+  * closed before this method returns.
+  *
+  * @param lmFileStream the stream to compress; closed by this method
+  * @param target path of the gzip file to create
+  * @throws IOException if the target cannot be created or the copy fails
+  */
+ private static void compress(FileInputStream lmFileStream, String target) throws IOException {
+   // Take ownership of the input so it is closed on every path, including the early exit.
+   try (FileInputStream source = lmFileStream) {
+     Files.createFile(Paths.get(target));
+     // Closing the GZIP stream finishes the compressed data and releases the file handle.
+     try (GZIPOutputStream gzipOutputStream = new GZIPOutputStream(new FileOutputStream(target))) {
+       IOUtils.copy(source, gzipOutputStream);
+     } catch (IOException copyFailure) {
+       // Do not leave a truncated archive behind: the exists-check above would make
+       // every later run accept it as a valid file.
+       Files.deleteIfExists(Paths.get(target));
+       throw copyFailure;
+     }
+   } catch (FileAlreadyExistsException fae) {
+     // the file already exists, no need to recreate it
+   }
+ }
+
@DataProvider(name = "languageModelFiles")
public Object[][] lmFiles() {
- return new Object[][]{{"src/test/resources/berkeley_lm/lm"},
- {"src/test/resources/berkeley_lm/lm.gz"},
- {"src/test/resources/berkeley_lm/lm.berkeleylm"},
- {"src/test/resources/berkeley_lm/lm.berkeleylm.gz"}};
+ return new Object[][] { { lmFile }, { compressedLmFile }, { lmFileBin },
+ { compressedLmFileBin } };
}
@AfterMethod
@@ -74,12 +114,12 @@ public class LMGrammarBerkeleyTest {
public void givenLmWithOovFeature_whenDecoder_thenCorrectFeaturesReturned() {
joshuaConfig = new JoshuaConfiguration();
joshuaConfig.processCommandLineOptions(OPTIONS);
- joshuaConfig.features.add("LanguageModel -lm_type berkeleylm -oov_feature -lm_order 2 -lm_file src/test/resources/berkeley_lm/lm");
+ joshuaConfig.features.add(
+ "LanguageModel -lm_type berkeleylm -oov_feature -lm_order 2 -lm_file src/test/resources/berkeley_lm/lm");
decoder = new Decoder(joshuaConfig, null);
final String translation = decode(INPUT).toString();
assertEquals(Decoder.weights.getDenseFeatures().size(), 3);
assertEquals(translation, EXPECTED_OUTPUT_WITH_OOV);
}
-
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/66c5aad5/src/test/resources/berkeley_lm/lm.berkeleylm
----------------------------------------------------------------------
diff --git a/src/test/resources/berkeley_lm/lm.berkeleylm b/src/test/resources/berkeley_lm/lm.berkeleylm
deleted file mode 100644
index 9472c18..0000000
Binary files a/src/test/resources/berkeley_lm/lm.berkeleylm and /dev/null differ
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/66c5aad5/src/test/resources/berkeley_lm/lm.berkeleylm.gz
----------------------------------------------------------------------
diff --git a/src/test/resources/berkeley_lm/lm.berkeleylm.gz b/src/test/resources/berkeley_lm/lm.berkeleylm.gz
deleted file mode 100644
index 57c319e..0000000
Binary files a/src/test/resources/berkeley_lm/lm.berkeleylm.gz and /dev/null differ
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/66c5aad5/src/test/resources/berkeley_lm/lm.gz
----------------------------------------------------------------------
diff --git a/src/test/resources/berkeley_lm/lm.gz b/src/test/resources/berkeley_lm/lm.gz
deleted file mode 100644
index 3a4f2c0..0000000
Binary files a/src/test/resources/berkeley_lm/lm.gz and /dev/null differ