You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/06/23 18:45:27 UTC
[16/60] [partial] incubator-joshua git commit: maven multi-module
layout 1st commit: moving files into joshua-core
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/test/java/org/apache/joshua/decoder/io/DeNormalizeTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/io/DeNormalizeTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/io/DeNormalizeTest.java
new file mode 100644
index 0000000..88b2350
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/io/DeNormalizeTest.java
@@ -0,0 +1,273 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.io;
+
+import static org.testng.Assert.assertEquals;
+
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.Test;
+
+/**
+ * Unit tests for the static detokenization helpers of {@link DeNormalize}.
+ */
+public class DeNormalizeTest {
+
+  private String tokenized;
+
+  /**
+   * Resets the shared fixture sentence before every test, since several tests overwrite it.
+   */
+  @BeforeMethod
+  protected void setUp() throws Exception {
+    tokenized = "my son 's friend , however , plays a high - risk game .";
+  }
+
+  /**
+   * Test method for {@link DeNormalize#processSingleLine(String)} on a full sentence.
+   */
+  @Test(enabled = true)
+  public void testProcessSingleLine() {
+    tokenized =
+        "my son 's friend , ( dr . -rrb- robotnik , phd , however , wo n't play a high - risk game .";
+    String expected = "My son's friend, (Dr.) robotnik, PhD, however, won't play a high-risk game.";
+    String actual = DeNormalize.processSingleLine(tokenized);
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Test method for {@link DeNormalize#processSingleLine(String)} on two adjacent abbreviations.
+   */
+  @Test
+  public void testProcessSingleLine_interspersed() {
+    tokenized = "phd mphil";
+    String expected = "PhD MPhil";
+    String actual = DeNormalize.processSingleLine(tokenized);
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Test method for
+   * {@link DeNormalize#capitalizeLineFirstLetter(String)} on the shared fixture sentence.
+   */
+  @Test
+  public void testCapitalizeLineFirstLetter() throws Exception {
+    String actual = DeNormalize.capitalizeLineFirstLetter(tokenized);
+    String expected = "My son 's friend , however , plays a high - risk game .";
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Test method for
+   * {@link DeNormalize#capitalizeLineFirstLetter(String)} on the empty string.
+   */
+  @Test
+  public void testCapitalizeLineFirstLetter_empty() throws Exception {
+    String actual = DeNormalize.capitalizeLineFirstLetter("");
+    String expected = "";
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Test method for
+   * {@link DeNormalize#capitalizeLineFirstLetter(String)} on a single digit.
+   */
+  @Test
+  public void testCapitalizeLineFirstLetter_singleNumberCharacter() throws Exception {
+    String actual = DeNormalize.capitalizeLineFirstLetter("1");
+    String expected = "1";
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Test method for
+   * {@link DeNormalize#capitalizeLineFirstLetter(String)} on a single letter.
+   */
+  @Test
+  public void testCapitalizeLineFirstLetter_singleLetterCharacter() throws Exception {
+    String actual = DeNormalize.capitalizeLineFirstLetter("a");
+    String expected = "A";
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Test method for {@link DeNormalize#joinPunctuationMarks(String)} on the shared fixture.
+   */
+  @Test
+  public void testJoinPunctuationMarks() throws Exception {
+    String actual = DeNormalize.joinPunctuationMarks(tokenized);
+    String expected = "my son 's friend, however, plays a high - risk game.";
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Test method for {@link DeNormalize#joinPunctuationMarks(String)} on the empty string.
+   */
+  @Test
+  public void testJoinPunctuationMarks_empty() throws Exception {
+    String actual = DeNormalize.joinPunctuationMarks("");
+    String expected = "";
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Test method for {@link DeNormalize#joinHyphen(String)} on the shared fixture.
+   */
+  @Test
+  public void testJoinHyphen() throws Exception {
+    String actual = DeNormalize.joinHyphen(tokenized);
+    String expected = "my son 's friend , however , plays a high-risk game .";
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Test method for {@link DeNormalize#joinHyphen(String)} on the empty string.
+   */
+  @Test
+  public void testJoinHypen_empty() throws Exception {
+    String actual = DeNormalize.joinHyphen("");
+    String expected = "";
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Test method for {@link DeNormalize#joinHyphen(String)}: one space between two hyphens.
+   */
+  @Test
+  public void testJoinHyphen_1space_btw_2hyphens() throws Exception {
+    String actual = DeNormalize.joinHyphen("a - - b");
+    String expected = "a-- b";
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Test method for {@link DeNormalize#joinHyphen(String)}: two spaces between two hyphens.
+   */
+  @Test
+  public void testJoinHyphen_2spaces_btw_2hyphens() throws Exception {
+    String actual = DeNormalize.joinHyphen("a -  - b");
+    String expected = "a--b";
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Test method for {@link DeNormalize#joinContractions(String)} on 's and n't tokens.
+   */
+  @Test
+  public void testJoinContractions() throws Exception {
+    tokenized = "my son 's friend , however , wo n't play a high - risk game .";
+    String actual = DeNormalize.joinContractions(tokenized);
+    String expected = "my son's friend , however , won't play a high - risk game .";
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Test method for {@link DeNormalize#joinContractions(String)} on the empty string.
+   */
+  @Test
+  public void testJoinContractions_empty() throws Exception {
+    String actual = DeNormalize.joinContractions("");
+    String expected = "";
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Test method for
+   * {@link DeNormalize#capitalizeNameTitleAbbrvs(String)}, in a sentence and as a bare list.
+   */
+  @Test
+  public void testCapitalizeNameTitleAbbrvs() throws Exception {
+    String actual, expected;
+    tokenized =
+        "my son 's friend , dr . robotnik , phd , however , wo n't play a high - risk game .";
+    expected =
+        "my son 's friend , Dr . robotnik , PhD , however , wo n't play a high - risk game .";
+    actual = DeNormalize.capitalizeNameTitleAbbrvs(tokenized);
+    assertEquals(actual, expected);
+
+    tokenized = "mr mrs ms miss dr prof";
+    expected = "Mr Mrs Ms Miss Dr Prof";
+    actual = DeNormalize.capitalizeNameTitleAbbrvs(tokenized);
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Test method for {@link DeNormalize#capitalizeI(String)}: only a stand-alone "i" changes.
+   */
+  @Test
+  public void testCapitalizeI() throws Exception {
+    String expected, actual;
+
+    tokenized = "sam i am";
+    expected = "sam I am";
+    actual = DeNormalize.capitalizeI(tokenized);
+    assertEquals(actual, expected);
+
+    tokenized = "sam iam";
+    expected = "sam iam";
+    actual = DeNormalize.capitalizeI(tokenized);
+    assertEquals(actual, expected);
+
+    tokenized = "sami am";
+    expected = "sami am";
+    actual = DeNormalize.capitalizeI(tokenized);
+    assertEquals(actual, expected);
+
+    tokenized = "samiam";
+    expected = "samiam";
+    actual = DeNormalize.capitalizeI(tokenized);
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Test method for {@link DeNormalize#replaceBracketTokens(String)}, lower and upper case.
+   */
+  @Test
+  public void testReplaceBracketTokens() throws Exception {
+    String expected, actual;
+
+    tokenized = "-lrb- i -rrb-";
+    expected = "( i )";
+    actual = DeNormalize.replaceBracketTokens(tokenized);
+    assertEquals(actual, expected);
+
+    tokenized = "-LRB- i -RRB-";
+    expected = "( i )";
+    actual = DeNormalize.replaceBracketTokens(tokenized);
+    assertEquals(actual, expected);
+  }
+
+  /**
+   * Checks that {@link DeNormalize#joinPunctuationMarks(String)} attaches bracket characters.
+   */
+  @Test
+  public void testDetokenizeBracketTokens() throws Exception {
+    String expected, actual;
+
+    tokenized = "( i )";
+    expected = "(i)";
+    actual = DeNormalize.joinPunctuationMarks(tokenized);
+    assertEquals(actual, expected);
+
+    tokenized = "[ i } j";
+    expected = "[i} j";
+    actual = DeNormalize.joinPunctuationMarks(tokenized);
+    assertEquals(actual, expected);
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/test/java/org/apache/joshua/decoder/io/TranslationRequestTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/io/TranslationRequestTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/io/TranslationRequestTest.java
new file mode 100644
index 0000000..5a1c3ab
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/io/TranslationRequestTest.java
@@ -0,0 +1,149 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.io;
+
+import java.io.BufferedReader;
+import java.io.ByteArrayInputStream;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.nio.charset.Charset;
+
+import org.apache.joshua.decoder.JoshuaConfiguration;
+
+import org.testng.annotations.*;
+import static org.testng.Assert.*;
+import static org.mockito.Mockito.*;
+
+/**
+ * Exercises {@link TranslationRequestStream} against tiny in-memory inputs. Expected behaviors:
+ *
+ * - Immediately after construction no request has been counted, so the size is 0.
+ *
+ * - A non-blank input that is not followed by a newline, e.g. "1", yields one request once it
+ * has been fetched.
+ *
+ * - Each newline-terminated empty line yields a request of its own, and the source of such a
+ * request is "".
+ */
+public class TranslationRequestTest {
+
+  private final JoshuaConfiguration joshuaConfiguration = new JoshuaConfiguration();
+  @BeforeMethod
+  public void createTranslationRequest() throws Exception {
+  }
+
+  /**
+   * No shared fixture is prepared here; every test builds its own request stream.
+   */
+  @BeforeMethod
+  protected void setUp() throws Exception {
+  }
+
+  /**
+   * Nothing is released after a test.
+   */
+  @AfterMethod
+  protected void tearDown() throws Exception {
+  }
+
+  /**
+   * Disabled placeholder for a constructor test of {@link TranslationRequestStream}.
+   */
+  @Test(enabled = false)
+  public void testTranslationRequest() {
+    fail("Not yet implemented");
+  }
+
+  /**
+   * The size reported right after construction, on top of a mocked input stream, is 0.
+   */
+  @Test(enabled = true)
+  public void testSize_uponConstruction() {
+    BufferedReader reader = new BufferedReader(
+        new InputStreamReader(mock(InputStream.class), Charset.defaultCharset()));
+    TranslationRequestStream stream = new TranslationRequestStream(reader, joshuaConfiguration);
+    assertEquals(stream.size(), 0);
+  }
+
+  /**
+   * The input "1" (no trailing newline) gives a size of 1 after one call to {@code next()}.
+   * @throws Exception propagated from {@code next()}
+   */
+  @Test(enabled = true)
+  public void testSize_1() throws Exception {
+    InputStream source = new ByteArrayInputStream("1".getBytes());
+    BufferedReader reader =
+        new BufferedReader(new InputStreamReader(source, Charset.defaultCharset()));
+    TranslationRequestStream stream = new TranslationRequestStream(reader, joshuaConfiguration);
+    stream.next();
+    assertEquals(stream.size(), 1);
+  }
+
+  /**
+   * A lone newline gives a size of 1 after one call to {@code next()}.
+   * @throws Exception propagated from {@code next()}
+   */
+  @Test(enabled = true)
+  public void testSize_newline() throws Exception {
+    InputStream source = new ByteArrayInputStream("\n".getBytes());
+    BufferedReader reader =
+        new BufferedReader(new InputStreamReader(source, Charset.defaultCharset()));
+    TranslationRequestStream stream = new TranslationRequestStream(reader, joshuaConfiguration);
+    stream.next();
+    assertEquals(stream.size(), 1);
+  }
+
+  /**
+   * Two newlines give a size of 2 after two calls to {@code next()}.
+   * @throws Exception propagated from {@code next()}
+   */
+  @Test(enabled = true)
+  public void testSize_2newlines() throws Exception {
+    InputStream source = new ByteArrayInputStream("\n\n".getBytes());
+    BufferedReader reader =
+        new BufferedReader(new InputStreamReader(source, Charset.defaultCharset()));
+    TranslationRequestStream stream = new TranslationRequestStream(reader, joshuaConfiguration);
+    stream.next();
+    stream.next();
+    assertEquals(stream.size(), 2);
+  }
+
+  /**
+   * Two newlines produce two requests, each with "" as its source.
+   * @throws Exception propagated from {@code next()}
+   */
+  @Test(enabled = true)
+  public void testNext_2Newlines() throws Exception {
+    InputStream source = new ByteArrayInputStream("\n\n".getBytes());
+    BufferedReader reader =
+        new BufferedReader(new InputStreamReader(source, Charset.defaultCharset()));
+    TranslationRequestStream stream = new TranslationRequestStream(reader, joshuaConfiguration);
+    assertEquals(stream.next().source(), "");
+    assertEquals(stream.next().source(), "");
+  }
+
+  /**
+   * Disabled placeholder for a removal test; it only fails with "Not yet implemented".
+   */
+  @Test(enabled = false)
+  public void testRemove() {
+    fail("Not yet implemented");
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/test/java/org/apache/joshua/decoder/kbest_extraction/KBestExtractionTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/kbest_extraction/KBestExtractionTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/kbest_extraction/KBestExtractionTest.java
new file mode 100644
index 0000000..44ef35d
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/kbest_extraction/KBestExtractionTest.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+ package org.apache.joshua.decoder.kbest_extraction;
+
+import static com.google.common.base.Charsets.UTF_8;
+import static java.nio.file.Files.readAllBytes;
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+import org.apache.joshua.decoder.Decoder;
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.decoder.Translation;
+import org.apache.joshua.decoder.segment_file.Sentence;
+
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Ignore;
+import org.junit.Test;
+
+/**
+ * Regression test for k-best extraction: decodes a fixed input and compares with a gold file.
+ * TODO (fhieber): for reasons not yet understood this only works with StateMinimizing KenLM;
+ * this is to be investigated.
+ */
+@Ignore("re-enable as soon as kenlm native library support will be in place")
+public class KBestExtractionTest {
+
+  private static final String CONFIG = "resources/kbest_extraction/joshua.config";
+  private static final String INPUT = "a b c d e";
+  private static final Path GOLD_PATH = Paths.get("resources/kbest_extraction/output.scores.gold");
+
+  private JoshuaConfiguration joshuaConfig;
+  private Decoder decoder;
+
+  @Before
+  public void setUp() throws Exception {
+    this.joshuaConfig = new JoshuaConfiguration();
+    this.joshuaConfig.readConfigFile(CONFIG);
+    this.joshuaConfig.outputFormat = "%i ||| %s ||| %c";
+    this.decoder = new Decoder(this.joshuaConfig, "");
+  }
+
+  @After
+  public void tearDown() throws Exception {
+    this.decoder.cleanUp();
+    this.decoder = null;
+  }
+
+  @Test
+  public void givenInput_whenKbestExtraction_thenOutputIsAsExpected() throws IOException {
+    final String actual = translate(INPUT).toString();
+    final String expected = new String(readAllBytes(GOLD_PATH), UTF_8);
+    assertEquals(expected, actual);
+  }
+
+  private Translation translate(String text) {
+    final Sentence segment = new Sentence(text, 0, this.joshuaConfig);
+    return this.decoder.decode(segment);
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/CoverageTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/CoverageTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/CoverageTest.java
new file mode 100644
index 0000000..7526b1f
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/CoverageTest.java
@@ -0,0 +1,140 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.phrase;
+
+import static org.junit.Assert.*;
+
+import java.util.BitSet;
+
+import org.junit.Test;
+/** Unit tests for {@link Coverage}: set, pattern, compatible, firstZero, openings, equals, toString. */
+public class CoverageTest {
+
+  @Test
+  public void testSet() {
+    Coverage cov = new Coverage();
+    cov.set(1, 2);
+    cov.set(3, 4);
+    cov.set(2, 3);
+    cov.set(0, 1);
+    // The four spans above are set out of order; together they run from 0 up to 4.
+    assertFalse(cov.compatible(0, 1));
+    assertFalse(cov.compatible(0, 5));
+    assertTrue(cov.compatible(4, 6));
+
+    assertEquals("4 ..........", cov.toString());
+  }
+
+  @Test
+  public void testPattern() {
+    Coverage cov = new Coverage();
+    cov.set(5, 6);
+    cov.set(0, 4);
+    BitSet bits = cov.pattern(4, 5);
+    BitSet answerBits = new BitSet();
+    answerBits.set(0);
+    assertEquals(answerBits, bits);
+  }
+
+  @Test
+  public void testCopyConstructor() {
+    Coverage a = new Coverage();
+    a.set(2, 3);
+    Coverage b = new Coverage(a);
+    b.set(4, 5);
+    // Changing the copy must leave the original's string form different from the copy's.
+    assertFalse(a.toString().equals(b.toString()));
+  }
+
+  @Test
+  public void testCompatible() {
+    Coverage a = new Coverage();
+    a.set(10, 14);
+
+    assertTrue(a.compatible(14, 16));
+    assertTrue(a.compatible(6, 10));
+    assertTrue(a.compatible(1, 10));
+    assertTrue(a.compatible(1, 9));
+    assertFalse(a.compatible(9, 11));
+    assertFalse(a.compatible(13, 15));
+    assertFalse(a.compatible(9, 15));
+    assertFalse(a.compatible(9, 14));
+    assertFalse(a.compatible(10, 15));
+
+    a.set(0, 9);
+    // With 0..9 also set, only the span (9, 10) and spans starting at 14 or later should pass.
+    for (int width = 1; width <= 3; width++) {
+      for (int i = 0; i < 20; i++) {
+        int j = i + width;
+        if ((i == 9 && j == 10) || i >= 14) {
+          assertTrue(a.compatible(i, j));
+        } else {
+          // Every other span of width 1 to 3 is expected to be rejected.
+          assertFalse(a.compatible(i, j));
+        }
+      }
+    }
+  }
+
+  @Test
+  public void testFirstZero() {
+    Coverage cov = new Coverage();
+    cov.set(2, 5);
+    assertEquals(0, cov.firstZero());
+    cov.set(8, 10);
+    assertEquals(0, cov.firstZero());
+    cov.set(0, 2);
+    assertEquals(5, cov.firstZero());
+    cov.set(5, 7);
+    assertEquals(7, cov.firstZero());
+    cov.set(7, 8);
+    assertEquals(10, cov.firstZero());
+  }
+
+  @Test
+  public void testOpenings() {
+    Coverage cov = new Coverage();
+    cov.set(0, 2);
+    cov.set(8, 10);
+    // For every start inside the gap, the right opening is capped by the second argument.
+    for (int i = 2; i < 7; i++) {
+      assertEquals(2, cov.leftOpening(i));
+      assertEquals(8, cov.rightOpening(i, 17));
+      assertEquals(7, cov.rightOpening(i, 7));
+    }
+  }
+
+  @Test
+  public void testEquals() {
+    Coverage cov = new Coverage();
+    cov.set(9, 11);
+    Coverage cov2 = new Coverage();
+    cov2.set(9, 10);
+    cov2.set(10, 11);
+    assertEquals(cov, cov2);
+  }
+
+  @Test
+  public void testToString() {
+    Coverage cov = new Coverage();
+    cov.set(0, 40);
+    cov.set(44, 49);
+    assertEquals("40 ....xxxxx.", cov.toString());
+  }
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/constrained/ConstrainedPhraseDecodingTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/constrained/ConstrainedPhraseDecodingTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/constrained/ConstrainedPhraseDecodingTest.java
new file mode 100644
index 0000000..a99338a
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/constrained/ConstrainedPhraseDecodingTest.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+ package org.apache.joshua.decoder.phrase.constrained;
+
+import static com.google.common.base.Charsets.UTF_8;
+import static java.nio.file.Files.readAllBytes;
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+import org.apache.joshua.decoder.Decoder;
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.decoder.Translation;
+import org.apache.joshua.decoder.segment_file.Sentence;
+
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Ignore;
+import org.junit.Test;
+
+/**
+ * Reimplements the constrained phrase decoding test: decodes INPUT and compares with a gold file.
+ */
+@Ignore("re-enable as soon as kenlm native library support will be in place")
+public class ConstrainedPhraseDecodingTest {
+  // The accented letter in INPUT is a Unicode escape so the literal survives any file encoding.
+  private static final String CONFIG = "resources/phrase_decoder/constrained.config";
+  private static final String INPUT = "una estrategia republicana para obstaculizar la reelecci\u00f3n de Obama ||| President Obama to hinder a strategy for Republican re @-@ election";
+  private static final Path GOLD_PATH = Paths.get("resources/phrase_decoder/constrained.output.gold");
+
+  private JoshuaConfiguration joshuaConfig = null;
+  private Decoder decoder = null;
+
+  @Before
+  public void setUp() throws Exception {
+    joshuaConfig = new JoshuaConfiguration();
+    joshuaConfig.readConfigFile(CONFIG);
+    decoder = new Decoder(joshuaConfig, "");
+  }
+
+  @After
+  public void tearDown() throws Exception {
+    decoder.cleanUp();
+    decoder = null;
+  }
+
+  @Test
+  public void givenInput_whenConstrainedPhraseDecoding_thenOutputIsAsExpected() throws IOException {
+    final String translation = decode(INPUT).toString();
+    final String gold = new String(readAllBytes(GOLD_PATH), UTF_8);
+    assertEquals(gold, translation);
+  }
+
+  private Translation decode(String input) {
+    final Sentence sentence = new Sentence(input, 0, joshuaConfig);
+    return decoder.decode(sentence);
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
new file mode 100644
index 0000000..b5bd612
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
@@ -0,0 +1,77 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+ package org.apache.joshua.decoder.phrase.decode;
+
+import static com.google.common.base.Charsets.UTF_8;
+import static java.nio.file.Files.readAllBytes;
+import static org.junit.Assert.assertEquals;
+
+import java.io.IOException;
+import java.nio.file.Path;
+import java.nio.file.Paths;
+
+import org.apache.joshua.decoder.Decoder;
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.apache.joshua.decoder.Translation;
+import org.apache.joshua.decoder.segment_file.Sentence;
+
+import org.junit.After;
+import org.junit.Before;
+import org.junit.Ignore;
+import org.junit.Test;
+
+/**
+ * Reimplements the phrase decoding test: decodes INPUT and compares the result with a gold file.
+ */
+@Ignore("re-enable as soon as kenlm native library support will be in place")
+public class PhraseDecodingTest {
+  // The accented letter in INPUT is a Unicode escape so the literal survives any file encoding.
+  private static final String CONFIG = "resources/phrase_decoder/config";
+  private static final String INPUT = "una estrategia republicana para obstaculizar la reelecci\u00f3n de Obama";
+  private static final Path GOLD_PATH = Paths.get("resources/phrase_decoder/output.gold");
+
+  private JoshuaConfiguration joshuaConfig = null;
+  private Decoder decoder = null;
+
+  @Before
+  public void setUp() throws Exception {
+    joshuaConfig = new JoshuaConfiguration();
+    joshuaConfig.readConfigFile(CONFIG);
+    decoder = new Decoder(joshuaConfig, "");
+  }
+
+  @After
+  public void tearDown() throws Exception {
+    decoder.cleanUp();
+    decoder = null;
+  }
+
+  @Test
+  public void givenInput_whenPhraseDecoding_thenOutputIsAsExpected() throws IOException {
+    final String translation = decode(INPUT).toString();
+    final String gold = new String(readAllBytes(GOLD_PATH), UTF_8);
+    assertEquals(gold, translation);
+  }
+
+  private Translation decode(String input) {
+    final Sentence sentence = new Sentence(input, 0, joshuaConfig);
+    return decoder.decode(sentence);
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/test/java/org/apache/joshua/decoder/segment_file/AlmostTooLongSentenceTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/segment_file/AlmostTooLongSentenceTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/segment_file/AlmostTooLongSentenceTest.java
new file mode 100644
index 0000000..3b2852c
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/segment_file/AlmostTooLongSentenceTest.java
@@ -0,0 +1,96 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.segment_file;
+
+import org.testng.annotations.Test;
+
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.AfterMethod;
+import static org.testng.Assert.*;
+
+import org.apache.joshua.decoder.JoshuaConfiguration;
+/** Tests {@link Sentence} on an input of {@code maxlen} dots, with and without a target side. */
+public class AlmostTooLongSentenceTest {
+  private JoshuaConfiguration joshuaConfiguration;
+  private String almostTooLongInput;
+  private Sentence sentencePlusTarget;
+
+  @BeforeMethod
+  public void setUp() {
+    joshuaConfiguration = new JoshuaConfiguration();
+    almostTooLongInput = concatStrings(".", joshuaConfiguration.maxlen);
+    sentencePlusTarget = new Sentence(this.almostTooLongInput + " ||| target side", 0, joshuaConfiguration);
+  }
+
+  @AfterMethod
+  public void tearDown() {
+  }
+
+  @Test
+  public void testConstructor() {
+    Sentence sent = new Sentence("", 0, joshuaConfiguration);
+    assertNotNull(sent);
+  }
+
+  @Test
+  public void testEmpty() {
+    assertTrue(new Sentence("", 0, joshuaConfiguration).isEmpty());
+  }
+
+  @Test
+  public void testNotEmpty() {
+    assertFalse(new Sentence("hello , world", 0, joshuaConfiguration).isEmpty());
+  }
+
+  /**
+   * Returns {@code repeatedToken} concatenated {@code repeatedTimes} times, with no separator.
+   *
+   * @param repeatedToken the string to repeat
+   * @param repeatedTimes how many copies to append; zero or less yields the empty string
+   * @return the concatenation
+   */
+  private String concatStrings(String repeatedToken, int repeatedTimes) {
+    StringBuilder result = new StringBuilder();
+    for (int i = 0; i < repeatedTimes; i++) {
+      result.append(repeatedToken);
+    }
+    return result.toString();
+  }
+
+  @Test
+  public void testAlmostButNotTooManyTokensSourceOnlyNotEmpty() {
+    assertFalse(new Sentence(this.almostTooLongInput, 0, joshuaConfiguration).isEmpty());
+  }
+
+  @Test
+  public void testAlmostButNotTooManyTokensSourceOnlyTargetNull() {
+    assertNull(new Sentence(this.almostTooLongInput, 0, joshuaConfiguration).target);
+  }
+
+  @Test
+  public void testAlmostButNotTooManyTokensSourceAndTargetTargetIsNotEmpty() {
+    assertFalse(this.sentencePlusTarget.isEmpty());
+  }
+  // NOTE: despite the "TargetNull" suffix, the next test checks that the target side is kept.
+  @Test
+  public void testAlmostButNotTooManyTokensSourceAndTargetTargetNull() {
+    assertEquals(this.sentencePlusTarget.target, "target side");
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/test/java/org/apache/joshua/decoder/segment_file/SentenceTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/decoder/segment_file/SentenceTest.java b/joshua-core/src/test/java/org/apache/joshua/decoder/segment_file/SentenceTest.java
new file mode 100644
index 0000000..8e0d171
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/segment_file/SentenceTest.java
@@ -0,0 +1,109 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.decoder.segment_file;
+
+import org.apache.joshua.decoder.JoshuaConfiguration;
+
+import org.testng.annotations.Test;
+import org.testng.annotations.BeforeMethod;
+import org.testng.annotations.AfterMethod;
+import static org.testng.Assert.*;
+
+/**
+ * Tests how {@link Sentence} handles empty, ordinary and over-long inputs.
+ */
+public class SentenceTest {
+  private String tooLongInput;
+  private final JoshuaConfiguration joshuaConfiguration = new JoshuaConfiguration();
+
+  /** Builds an input containing twice as many tokens as the configured {@code maxlen}. */
+  @BeforeMethod
+  public void setUp() {
+    tooLongInput = concatTokens("*", joshuaConfiguration.maxlen * 2);
+  }
+
+  /** No per-test cleanup is needed. */
+  @AfterMethod
+  public void tearDown() {
+  }
+
+  @Test
+  public void testConstructor() {
+    Sentence sent = new Sentence("", 0, joshuaConfiguration);
+    assertNotNull(sent);
+  }
+
+  @Test
+  public void testEmpty() {
+    assertTrue(new Sentence("", 0, joshuaConfiguration).isEmpty());
+  }
+
+  @Test
+  public void testNotEmpty() {
+    assertFalse(new Sentence("hello , world", 0, joshuaConfiguration).isEmpty());
+  }
+
+  /**
+   * Returns {@code repeatedToken} repeated {@code repeatedTimes} times, joined by single spaces.
+   * The token is always emitted at least once, so a count of zero or less yields one token.
+   *
+   * @param repeatedToken the token to repeat
+   * @param repeatedTimes the number of tokens wanted
+   * @return the space-joined tokens, without a trailing space
+   */
+  private String concatTokens(String repeatedToken, int repeatedTimes) {
+    // StringBuilder avoids the quadratic copying caused by String += inside a loop.
+    StringBuilder result = new StringBuilder();
+    for (int i = 0; i < repeatedTimes - 1; i++) {
+      result.append(repeatedToken).append(" ");
+    }
+    result.append(repeatedToken);
+    return result.toString();
+  }
+
+  /**
+   * The too long input sentence should be truncated from 799 to 202 characters
+   * TODO is this a bug? maxlen is defined as 200 not 202 characters
+   * NOTE(review): the two extra positions are presumably sentence boundary markers - confirm.
+   */
+  @Test
+  public void testTooManyTokensSourceTruncated() {
+    // assertEquals (actual, expected) reports the actual length when the check fails.
+    assertEquals(new Sentence(this.tooLongInput, 0, joshuaConfiguration).length(), 202);
+  }
+
+  @Test
+  public void testTooManyTokensSourceOnlyNotNull() {
+    assertNotNull(new Sentence(this.tooLongInput, 0, joshuaConfiguration));
+  }
+
+  /** When the source side is too long, the supplied target side is expected to come back empty. */
+  @Test
+  public void testTooManyTokensSourceAndTargetIsEmpty() {
+    Sentence sentence = new Sentence(this.tooLongInput + " ||| target side", 0, joshuaConfiguration);
+    assertEquals(sentence.target, "");
+  }
+
+  @Test
+  public void testTooManyTokensSourceAndTargetTruncated() {
+    Sentence sentence = new Sentence(this.tooLongInput + " ||| target side", 0, joshuaConfiguration);
+    assertEquals(sentence.length(), 202);
+  }
+
+  @Test
+  public void testClearlyNotTooManyTokens() {
+    // A single short token is far below the length limit.
+    String input = "token";
+    assertFalse(new Sentence(input, 0, joshuaConfiguration).isEmpty());
+  }
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/test/java/org/apache/joshua/lattice/ArcTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/lattice/ArcTest.java b/joshua-core/src/test/java/org/apache/joshua/lattice/ArcTest.java
new file mode 100644
index 0000000..a26a593
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/lattice/ArcTest.java
@@ -0,0 +1,86 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.lattice;
+
+import org.apache.joshua.lattice.Arc;
+
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+/**
+ * Unit tests for Arc class: builds one arc and checks each accessor against the values it was
+ * constructed with.
+ *
+ * @author Lane Schwartz
+ * @since 2008-07-09
+ * @version $LastChangedDate$
+ */
+@Test(groups = { "lattice_arc" })
+public class ArcTest {
+
+  private final Node<String> headNode = new Node<String>(1);
+  private final Node<String> tailNode = new Node<String>(2);
+  private final float arcCost = (float) Math.PI;
+  private final String arcLabel = "pi";
+
+  /** Arc under test; assigned by {@link #constructArc()} and read by the dependent tests. */
+  private Arc<String> arc;
+
+  // Alternative dependency declaration: dependsOnGroups = {"lattice_node" }
+  @Test(dependsOnMethods = { "org.apache.joshua.lattice.NodeTest.constructNode" })
+  public void constructArc() {
+    arc = new Arc<String>(tailNode, headNode, arcCost, arcLabel);
+
+    Assert.assertEquals(arc.getHead(), headNode);
+    Assert.assertEquals(arc.getTail(), tailNode);
+    Assert.assertEquals(arc.getCost(), arcCost);
+    Assert.assertEquals(arc.getLabel(), arcLabel);
+  }
+
+  @Test(dependsOnMethods = { "constructArc" })
+  public void getHead() {
+    Assert.assertEquals(arc.getHead(), headNode);
+  }
+
+  @Test(dependsOnMethods = { "constructArc" })
+  public void getTail() {
+    Assert.assertEquals(arc.getTail(), tailNode);
+  }
+
+  @Test(dependsOnMethods = { "constructArc" })
+  public void getCost() {
+    Assert.assertEquals(arc.getCost(), arcCost);
+  }
+
+  @Test(dependsOnMethods = { "constructArc" })
+  public void getLabel() {
+    Assert.assertEquals(arc.getLabel(), arcLabel);
+  }
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/test/java/org/apache/joshua/lattice/LatticeTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/lattice/LatticeTest.java b/joshua-core/src/test/java/org/apache/joshua/lattice/LatticeTest.java
new file mode 100644
index 0000000..1522120
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/lattice/LatticeTest.java
@@ -0,0 +1,192 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.lattice;
+
+import java.util.ArrayList;
+import java.util.List;
+
+import org.apache.joshua.decoder.JoshuaConfiguration;
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+/**
+ * Unit tests for Lattice class.
+ *
+ * @author Lane Schwartz
+ * @since 2008-07-09
+ * @version $LastChangedDate$
+ */
+@Test(groups = { "lattice" })
+public class LatticeTest {
+
+ @Test
+ public void allPairsShortestPath() {
+
+ List<Node<String>> nodes = new ArrayList<Node<String>>();
+ for (int i=0; i<4; i++) {
+ nodes.add(new Node<String>(i));
+ }
+
+ nodes.get(0).addArc(nodes.get(1), (float) 1.0, "x");
+ nodes.get(1).addArc(nodes.get(2), (float) 1.0, "y");
+ nodes.get(0).addArc(nodes.get(2), (float) 1.5, "a");
+ nodes.get(2).addArc(nodes.get(3), (float) 3.0, "b");
+ nodes.get(2).addArc(nodes.get(3), (float) 5.0, "c");
+
+ Lattice<String> graph = new Lattice<String>(nodes, new JoshuaConfiguration());
+
+ Assert.assertEquals(graph.getShortestPath(0, 1), 1);
+ Assert.assertEquals(graph.getShortestPath(0, 2), 1);
+ Assert.assertEquals(graph.getShortestPath(1, 2), 1);
+ Assert.assertEquals(graph.getShortestPath(0, 3), 2);
+ Assert.assertEquals(graph.getShortestPath(1, 3), 2);
+ Assert.assertEquals(graph.getShortestPath(2, 3), 1);
+ }
+
+ @Test
+ public void createFromString() {
+
+ String data =
+
+ // Start of lattice
+ "("+
+
+ // Node 0
+ "("+
+ "('A',1.0,5),"+ // Arc with label A and cost 1.0. Destination is Node 5 (Node 0 + span of 5)
+ "('B',1.0,2),"+ // Arc with label B and cost 1.0. Destination is Node 2 (Node 0 + span of 2)
+ "('C',1.0,3),"+ // Arc with label C and cost 1.0. Destination is Node 3 (Node 0 + span of 3)
+ "('D',1.0,1),"+ // Arc with label D and cost 1.0. Destination is Node 1 (Node 0 + span of 1)
+ ")," +
+
+ // Node 1
+ "(" +
+ "('E',1.0,4)," + // Arc with label E and cost 1.0. Destination is Node 5 (Node 1 + span of 4)
+ ")," +
+
+ // Node 2
+ "(" +
+ "('C',1.0,3)," + // Arc with label C and cost 1.0. Destination is Node 5 (Node 2 + span of 3)
+ ")," +
+
+ // Node 3
+ "(" +
+ "('D',1.0,1)," + // Arc with label D and cost 1.0. Destination is Node 4 (Node 3 + span of 1)
+ ")," +
+
+ // Node 4
+ "(" +
+ "('E',1.0,1)," + // Arc with label E and cost 1.0. Destination is Node 5 (Node 4 + span of 1)
+ ")," +
+
+ // Node 5
+ "(" +
+ "('X',1.0,1)," + // Arc with label X and cost 1.0. Destination is Node 6 (Node 5 + span of 1)
+ ")," +
+
+ // There is an implicit final state (Node 6).
+
+ ")"; // End of lattice
+
+
+ Lattice<String> lattice = Lattice.createFromString(data);
+
+ int numberOfNodes = 7;
+
+ Assert.assertEquals(lattice.size(), numberOfNodes);
+
+ Node<String> node0 = lattice.getNode(0);
+ Node<String> node1 = lattice.getNode(1);
+ Node<String> node2 = lattice.getNode(2);
+ Node<String> node3 = lattice.getNode(3);
+ Node<String> node4 = lattice.getNode(4);
+ Node<String> node5 = lattice.getNode(5);
+ Node<String> node6 = lattice.getNode(6);
+
+ Assert.assertEquals(node0.size(), 4);
+ Assert.assertEquals(node1.size(), 1);
+ Assert.assertEquals(node2.size(), 1);
+ Assert.assertEquals(node3.size(), 1);
+ Assert.assertEquals(node4.size(), 1);
+ Assert.assertEquals(node5.size(), 1);
+ Assert.assertEquals(node6.size(), 0);
+
+ // Node 0 outgoing arcs
+
+ Arc<String> arcA_0_5 = node0.getOutgoingArcs().get(0);
+ Assert.assertEquals(arcA_0_5.getLabel(), "A");
+ Assert.assertEquals(arcA_0_5.getHead(), node5);
+ Assert.assertEquals(arcA_0_5.getTail(), node0);
+
+ Assert.assertEquals(arcA_0_5.getCost(), (float) 1.0);
+
+ Arc<String> arcB_0_2 = node0.getOutgoingArcs().get(1);
+ Assert.assertEquals(arcB_0_2.getLabel(), "B");
+ Assert.assertEquals(arcB_0_2.getHead(), node2);
+ Assert.assertEquals(arcB_0_2.getTail(), node0);
+ Assert.assertEquals(arcB_0_2.getCost(), (float) 1.0);
+
+ Arc<String> arcC_0_3 = node0.getOutgoingArcs().get(2);
+ Assert.assertEquals(arcC_0_3.getLabel(), "C");
+ Assert.assertEquals(arcC_0_3.getHead(), node3);
+ Assert.assertEquals(arcC_0_3.getTail(), node0);
+ Assert.assertEquals(arcC_0_3.getCost(), (float) 1.0);
+
+ Arc<String> arcD_0_1 = node0.getOutgoingArcs().get(3);
+ Assert.assertEquals(arcD_0_1.getLabel(), "D");
+ Assert.assertEquals(arcD_0_1.getHead(), node1);
+ Assert.assertEquals(arcD_0_1.getTail(), node0);
+ Assert.assertEquals(arcD_0_1.getCost(), (float) 1.0);
+
+ // Node 1 outgoing arcs
+ Arc<String> arcE_1_5 = node1.getOutgoingArcs().get(0);
+ Assert.assertEquals(arcE_1_5.getLabel(), "E");
+ Assert.assertEquals(arcE_1_5.getHead(), node5);
+ Assert.assertEquals(arcE_1_5.getTail(), node1);
+ Assert.assertEquals(arcE_1_5.getCost(), (float) 1.0);
+
+ // Node 2 outgoing arcs
+ Arc<String> arcC_2_5 = node2.getOutgoingArcs().get(0);
+ Assert.assertEquals(arcC_2_5.getLabel(), "C");
+ Assert.assertEquals(arcC_2_5.getHead(), node5);
+ Assert.assertEquals(arcC_2_5.getTail(), node2);
+ Assert.assertEquals(arcC_2_5.getCost(), (float) 1.0);
+
+ // Node 3 outgoing arcs
+ Arc<String> arcD_3_4 = node3.getOutgoingArcs().get(0);
+ Assert.assertEquals(arcD_3_4.getLabel(), "D");
+ Assert.assertEquals(arcD_3_4.getHead(), node4);
+ Assert.assertEquals(arcD_3_4.getTail(), node3);
+ Assert.assertEquals(arcD_3_4.getCost(), (float) 1.0);
+
+ // Node 4 outgoing arcs
+ Arc<String> arcE_4_5 = node4.getOutgoingArcs().get(0);
+ Assert.assertEquals(arcE_4_5.getLabel(), "E");
+ Assert.assertEquals(arcE_4_5.getHead(), node5);
+ Assert.assertEquals(arcE_4_5.getTail(), node4);
+ Assert.assertEquals(arcE_1_5.getCost(), (float) 1.0);
+
+ // Node 5 outgoing arcs
+ Arc<String> arcX_5_6 = node5.getOutgoingArcs().get(0);
+ Assert.assertEquals(arcX_5_6.getLabel(), "X");
+ Assert.assertEquals(arcX_5_6.getHead(), node6);
+ Assert.assertEquals(arcX_5_6.getTail(), node5);
+ Assert.assertEquals(arcX_5_6.getCost(), (float) 1.0);
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/test/java/org/apache/joshua/lattice/NodeTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/lattice/NodeTest.java b/joshua-core/src/test/java/org/apache/joshua/lattice/NodeTest.java
new file mode 100644
index 0000000..b58ba1e
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/lattice/NodeTest.java
@@ -0,0 +1,105 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.lattice;
+
+import org.testng.Assert;
+import org.testng.annotations.Test;
+
+/**
+ * Unit tests for Node class: construction, identity accessors, string form and arc insertion.
+ *
+ * @author Lane Schwartz
+ * @since 2008-07-09
+ * @version $LastChangedDate$
+ */
+@Test(groups = { "lattice_node" })
+public class NodeTest {
+
+  private final int nodeId = 12345;
+
+  /** Node under test; assigned by {@link #constructNode()} and shared by the dependent tests. */
+  private Node<String> subject;
+
+  @Test
+  public void constructNode() {
+    subject = new Node<String>(nodeId);
+    Assert.assertEquals((int) subject.id(), nodeId);
+    Assert.assertTrue(subject.getOutgoingArcs().isEmpty());
+    Assert.assertEquals(subject.size(), 0);
+  }
+
+  @Test(dependsOnMethods = { "constructNode" })
+  public void getNumber() {
+    Assert.assertEquals(subject.getNumber(), nodeId);
+  }
+
+  @Test(dependsOnMethods = { "constructNode" })
+  public void toStringTest() {
+    Assert.assertEquals(subject.toString(), "Node-" + nodeId);
+  }
+
+  /** Adds three arcs one at a time and checks each new arc right after it is appended. */
+  @Test(dependsOnMethods = { "constructNode" })
+  public void addArc() {
+    Assert.assertEquals(subject.size(), 0);
+
+    // First arc: a small finite cost.
+    Node<String> firstHead = new Node<String>(2);
+    float firstCost = (float) 0.123;
+    String firstLabel = "somthing cool";
+    subject.addArc(firstHead, firstCost, firstLabel);
+    Assert.assertEquals(subject.size(), 1);
+    Arc<String> firstArc = subject.getOutgoingArcs().get(0);
+    Assert.assertEquals(firstArc.getHead(), firstHead);
+    Assert.assertEquals(firstArc.getTail(), subject);
+    Assert.assertEquals(firstArc.getCost(), firstCost);
+    Assert.assertEquals(firstArc.getLabel(), firstLabel);
+
+    // Second arc: a larger finite cost.
+    Node<String> secondHead = new Node<String>(3);
+    float secondCost = (float) 124.78;
+    String secondLabel = "hurray!";
+    subject.addArc(secondHead, secondCost, secondLabel);
+    Assert.assertEquals(subject.size(), 2);
+    Arc<String> secondArc = subject.getOutgoingArcs().get(1);
+    Assert.assertEquals(secondArc.getHead(), secondHead);
+    Assert.assertEquals(secondArc.getTail(), subject);
+    Assert.assertEquals(secondArc.getCost(), secondCost);
+    Assert.assertEquals(secondArc.getLabel(), secondLabel);
+
+    // Third arc: infinite cost and a NUL-character label.
+    Node<String> thirdHead = new Node<String>(4);
+    float thirdCost = Float.POSITIVE_INFINITY;
+    String thirdLabel = "\u0000";
+    subject.addArc(thirdHead, thirdCost, thirdLabel);
+    Assert.assertEquals(subject.size(), 3);
+    Arc<String> thirdArc = subject.getOutgoingArcs().get(2);
+    Assert.assertEquals(thirdArc.getHead(), thirdHead);
+    Assert.assertEquals(thirdArc.getTail(), subject);
+    Assert.assertEquals(thirdArc.getCost(), thirdCost);
+    Assert.assertEquals(thirdArc.getLabel(), thirdLabel);
+  }
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/test/java/org/apache/joshua/packed/Benchmark.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/packed/Benchmark.java b/joshua-core/src/test/java/org/apache/joshua/packed/Benchmark.java
new file mode 100644
index 0000000..41cf2a0
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/packed/Benchmark.java
@@ -0,0 +1,126 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.packed;
+
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.nio.IntBuffer;
+import java.nio.MappedByteBuffer;
+import java.nio.channels.FileChannel;
+import java.nio.channels.FileChannel.MapMode;
+import java.util.Random;
+
+/**
+ * This program runs a little benchmark to check reading speed on various data
+ * representations.
+ *
+ * Usage: java Benchmark PACKED_GRAMMAR_DIR TIMES
+ */
+
+public class Benchmark {
+
+
+ private static final Logger LOG = LoggerFactory.getLogger(Benchmark.class);
+
+ private IntBuffer intBuffer;
+ private MappedByteBuffer byteBuffer;
+ private int[] intArray;
+
+ public Benchmark(String dir) throws IOException {
+ File file = new File(dir + "/slice_00000.source");
+
+ FileChannel source_channel = new FileInputStream(file).getChannel();
+ int byte_size = (int) source_channel.size();
+ int int_size = byte_size / 4;
+
+ byteBuffer = source_channel.map(MapMode.READ_ONLY, 0, byte_size);
+ intBuffer = byteBuffer.asIntBuffer();
+
+ intArray = new int[int_size];
+ intBuffer.get(intArray);
+ }
+
+ public void benchmark(int times) {
+ LOG.info("Beginning benchmark.");
+
+ Random r = new Random();
+ r.setSeed(1234567890);
+ int[] positions = new int[1000];
+ for (int i = 0; i < positions.length; i++)
+ positions[i] = r.nextInt(intArray.length);
+
+ long sum;
+
+ long start_time = System.currentTimeMillis();
+
+ sum = 0;
+ for (int t = 0; t < times; t++)
+ for (int i = 0; i < positions.length; i++)
+ sum += byteBuffer.getInt(positions[i] * 4);
+ LOG.info("Sum: {}", sum);
+ long byte_time = System.currentTimeMillis();
+
+ sum = 0;
+ for (int t = 0; t < times; t++)
+ for (int i = 0; i < positions.length; i++)
+ sum += intBuffer.get(positions[i]);
+ LOG.info("Sum: {}", sum);
+ long int_time = System.currentTimeMillis();
+
+ sum = 0;
+ for (int t = 0; t < times; t++)
+ for (int i = 0; i < positions.length; i++)
+ sum += intArray[positions[i]];
+ LOG.info("Sum: {}", sum);
+ long array_time = System.currentTimeMillis();
+
+ sum = 0;
+ for (int t = 0; t < times; t++)
+ for (int i = 0; i < (intArray.length / 8); i++)
+ sum += intArray[i * 6] + intArray[i * 6 + 2];
+ LOG.info("Sum: {}", sum);
+ long mult_time = System.currentTimeMillis();
+
+ sum = 0;
+ for (int t = 0; t < times; t++) {
+ int index = 0;
+ for (int i = 0; i < (intArray.length / 8); i++) {
+ sum += intArray[index] + intArray[index + 2];
+ index += 6;
+ }
+ }
+ LOG.info("Sum: {}", sum);
+ long add_time = System.currentTimeMillis();
+
+ LOG.info("ByteBuffer: {}", (byte_time - start_time));
+ LOG.info("IntBuffer: {}", (int_time - byte_time));
+ LOG.info("Array: {}", (array_time - int_time));
+ LOG.info("Multiply: {}", (mult_time - array_time));
+ LOG.info("Add: {}", (add_time - mult_time));
+ }
+
+ public static void main(String args[]) throws IOException {
+ Benchmark pr = new Benchmark(args[0]);
+ pr.benchmark( Integer.parseInt(args[1]));
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/test/java/org/apache/joshua/packed/CountRules.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/packed/CountRules.java b/joshua-core/src/test/java/org/apache/joshua/packed/CountRules.java
new file mode 100644
index 0000000..5ada5ab
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/packed/CountRules.java
@@ -0,0 +1,110 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.packed;
+
+import java.nio.MappedByteBuffer;
+import java.nio.channels.FileChannel;
+import java.nio.channels.FileChannel.MapMode;
+
+import org.apache.joshua.corpus.Vocabulary;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+
+/**
+ * This program reads a packed representation and prints out some
+ * basic information about it.
+ *
+ * Usage: java CountRules PACKED_GRAMMAR_DIR
+ */
+
+public class CountRules {
+
+ public static void main(String args[]) {
+
+ String dir = args[0];
+
+ File file = new File(dir + "/chunk_00000.source");
+ FileInputStream stream = null;
+ FileChannel channel = null;
+ try {
+ // read the vocabulary
+ Vocabulary.read(new File(dir + "/vocabulary"));
+
+ // get the channel etc
+ stream = new FileInputStream(file);
+ channel = stream.getChannel();
+ int size = (int) channel.size();
+
+ MappedByteBuffer buffer = channel.map(MapMode.READ_ONLY, 0, size);
+ // byte[] bytes = new bytes[size];
+ // buffer.get(bytes);
+
+ // read the number of rules
+ int numRules = buffer.getInt();
+ System.out.println(String.format("There are %d source sides at the root", numRules));
+
+ // read the first symbol and its offset
+ for (int i = 0; i < numRules; i++) {
+ // String symbol = Vocabulary.word(buffer.getInt());
+ int symbol = buffer.getInt();
+ String string = Vocabulary.word(symbol);
+ int offset = buffer.getInt();
+ System.out.println(String.format("-> %s/%d [%d]", string, symbol, offset));
+ }
+
+ } catch (IOException e) {
+
+ e.printStackTrace();
+
+ } finally {
+ try {
+ if (stream != null)
+ stream.close();
+
+ if (channel != null)
+ channel.close();
+
+ } catch (IOException e) {
+
+ e.printStackTrace();
+
+ }
+ }
+
+
+ // // Read in the bytes
+ // int offset = 0;
+ // int numRead = 0;
+ // while (offset < bytes.length
+ // && (numRead=is.read(bytes, offset, bytes.length-offset)) >= 0) {
+ // offset += numRead;
+ // }
+
+ // // Ensure all the bytes have been read in
+ // if (offset < bytes.length) {
+ // throw new IOException("Could not completely read file "+file.getName());
+ // }
+
+ // // Close the input stream and return bytes
+ // is.close();
+ // return bytes;
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/test/java/org/apache/joshua/packed/PrintRules.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/packed/PrintRules.java b/joshua-core/src/test/java/org/apache/joshua/packed/PrintRules.java
new file mode 100644
index 0000000..af6507f
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/packed/PrintRules.java
@@ -0,0 +1,199 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.packed;
+
+import java.io.File;
+import java.io.FileInputStream;
+import java.io.IOException;
+import java.nio.IntBuffer;
+import java.nio.MappedByteBuffer;
+import java.nio.channels.FileChannel;
+import java.nio.channels.FileChannel.MapMode;
+
+import org.apache.joshua.util.quantization.Quantizer;
+import org.apache.joshua.util.quantization.QuantizerConfiguration;
+import org.apache.joshua.corpus.Vocabulary;
+
+/**
+ * This program reads a packed representation and prints out some basic
+ * information about it.
+ *
+ * Usage: java PrintRules PACKED_GRAMMAR_DIR
+ */
+
+public class PrintRules {
+
+ private QuantizerConfiguration quantization;
+
+ private int[] source;
+ private int[] target;
+ private MappedByteBuffer features;
+ private MappedByteBuffer alignments;
+
+ private int[] featureLookup;
+ private int[] alignmentLookup;
+
+ private boolean have_alignments;
+
+ public PrintRules(String dir) throws IOException {
+ File source_file = new File(dir + "/slice_00000.source");
+ File target_file = new File(dir + "/slice_00000.target");
+ File feature_file = new File(dir + "/slice_00000.features");
+ File alignment_file = new File(dir + "/slice_00000.alignments");
+
+ have_alignments = alignment_file.exists();
+
+ // Read the vocabulary.
+ Vocabulary.read(new File(dir + "/vocabulary"));
+
+ // Read the quantizer setup.
+ quantization = new QuantizerConfiguration();
+ quantization.read(dir + "/quantization");
+
+ // Get the channels etc.
+ @SuppressWarnings("resource")
+ FileChannel source_channel = new FileInputStream(source_file).getChannel();
+ int source_size = (int) source_channel.size();
+ IntBuffer source_buffer = source_channel.map(MapMode.READ_ONLY, 0,
+ source_size).asIntBuffer();
+ source = new int[source_size / 4];
+ source_buffer.get(source);
+
+ @SuppressWarnings("resource")
+ FileChannel target_channel = new FileInputStream(target_file).getChannel();
+ int target_size = (int) target_channel.size();
+ IntBuffer target_buffer = target_channel.map(MapMode.READ_ONLY, 0,
+ target_size).asIntBuffer();
+ target = new int[target_size / 4];
+ target_buffer.get(target);
+
+ @SuppressWarnings("resource")
+ FileChannel feature_channel = new FileInputStream(feature_file).getChannel();
+ int feature_size = (int) feature_channel.size();
+ features = feature_channel.map(MapMode.READ_ONLY, 0, feature_size);
+
+ if (have_alignments) {
+ @SuppressWarnings("resource")
+ FileChannel alignment_channel = new FileInputStream(alignment_file).getChannel();
+ int alignment_size = (int) alignment_channel.size();
+ alignments = alignment_channel.map(MapMode.READ_ONLY, 0, alignment_size);
+ }
+
+ int num_feature_blocks = features.getInt();
+ featureLookup = new int[num_feature_blocks];
+ // Read away data size.
+ features.getInt();
+ for (int i = 0; i < num_feature_blocks; i++)
+ featureLookup[i] = features.getInt();
+
+ int num_alignment_blocks = alignments.getInt();
+ alignmentLookup = new int[num_alignment_blocks];
+ // Read away data size.
+ alignments.getInt();
+ for (int i = 0; i < num_alignment_blocks; i++)
+ alignmentLookup[i] = alignments.getInt();
+
+ if (num_alignment_blocks != num_feature_blocks)
+ throw new RuntimeException("Number of blocks doesn't match up.");
+ }
+
+ public void traverse() {
+ traverse(0, "");
+ }
+
+ private void traverse(int position, String src_side) {
+ int num_children = source[position];
+ int[] addresses = new int[num_children];
+ int[] symbols = new int[num_children];
+ int j = position + 1;
+ for (int i = 0; i < num_children; i++) {
+ symbols[i] = source[j++];
+ addresses[i] = source[j++];
+ }
+ int num_rules = source[j++];
+ for (int i = 0; i < num_rules; i++) {
+ int lhs = source[j++];
+ int tgt_address = source[j++];
+ int data_address = source[j++];
+ printRule(src_side, lhs, tgt_address, data_address);
+ }
+ for (int i = 0; i < num_children; i++) {
+ traverse(addresses[i], src_side + " " + Vocabulary.word(symbols[i]));
+ }
+ }
+
+ private String getTarget(int pointer) {
+ StringBuilder sb = new StringBuilder();
+ do {
+ pointer = target[pointer];
+ if (pointer != -1) {
+ int symbol = target[pointer + 1];
+ if (symbol < 0)
+ sb.append(" ").append("NT" + symbol);
+ else
+ sb.append(" ").append(Vocabulary.word(symbol));
+ }
+ } while (pointer != -1);
+ return sb.toString();
+ }
+
+ private String getFeatures(int block_id) {
+ StringBuilder sb = new StringBuilder();
+
+ int data_position = featureLookup[block_id];
+ int num_features = features.getInt(data_position);
+ data_position += 4;
+ for (int i = 0; i < num_features; i++) {
+ int feature_id = features.getInt(data_position);
+ Quantizer quantizer = quantization.get(feature_id);
+ sb.append(" " + Vocabulary.word(feature_id) + "=" +
+ quantizer.read(features, data_position));
+ data_position += 4 + quantizer.size();
+ }
+ return sb.toString();
+ }
+
+ private String getAlignments(int block_id) {
+ StringBuilder sb = new StringBuilder();
+
+ int data_position = alignmentLookup[block_id];
+ byte num_points = alignments.get(data_position);
+ for (int i = 0; i < num_points; i++) {
+ byte src = alignments.get(data_position + 1 + 2 * i);
+ byte tgt = alignments.get(data_position + 2 + 2 * i);
+
+ sb.append(" " + src + "-" + tgt);
+ }
+ return sb.toString();
+ }
+
+ private void printRule(String src_side, int lhs, int tgt_address,
+ int data_address) {
+ System.out.println(Vocabulary.word(lhs) + " |||" +
+ src_side + " |||" +
+ getTarget(tgt_address) + " |||" +
+ getFeatures(data_address) +
+ (have_alignments ? " |||" + getAlignments(data_address) : ""));
+ }
+
+ public static void main(String args[]) throws IOException {
+ PrintRules pr = new PrintRules(args[0]);
+ pr.traverse();
+ }
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/test/java/org/apache/joshua/packed/README
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/packed/README b/joshua-core/src/test/java/org/apache/joshua/packed/README
new file mode 100644
index 0000000..3cb52b8
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/packed/README
@@ -0,0 +1,6 @@
+# This code generates the packed grammar representation from the grammar file
+rm -rf small_packed
+java -cp /home/hltcoe/mpost/code/joshua/bin:. joshua.tools.GrammarPacker packer.config small_packed small_grammar
+
+# This compiles and reads the grammar file
+java -cp $JOSHUA/bin:. CountRules small_packed
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/test/java/org/apache/joshua/packed/VocabTest.java
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/packed/VocabTest.java b/joshua-core/src/test/java/org/apache/joshua/packed/VocabTest.java
new file mode 100644
index 0000000..523df4c
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/packed/VocabTest.java
@@ -0,0 +1,58 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.joshua.packed;
+
+import java.io.File;
+import java.io.IOException;
+
+import org.apache.joshua.corpus.Vocabulary;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+/**
+ * Manual driver that reads the {@code vocabulary} file found in a given
+ * directory and prints every id/word pair, followed by the number of words
+ * read.
+ */
+public class VocabTest {
+
+  private static final Logger LOG = LoggerFactory.getLogger(VocabTest.class);
+
+  /**
+   * Reads {@code <args[0]>/vocabulary} and dumps its contents to standard
+   * output. Exits with status 1 when the directory argument is missing, when
+   * the vocabulary cannot be read, or when an I/O error occurs.
+   *
+   * @param args command-line arguments; {@code args[0]} is the directory
+   *        containing the {@code vocabulary} file
+   */
+  //FIXME: no main() in automated test case,
+  public static void main(String[] args) {
+    // Fail with a usage message instead of an ArrayIndexOutOfBoundsException.
+    if (args.length < 1) {
+      System.err.println("Usage: VocabTest <directory containing 'vocabulary'>");
+      System.exit(1);
+    }
+
+    int numWords = 0;
+    try {
+      String dir = args[0];
+
+      if (!Vocabulary.read(new File(dir + "/vocabulary"))) {
+        System.err.println("VocabTest: Failed to read the vocabulary.");
+        System.exit(1);
+      }
+
+      // Ids are walked from 0 upward until the first id the vocabulary lacks.
+      for (int id = 0; Vocabulary.hasId(id); id++) {
+        String word = Vocabulary.word(id);
+        System.out.println(String.format("VOCAB: %d\t%s", id, word));
+        numWords++;
+      }
+    } catch (IOException e) {
+      LOG.error(e.getMessage(), e);
+      // An I/O failure is a failed run: do not report "read 0 words" with a
+      // zero exit status, matching the unreadable-vocabulary path above.
+      System.exit(1);
+    }
+
+    System.out.println("read " + numWords + " words");
+  }
+}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/e2734396/joshua-core/src/test/java/org/apache/joshua/packed/packer.config
----------------------------------------------------------------------
diff --git a/joshua-core/src/test/java/org/apache/joshua/packed/packer.config b/joshua-core/src/test/java/org/apache/joshua/packed/packer.config
new file mode 100644
index 0000000..73edb1a
--- /dev/null
+++ b/joshua-core/src/test/java/org/apache/joshua/packed/packer.config
@@ -0,0 +1,6 @@
+#chunk_size 30000
+chunk_size 2500000
+
+quantizer boolean Abstract,Adjacent,ContainsX,GlueRule,Lexical,Monotonic,TargetTerminalsButNoSource
+quantizer float LexprobSourceGivenTarget,LexprobTargetGivenSource,PhrasePenalty,RarityPenalty,SourcePhraseGivenTarget,SourceTerminalsButNoTarget,TargetPhraseGivenSource
+quantizer byte TargetWords