You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/08/29 17:43:07 UTC

[1/6] incubator-joshua git commit: added berkeleyaligner

Repository: incubator-joshua
Updated Branches:
  refs/heads/7 f90cf3e4d -> 0b543276e


added berkeleyaligner


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/d3a2291c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/d3a2291c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/d3a2291c

Branch: refs/heads/7
Commit: d3a2291c004d383c5cb65ab466af582ab6f39d6c
Parents: 25d28fe
Author: Matt Post <po...@cs.jhu.edu>
Authored: Wed Aug 24 09:43:01 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Wed Aug 24 09:43:01 2016 -0400

----------------------------------------------------------------------
 download-deps.sh | 4 ++++
 1 file changed, 4 insertions(+)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d3a2291c/download-deps.sh
----------------------------------------------------------------------
diff --git a/download-deps.sh b/download-deps.sh
index 6e4797b..8fbdf69 100755
--- a/download-deps.sh
+++ b/download-deps.sh
@@ -15,3 +15,7 @@ git clone https://github.com/joshua-decoder/giza-pp.git ext/giza-pp
 
 git clone https://github.com/joshua-decoder/symal.git ext/symal
 (make -C ext/symal all)
+
+git clone https://github.com/joshua-decoder/berkeleyaligner ext/berkeleyaligner
+(cd ext/berkeleyaligner; ant)
+


[2/6] incubator-joshua git commit: formatting fixes, updated path to JAR

Posted by mj...@apache.org.
formatting fixes, updated path to JAR


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/255927db
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/255927db
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/255927db

Branch: refs/heads/7
Commit: 255927dba585965822c3d35f4d90e35d3199994f
Parents: d3a2291
Author: Matt Post <po...@cs.jhu.edu>
Authored: Wed Aug 24 10:07:38 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Wed Aug 24 10:07:38 2016 -0400

----------------------------------------------------------------------
 scripts/training/paralign.pl                      |  2 +-
 .../training/templates/alignment/word-align.conf  | 18 +++++++++---------
 2 files changed, 10 insertions(+), 10 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/255927db/scripts/training/paralign.pl
----------------------------------------------------------------------
diff --git a/scripts/training/paralign.pl b/scripts/training/paralign.pl
index d5159a7..2f04fc1 100755
--- a/scripts/training/paralign.pl
+++ b/scripts/training/paralign.pl
@@ -78,7 +78,7 @@ sub run_berkeley_aligner {
 
   # run the job
   $cachepipe->cmd("berkeley-aligner-chunk-$chunkno",
-                  "java -d64 -Xmx$args{aligner_mem} -jar $JOSHUA/lib/berkeleyaligner.jar ++alignments/$chunkno/word-align.conf",
+                  "java -d64 -Xmx$args{aligner_mem} -jar $JOSHUA/ext/berkeleyaligner/distribution/berkeleyaligner.jar ++alignments/$chunkno/word-align.conf",
                   "alignments/$chunkno/word-align.conf",
                   "$args{train_dir}/splits/corpus.$args{source}.$chunkno",
                   "$args{train_dir}/splits/corpus.$args{target}.$chunkno",

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/255927db/scripts/training/templates/alignment/word-align.conf
----------------------------------------------------------------------
diff --git a/scripts/training/templates/alignment/word-align.conf b/scripts/training/templates/alignment/word-align.conf
index 83904d3..5fe3e0c 100644
--- a/scripts/training/templates/alignment/word-align.conf
+++ b/scripts/training/templates/alignment/word-align.conf
@@ -11,8 +11,8 @@
 
 forwardModels	MODEL1 HMM
 reverseModels	MODEL1 HMM
-mode			JOINT JOINT
-iters			5 5
+mode	JOINT JOINT
+iters	5 5
 
 ###############################################
 # Execution: Controls output and program flow 
@@ -20,9 +20,9 @@ iters			5 5
 
 execDir	alignments/<CHUNK>
 create
-saveParams		false
-numThreads		1
-msPerLine		10000
+saveParams	false
+numThreads	1
+msPerLine	10000
 alignTraining
 
 #################
@@ -33,10 +33,10 @@ foreignSuffix	<SOURCE>
 englishSuffix	<TARGET>
 
 # Choose the training sources, which can either be directories or files that list files/directories
-trainSources <TRAIN_DIR>/splits/corpus
-sentences	 MAX
-testSources /dev/null
-overwriteExecDir true
+trainSources	<TRAIN_DIR>/splits/corpus
+sentences	MAX
+testSources	/dev/null
+overwriteExecDir	true
 
 #################
 # 1-best output 


[3/6] incubator-joshua git commit: Merge branch 'master' into JOSHUA-304

Posted by mj...@apache.org.
Merge branch 'master' into JOSHUA-304


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/fb5d35da
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/fb5d35da
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/fb5d35da

Branch: refs/heads/7
Commit: fb5d35da0999fe6533b91b6076a4c4032d5d6710
Parents: 255927d 0744ebf
Author: Matt Post <po...@cs.jhu.edu>
Authored: Wed Aug 24 15:44:44 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Wed Aug 24 15:44:44 2016 -0400

----------------------------------------------------------------------
 CHANGELOG                                       |    7 +
 demo/README.md                                  |    2 +-
 demo/apache_joshua_logo.png                     |  Bin 0 -> 306617 bytes
 demo/apache_joshua_logo_faded.png               |  Bin 0 -> 309216 bytes
 demo/demo.config                                |    3 +
 demo/demo.js                                    |   19 +-
 demo/index.html                                 |   37 +-
 examples/README.md                              |   36 +-
 pom.xml                                         |   30 +-
 resources/berkeley_lm/lm                        |   16 -
 resources/berkeley_lm/lm.berkeleylm             |  Bin 4294 -> 0 bytes
 resources/berkeley_lm/lm.berkeleylm.gz          |  Bin 1786 -> 0 bytes
 resources/berkeley_lm/lm.gz                     |  Bin 162 -> 0 bytes
 resources/grammar.glue                          |    4 -
 resources/kbest_extraction/glue-grammar         |    3 -
 resources/kbest_extraction/grammar              |   25 -
 resources/kbest_extraction/joshua.config        |   27 -
 resources/kbest_extraction/lm.gz                |  Bin 2466496 -> 0 bytes
 resources/kbest_extraction/output.gold          | 3126 ------------------
 resources/kbest_extraction/output.scores.gold   | 3126 ------------------
 resources/kenlm/oilers.kenlm                    |  Bin 49011 -> 0 bytes
 resources/lm_oov/joshua.config                  |   17 -
 resources/phrase_decoder/config                 |   29 -
 resources/phrase_decoder/constrained.config     |   28 -
 .../phrase_decoder/constrained.output.gold      |    5 -
 resources/phrase_decoder/lm.1.gz                |  Bin 2235 -> 0 bytes
 resources/phrase_decoder/output.gold            |    1 -
 resources/phrase_decoder/rules.1.gz             |  Bin 2998042 -> 0 bytes
 resources/wa_grammar                            |    3 -
 resources/wa_grammar.packed/config              |    2 -
 resources/wa_grammar.packed/encoding            |  Bin 154 -> 0 bytes
 .../wa_grammar.packed/slice_00000.alignments    |  Bin 45 -> 0 bytes
 .../wa_grammar.packed/slice_00000.features      |  Bin 47 -> 0 bytes
 resources/wa_grammar.packed/slice_00000.source  |  Bin 204 -> 0 bytes
 resources/wa_grammar.packed/slice_00000.target  |  Bin 128 -> 0 bytes
 .../wa_grammar.packed/slice_00000.target.lookup |  Bin 32 -> 0 bytes
 resources/wa_grammar.packed/vocabulary          |  Bin 238 -> 0 bytes
 .../org/apache/joshua/adagrad/AdaGradCore.java  |  101 +-
 .../org/apache/joshua/adagrad/Optimizer.java    |  348 +-
 .../org/apache/joshua/corpus/BasicPhrase.java   |    2 +-
 .../apache/joshua/corpus/ContiguousPhrase.java  |    8 +-
 .../java/org/apache/joshua/corpus/Phrase.java   |    2 +-
 .../java/org/apache/joshua/corpus/Span.java     |    6 +-
 .../org/apache/joshua/corpus/SymbolTable.java   |    2 +-
 .../org/apache/joshua/corpus/Vocabulary.java    |   10 +-
 .../joshua/corpus/syntax/ArraySyntaxTree.java   |   51 +-
 .../apache/joshua/corpus/syntax/SyntaxTree.java |   10 +-
 .../org/apache/joshua/decoder/ArgsParser.java   |    8 +-
 .../java/org/apache/joshua/decoder/BLEU.java    |   72 +-
 .../java/org/apache/joshua/decoder/Decoder.java |   28 +-
 .../apache/joshua/decoder/DecoderThread.java    |    2 +-
 .../joshua/decoder/JoshuaConfiguration.java     |   45 +-
 .../joshua/decoder/NbestMinRiskReranker.java    |   33 +-
 .../joshua/decoder/StructuredTranslation.java   |    9 +-
 .../decoder/StructuredTranslationFactory.java   |    5 +-
 .../org/apache/joshua/decoder/Translation.java  |    8 +-
 .../org/apache/joshua/decoder/Translations.java |    2 +-
 .../joshua/decoder/chart_parser/Cell.java       |   12 +-
 .../joshua/decoder/chart_parser/Chart.java      |   51 +-
 .../decoder/chart_parser/ComputeNodeResult.java |    8 +-
 .../decoder/chart_parser/CubePruneState.java    |   20 +-
 .../joshua/decoder/chart_parser/DotChart.java   |   24 +-
 .../joshua/decoder/chart_parser/SourcePath.java |    4 +-
 .../decoder/chart_parser/StateConstraint.java   |    5 +-
 .../joshua/decoder/chart_parser/SuperNode.java  |    2 +-
 .../joshua/decoder/ff/FeatureFunction.java      |   24 +-
 .../apache/joshua/decoder/ff/FeatureVector.java |   21 +-
 .../joshua/decoder/ff/LabelCombinationFF.java   |    2 +-
 .../joshua/decoder/ff/LabelSubstitutionFF.java  |    8 +-
 .../joshua/decoder/ff/LexicalFeatures.java      |    2 +-
 .../apache/joshua/decoder/ff/OOVPenalty.java    |    7 +-
 .../apache/joshua/decoder/ff/PhraseModel.java   |    2 +-
 .../apache/joshua/decoder/ff/PhrasePenalty.java |    4 +-
 .../org/apache/joshua/decoder/ff/RuleFF.java    |    6 +-
 .../decoder/ff/RulePropertiesQuerying.java      |    6 +-
 .../org/apache/joshua/decoder/ff/RuleShape.java |    2 +-
 .../joshua/decoder/ff/SourceDependentFF.java    |    4 +-
 .../apache/joshua/decoder/ff/SourcePathFF.java  |    2 +-
 .../apache/joshua/decoder/ff/TargetBigram.java  |   13 +-
 .../ff/fragmentlm/ConcatenationIterator.java    |   10 +-
 .../decoder/ff/fragmentlm/FragmentLMFF.java     |   59 +-
 .../ff/fragmentlm/PennTreebankReader.java       |   17 +-
 .../joshua/decoder/ff/fragmentlm/Tree.java      |   56 +-
 .../joshua/decoder/ff/fragmentlm/Trees.java     |    8 +-
 .../org/apache/joshua/decoder/ff/lm/KenLM.java  |   26 +-
 .../joshua/decoder/ff/lm/LanguageModelFF.java   |   39 +-
 .../ff/lm/berkeley_lm/LMGrammarBerkeley.java    |    4 +-
 .../ff/lm/bloomfilter_lm/BloomFilter.java       |    2 +-
 .../BloomFilterLanguageModel.java               |   18 +-
 .../joshua/decoder/ff/lm/buildin_lm/TrieLM.java |   25 +-
 .../joshua/decoder/ff/phrase/Distortion.java    |    2 +-
 .../ff/similarity/EdgePhraseSimilarityFF.java   |   17 +-
 .../ff/state_maintenance/NgramDPState.java      |    6 +-
 .../joshua/decoder/ff/tm/AbstractGrammar.java   |   12 +-
 .../decoder/ff/tm/BasicRuleCollection.java      |    2 +-
 .../joshua/decoder/ff/tm/CreateGlueGrammar.java |    2 +-
 .../joshua/decoder/ff/tm/GrammarReader.java     |    2 +-
 .../apache/joshua/decoder/ff/tm/OwnerMap.java   |    2 +-
 .../org/apache/joshua/decoder/ff/tm/Rule.java   |   67 +-
 .../decoder/ff/tm/SentenceFilteredGrammar.java  |   12 +-
 .../decoder/ff/tm/format/MosesFormatReader.java |    2 +-
 .../ff/tm/hash_based/ExtensionIterator.java     |    2 +-
 .../tm/hash_based/MemoryBasedBatchGrammar.java  |    8 +-
 .../decoder/ff/tm/packed/PackedGrammar.java     |   87 +-
 .../ff/tm/packed/SliceAggregatingTrie.java      |    4 +-
 .../decoder/hypergraph/AlignedSourceTokens.java |    2 +-
 .../decoder/hypergraph/AllSpansWalker.java      |   19 +-
 .../hypergraph/DefaultInsideOutside.java        |   34 +-
 .../joshua/decoder/hypergraph/ForestWalker.java |   10 +-
 .../GrammarBuilderWalkerFunction.java           |   14 +-
 .../joshua/decoder/hypergraph/HGNode.java       |   54 +-
 .../joshua/decoder/hypergraph/HyperEdge.java    |    6 +-
 .../joshua/decoder/hypergraph/HyperGraph.java   |   30 +-
 .../decoder/hypergraph/HyperGraphPruning.java   |    9 +-
 .../decoder/hypergraph/KBestExtractor.java      |   51 +-
 .../hypergraph/OutputStringExtractor.java       |    8 +-
 .../hypergraph/StringToTreeConverter.java       |   16 +-
 .../decoder/hypergraph/ViterbiExtractor.java    |   10 +-
 .../hypergraph/WordAlignmentExtractor.java      |    2 +-
 .../decoder/hypergraph/WordAlignmentState.java  |    8 +-
 .../apache/joshua/decoder/io/JSONMessage.java   |   18 +-
 .../decoder/io/TranslationRequestStream.java    |    6 +-
 .../apache/joshua/decoder/phrase/Candidate.java |   38 +-
 .../apache/joshua/decoder/phrase/Coverage.java  |    2 +-
 .../apache/joshua/decoder/phrase/Future.java    |    8 +-
 .../apache/joshua/decoder/phrase/Header.java    |   87 +
 .../joshua/decoder/phrase/Hypothesis.java       |    5 +-
 .../joshua/decoder/phrase/PhraseChart.java      |   73 +-
 .../joshua/decoder/phrase/PhraseNodes.java      |   58 +
 .../joshua/decoder/phrase/PhraseTable.java      |    4 +-
 .../org/apache/joshua/decoder/phrase/Stack.java |   13 +-
 .../apache/joshua/decoder/phrase/Stacks.java    |   25 +-
 .../joshua/decoder/phrase/TargetPhrases.java    |   87 -
 .../decoder/segment_file/ConstraintRule.java    |    4 +-
 .../joshua/decoder/segment_file/Sentence.java   |   18 +-
 .../joshua/decoder/segment_file/Token.java      |    9 +-
 .../java/org/apache/joshua/pro/PROCore.java     |   22 +-
 .../org/apache/joshua/server/ServerThread.java  |    9 +-
 .../LMBerkeleySentenceProbablityTest.java       |    4 +-
 .../lm/berkeley_lm/LMGrammarBerkeleyTest.java   |   10 +-
 .../class_lm/ClassBasedLanguageModelTest.java   |    4 +-
 .../kbest_extraction/KBestExtractionTest.java   |    4 +-
 .../phrase/decode/PhraseDecodingTest.java       |   14 +-
 .../org/apache/joshua/system/KenLmTest.java     |    2 +-
 .../apache/joshua/system/LmOovFeatureTest.java  |   13 +-
 .../system/MultithreadedTranslationTests.java   |    4 +-
 .../joshua/system/StructuredOutputTest.java     |    4 +-
 .../system/StructuredTranslationTest.java       |    4 +-
 src/test/resources/berkeley_lm/lm               |   16 +
 src/test/resources/berkeley_lm/lm.berkeleylm    |  Bin 0 -> 4294 bytes
 src/test/resources/berkeley_lm/lm.berkeleylm.gz |  Bin 0 -> 1786 bytes
 src/test/resources/berkeley_lm/lm.gz            |  Bin 0 -> 162 bytes
 src/test/resources/grammar.glue                 |    4 +
 .../resources/kbest_extraction/glue-grammar     |    3 +
 src/test/resources/kbest_extraction/grammar     |   25 +
 .../resources/kbest_extraction/joshua.config    |   27 +
 src/test/resources/kbest_extraction/lm.gz       |  Bin 0 -> 2466496 bytes
 src/test/resources/kbest_extraction/output.gold | 3126 ++++++++++++++++++
 .../kbest_extraction/output.scores.gold         | 3126 ++++++++++++++++++
 src/test/resources/kenlm/oilers.kenlm           |  Bin 0 -> 49011 bytes
 src/test/resources/lm_oov/joshua.config         |   17 +
 src/test/resources/phrase_decoder/config        |   29 +
 .../resources/phrase_decoder/constrained.config |   28 +
 .../phrase_decoder/constrained.output.gold      |    5 +
 src/test/resources/phrase_decoder/lm.1.gz       |  Bin 0 -> 2235 bytes
 src/test/resources/phrase_decoder/output.gold   |    1 +
 src/test/resources/phrase_decoder/rules.1.gz    |  Bin 0 -> 2998042 bytes
 src/test/resources/wa_grammar                   |    3 +
 src/test/resources/wa_grammar.packed/config     |    2 +
 src/test/resources/wa_grammar.packed/encoding   |  Bin 0 -> 154 bytes
 .../wa_grammar.packed/slice_00000.alignments    |  Bin 0 -> 45 bytes
 .../wa_grammar.packed/slice_00000.features      |  Bin 0 -> 47 bytes
 .../wa_grammar.packed/slice_00000.source        |  Bin 0 -> 204 bytes
 .../wa_grammar.packed/slice_00000.target        |  Bin 0 -> 128 bytes
 .../wa_grammar.packed/slice_00000.target.lookup |  Bin 0 -> 32 bytes
 src/test/resources/wa_grammar.packed/vocabulary |  Bin 0 -> 238 bytes
 176 files changed, 7633 insertions(+), 7619 deletions(-)
----------------------------------------------------------------------



[4/6] incubator-joshua git commit: updated format of corpus splits to get berkeley aligner working again

Posted by mj...@apache.org.
updated format of corpus splits to get berkeley aligner working again

I'm not sure why, but the Berkeley Aligner broke. It seems that the jar
file that used to be included with Joshua was an old version, despite the
fact that the Berkeley Aligner itself hasn't been updated for almost
a decade. This change introduces some minor differences that get it working
again.


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/38eebb3b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/38eebb3b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/38eebb3b

Branch: refs/heads/7
Commit: 38eebb3b58375d0da584f470de66df68476ab938
Parents: fb5d35d
Author: Matt Post <po...@cs.jhu.edu>
Authored: Wed Aug 24 16:16:29 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Wed Aug 24 16:16:29 2016 -0400

----------------------------------------------------------------------
 scripts/training/paralign.pl                       | 12 ++++++------
 scripts/training/pipeline.pl                       | 17 +++++++----------
 .../training/templates/alignment/word-align.conf   |  2 +-
 3 files changed, 14 insertions(+), 17 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/38eebb3b/scripts/training/paralign.pl
----------------------------------------------------------------------
diff --git a/scripts/training/paralign.pl b/scripts/training/paralign.pl
index 2f04fc1..8b0fd28 100755
--- a/scripts/training/paralign.pl
+++ b/scripts/training/paralign.pl
@@ -54,9 +54,9 @@ sub run_giza {
   my ($chunkdir,$chunkno,$do_parallel) = @_;
   my $parallel = ($do_parallel == 1) ? "-parallel" : "";
   $cachepipe->cmd("giza-$chunkno",
-                  "rm -f $chunkdir/corpus.0-0.*; $args{giza_trainer} --root-dir $chunkdir -e $args{target}.$chunkno -f $args{source}.$chunkno -corpus $args{train_dir}/splits/corpus -merge $args{giza_merge} $parallel > $chunkdir/giza.log 2>&1",
-                  "$args{train_dir}/splits/corpus.$args{source}.$chunkno",
-                  "$args{train_dir}/splits/corpus.$args{target}.$chunkno",
+                  "rm -f $chunkdir/corpus.0-0.*; $args{giza_trainer} --root-dir $chunkdir -e $args{target} -f $args{source} -corpus $args{train_dir}/splits/$chunkno/corpus -merge $args{giza_merge} $parallel > $chunkdir/giza.log 2>&1",
+                  "$args{train_dir}/splits/$chunkno/corpus.$args{source}",
+                  "$args{train_dir}/splits/$chunkno/corpus.$args{target}",
                   "$chunkdir/model/aligned.$args{giza_merge}");
 }
 
@@ -67,8 +67,8 @@ sub run_berkeley_aligner {
   open FROM, $aligner_conf or die "can't read berkeley alignment template";
   open TO, ">", "alignments/$chunkno/word-align.conf" or die "can't write to 'alignments/$chunkno/word-align.conf'";
   while (<FROM>) {
-    s/<SOURCE>/$args{source}.$chunkno/g;
-    s/<TARGET>/$args{target}.$chunkno/g;
+    s/<SOURCE>/$args{source}/g;
+    s/<TARGET>/$args{target}/g;
     s/<CHUNK>/$chunkno/g;
     s/<TRAIN_DIR>/$args{train_dir}/g;
     print TO;
@@ -91,5 +91,5 @@ sub run_jacana_aligner {
 
   # run the job
   $cachepipe->cmd("jacana-aligner-chunk-$chunkno",
-                  "java -d64 -Xmx$args{aligner_mem} -DJACANA_HOME=$jacana_home -jar $JOSHUA/lib/jacana-xy.jar -m $jacana_home/resources/model/fr-en.model -src fr -tgt en -a $args{train_dir}/splits/corpus.$args{source}.$chunkno -b $args{train_dir}/splits/corpus.$args{target}.$chunkno -o $chunkdir/training.align");
+                  "java -d64 -Xmx$args{aligner_mem} -DJACANA_HOME=$jacana_home -jar $JOSHUA/lib/jacana-xy.jar -m $jacana_home/resources/model/fr-en.model -src fr -tgt en -a $args{train_dir}/splits/$chunkno/corpus.$args{source} -b $args{train_dir}/splits/$chunkno/corpus.$args{target} -o $chunkdir/training.align");
 }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/38eebb3b/scripts/training/pipeline.pl
----------------------------------------------------------------------
diff --git a/scripts/training/pipeline.pl b/scripts/training/pipeline.pl
index 08933ec..c0e33d3 100755
--- a/scripts/training/pipeline.pl
+++ b/scripts/training/pipeline.pl
@@ -797,8 +797,11 @@ if (! defined $ALIGNMENT) {
 		if ($chunk != $lastchunk) {
 			close CHUNK_SOURCE;
 			close CHUNK_TARGET;
-			open CHUNK_SOURCE, ">", "$DATA_DIRS{train}/splits/corpus.$SOURCE.$chunk" or die;
-			open CHUNK_TARGET, ">", "$DATA_DIRS{train}/splits/corpus.$TARGET.$chunk" or die;
+
+      mkdir("$DATA_DIRS{train}/splits/$chunk");
+
+			open CHUNK_SOURCE, ">", "$DATA_DIRS{train}/splits/$chunk/corpus.$SOURCE" or die;
+			open CHUNK_TARGET, ">", "$DATA_DIRS{train}/splits/$chunk/corpus.$TARGET" or die;
 
 			$lastchunk = $chunk;
 		}
@@ -817,13 +820,7 @@ if (! defined $ALIGNMENT) {
   #   $max_aligner_threads /= 2;
   # }
 
-  # # With multi-threading, we can use a pool to set up concurrent GIZA jobs on the chunks.
-  #
-  # TODO: implement this.  There appears to be a problem with calling system() in threads.
-  #
-  # my $pool = new Thread::Pool(Min => 1, Max => $max_aligner_threads);
-
-  system("mkdir alignments") unless -d "alignments";
+  mkdir("alignments") unless -d "alignments";
 
   my $aligner_cmd = (
     "$SCRIPTDIR/training/paralign.pl "
@@ -875,7 +872,7 @@ if (! defined $ALIGNMENT) {
   if ($ALIGNER eq "giza") {
     @aligned_files = map { "alignments/$_/model/aligned.$GIZA_MERGE" } (0..$lastchunk);
   } elsif ($ALIGNER eq "berkeley") {
-    @aligned_files = map { "alignments/$_/training.align" } (0..$lastchunk);
+    @aligned_files = map { "alignments/$_/training.$TARGET-$SOURCE.align" } (0..$lastchunk);
   } elsif ($ALIGNER eq "jacana") {
     @aligned_files = map { "alignments/$_/training.align" } (0..$lastchunk);
   }

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/38eebb3b/scripts/training/templates/alignment/word-align.conf
----------------------------------------------------------------------
diff --git a/scripts/training/templates/alignment/word-align.conf b/scripts/training/templates/alignment/word-align.conf
index 5fe3e0c..1622fb9 100644
--- a/scripts/training/templates/alignment/word-align.conf
+++ b/scripts/training/templates/alignment/word-align.conf
@@ -33,7 +33,7 @@ foreignSuffix	<SOURCE>
 englishSuffix	<TARGET>
 
 # Choose the training sources, which can either be directories or files that list files/directories
-trainSources	<TRAIN_DIR>/splits/corpus
+trainSources	<TRAIN_DIR>/splits/<CHUNK>
 sentences	MAX
 testSources	/dev/null
 overwriteExecDir	true


[6/6] incubator-joshua git commit: Merge branch 'master' into 7

Posted by mj...@apache.org.
Merge branch 'master' into 7


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/0b543276
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/0b543276
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/0b543276

Branch: refs/heads/7
Commit: 0b543276e6e39ab26fa0f8819f7e56945651fd6a
Parents: f90cf3e 2d106df
Author: Matt Post <po...@cs.jhu.edu>
Authored: Mon Aug 29 13:42:54 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Mon Aug 29 13:42:54 2016 -0400

----------------------------------------------------------------------
 download-deps.sh                                  |  4 ++++
 scripts/training/paralign.pl                      | 14 +++++++-------
 scripts/training/pipeline.pl                      | 17 +++++++----------
 .../training/templates/alignment/word-align.conf  | 18 +++++++++---------
 4 files changed, 27 insertions(+), 26 deletions(-)
----------------------------------------------------------------------



[5/6] incubator-joshua git commit: Merge branch 'master' into JOSHUA-304

Posted by mj...@apache.org.
Merge branch 'master' into JOSHUA-304


Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/2d106df4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/2d106df4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/2d106df4

Branch: refs/heads/7
Commit: 2d106df46ca6f5c1130adf5c793041e33d8a7f59
Parents: 38eebb3 6d8f684
Author: Matt Post <po...@cs.jhu.edu>
Authored: Mon Aug 29 13:35:56 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Mon Aug 29 13:35:56 2016 -0400

----------------------------------------------------------------------
 README.md                                       |   1 +
 .../java/org/apache/joshua/decoder/Decoder.java | 235 +++++--------------
 .../org/apache/joshua/decoder/DecoderTask.java  | 197 ++++++++++++++++
 .../apache/joshua/decoder/DecoderThread.java    | 201 ----------------
 .../joshua/decoder/JoshuaConfiguration.java     |   3 +
 .../apache/joshua/decoder/JoshuaDecoder.java    |   6 +-
 .../org/apache/joshua/decoder/Translation.java  |   2 +-
 .../decoder/TranslationResponseStream.java      | 176 ++++++++++++++
 .../org/apache/joshua/decoder/Translations.java | 158 -------------
 .../joshua/decoder/chart_parser/Chart.java      |   2 +-
 .../org/apache/joshua/server/ServerThread.java  |  11 +-
 .../system/MultithreadedTranslationTests.java   |  51 +++-
 12 files changed, 480 insertions(+), 563 deletions(-)
----------------------------------------------------------------------