You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/08/29 17:43:07 UTC
[1/6] incubator-joshua git commit: added berkeleyaligner
Repository: incubator-joshua
Updated Branches:
refs/heads/7 f90cf3e4d -> 0b543276e
added berkeleyaligner
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/d3a2291c
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/d3a2291c
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/d3a2291c
Branch: refs/heads/7
Commit: d3a2291c004d383c5cb65ab466af582ab6f39d6c
Parents: 25d28fe
Author: Matt Post <po...@cs.jhu.edu>
Authored: Wed Aug 24 09:43:01 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Wed Aug 24 09:43:01 2016 -0400
----------------------------------------------------------------------
download-deps.sh | 4 ++++
1 file changed, 4 insertions(+)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/d3a2291c/download-deps.sh
----------------------------------------------------------------------
diff --git a/download-deps.sh b/download-deps.sh
index 6e4797b..8fbdf69 100755
--- a/download-deps.sh
+++ b/download-deps.sh
@@ -15,3 +15,7 @@ git clone https://github.com/joshua-decoder/giza-pp.git ext/giza-pp
git clone https://github.com/joshua-decoder/symal.git ext/symal
(make -C ext/symal all)
+
+git clone https://github.com/joshua-decoder/berkeleyaligner ext/berkeleyaligner
+(cd ext/berkeleyaligner; ant)
+
[2/6] incubator-joshua git commit: formatting fixes,
updated path to JAR
Posted by mj...@apache.org.
formatting fixes, updated path to JAR
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/255927db
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/255927db
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/255927db
Branch: refs/heads/7
Commit: 255927dba585965822c3d35f4d90e35d3199994f
Parents: d3a2291
Author: Matt Post <po...@cs.jhu.edu>
Authored: Wed Aug 24 10:07:38 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Wed Aug 24 10:07:38 2016 -0400
----------------------------------------------------------------------
scripts/training/paralign.pl | 2 +-
.../training/templates/alignment/word-align.conf | 18 +++++++++---------
2 files changed, 10 insertions(+), 10 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/255927db/scripts/training/paralign.pl
----------------------------------------------------------------------
diff --git a/scripts/training/paralign.pl b/scripts/training/paralign.pl
index d5159a7..2f04fc1 100755
--- a/scripts/training/paralign.pl
+++ b/scripts/training/paralign.pl
@@ -78,7 +78,7 @@ sub run_berkeley_aligner {
# run the job
$cachepipe->cmd("berkeley-aligner-chunk-$chunkno",
- "java -d64 -Xmx$args{aligner_mem} -jar $JOSHUA/lib/berkeleyaligner.jar ++alignments/$chunkno/word-align.conf",
+ "java -d64 -Xmx$args{aligner_mem} -jar $JOSHUA/ext/berkeleyaligner/distribution/berkeleyaligner.jar ++alignments/$chunkno/word-align.conf",
"alignments/$chunkno/word-align.conf",
"$args{train_dir}/splits/corpus.$args{source}.$chunkno",
"$args{train_dir}/splits/corpus.$args{target}.$chunkno",
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/255927db/scripts/training/templates/alignment/word-align.conf
----------------------------------------------------------------------
diff --git a/scripts/training/templates/alignment/word-align.conf b/scripts/training/templates/alignment/word-align.conf
index 83904d3..5fe3e0c 100644
--- a/scripts/training/templates/alignment/word-align.conf
+++ b/scripts/training/templates/alignment/word-align.conf
@@ -11,8 +11,8 @@
forwardModels MODEL1 HMM
reverseModels MODEL1 HMM
-mode JOINT JOINT
-iters 5 5
+mode JOINT JOINT
+iters 5 5
###############################################
# Execution: Controls output and program flow
@@ -20,9 +20,9 @@ iters 5 5
execDir alignments/<CHUNK>
create
-saveParams false
-numThreads 1
-msPerLine 10000
+saveParams false
+numThreads 1
+msPerLine 10000
alignTraining
#################
@@ -33,10 +33,10 @@ foreignSuffix <SOURCE>
englishSuffix <TARGET>
# Choose the training sources, which can either be directories or files that list files/directories
-trainSources <TRAIN_DIR>/splits/corpus
-sentences MAX
-testSources /dev/null
-overwriteExecDir true
+trainSources <TRAIN_DIR>/splits/corpus
+sentences MAX
+testSources /dev/null
+overwriteExecDir true
#################
# 1-best output
[3/6] incubator-joshua git commit: Merge branch 'master' into
JOSHUA-304
Posted by mj...@apache.org.
Merge branch 'master' into JOSHUA-304
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/fb5d35da
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/fb5d35da
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/fb5d35da
Branch: refs/heads/7
Commit: fb5d35da0999fe6533b91b6076a4c4032d5d6710
Parents: 255927d 0744ebf
Author: Matt Post <po...@cs.jhu.edu>
Authored: Wed Aug 24 15:44:44 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Wed Aug 24 15:44:44 2016 -0400
----------------------------------------------------------------------
CHANGELOG | 7 +
demo/README.md | 2 +-
demo/apache_joshua_logo.png | Bin 0 -> 306617 bytes
demo/apache_joshua_logo_faded.png | Bin 0 -> 309216 bytes
demo/demo.config | 3 +
demo/demo.js | 19 +-
demo/index.html | 37 +-
examples/README.md | 36 +-
pom.xml | 30 +-
resources/berkeley_lm/lm | 16 -
resources/berkeley_lm/lm.berkeleylm | Bin 4294 -> 0 bytes
resources/berkeley_lm/lm.berkeleylm.gz | Bin 1786 -> 0 bytes
resources/berkeley_lm/lm.gz | Bin 162 -> 0 bytes
resources/grammar.glue | 4 -
resources/kbest_extraction/glue-grammar | 3 -
resources/kbest_extraction/grammar | 25 -
resources/kbest_extraction/joshua.config | 27 -
resources/kbest_extraction/lm.gz | Bin 2466496 -> 0 bytes
resources/kbest_extraction/output.gold | 3126 ------------------
resources/kbest_extraction/output.scores.gold | 3126 ------------------
resources/kenlm/oilers.kenlm | Bin 49011 -> 0 bytes
resources/lm_oov/joshua.config | 17 -
resources/phrase_decoder/config | 29 -
resources/phrase_decoder/constrained.config | 28 -
.../phrase_decoder/constrained.output.gold | 5 -
resources/phrase_decoder/lm.1.gz | Bin 2235 -> 0 bytes
resources/phrase_decoder/output.gold | 1 -
resources/phrase_decoder/rules.1.gz | Bin 2998042 -> 0 bytes
resources/wa_grammar | 3 -
resources/wa_grammar.packed/config | 2 -
resources/wa_grammar.packed/encoding | Bin 154 -> 0 bytes
.../wa_grammar.packed/slice_00000.alignments | Bin 45 -> 0 bytes
.../wa_grammar.packed/slice_00000.features | Bin 47 -> 0 bytes
resources/wa_grammar.packed/slice_00000.source | Bin 204 -> 0 bytes
resources/wa_grammar.packed/slice_00000.target | Bin 128 -> 0 bytes
.../wa_grammar.packed/slice_00000.target.lookup | Bin 32 -> 0 bytes
resources/wa_grammar.packed/vocabulary | Bin 238 -> 0 bytes
.../org/apache/joshua/adagrad/AdaGradCore.java | 101 +-
.../org/apache/joshua/adagrad/Optimizer.java | 348 +-
.../org/apache/joshua/corpus/BasicPhrase.java | 2 +-
.../apache/joshua/corpus/ContiguousPhrase.java | 8 +-
.../java/org/apache/joshua/corpus/Phrase.java | 2 +-
.../java/org/apache/joshua/corpus/Span.java | 6 +-
.../org/apache/joshua/corpus/SymbolTable.java | 2 +-
.../org/apache/joshua/corpus/Vocabulary.java | 10 +-
.../joshua/corpus/syntax/ArraySyntaxTree.java | 51 +-
.../apache/joshua/corpus/syntax/SyntaxTree.java | 10 +-
.../org/apache/joshua/decoder/ArgsParser.java | 8 +-
.../java/org/apache/joshua/decoder/BLEU.java | 72 +-
.../java/org/apache/joshua/decoder/Decoder.java | 28 +-
.../apache/joshua/decoder/DecoderThread.java | 2 +-
.../joshua/decoder/JoshuaConfiguration.java | 45 +-
.../joshua/decoder/NbestMinRiskReranker.java | 33 +-
.../joshua/decoder/StructuredTranslation.java | 9 +-
.../decoder/StructuredTranslationFactory.java | 5 +-
.../org/apache/joshua/decoder/Translation.java | 8 +-
.../org/apache/joshua/decoder/Translations.java | 2 +-
.../joshua/decoder/chart_parser/Cell.java | 12 +-
.../joshua/decoder/chart_parser/Chart.java | 51 +-
.../decoder/chart_parser/ComputeNodeResult.java | 8 +-
.../decoder/chart_parser/CubePruneState.java | 20 +-
.../joshua/decoder/chart_parser/DotChart.java | 24 +-
.../joshua/decoder/chart_parser/SourcePath.java | 4 +-
.../decoder/chart_parser/StateConstraint.java | 5 +-
.../joshua/decoder/chart_parser/SuperNode.java | 2 +-
.../joshua/decoder/ff/FeatureFunction.java | 24 +-
.../apache/joshua/decoder/ff/FeatureVector.java | 21 +-
.../joshua/decoder/ff/LabelCombinationFF.java | 2 +-
.../joshua/decoder/ff/LabelSubstitutionFF.java | 8 +-
.../joshua/decoder/ff/LexicalFeatures.java | 2 +-
.../apache/joshua/decoder/ff/OOVPenalty.java | 7 +-
.../apache/joshua/decoder/ff/PhraseModel.java | 2 +-
.../apache/joshua/decoder/ff/PhrasePenalty.java | 4 +-
.../org/apache/joshua/decoder/ff/RuleFF.java | 6 +-
.../decoder/ff/RulePropertiesQuerying.java | 6 +-
.../org/apache/joshua/decoder/ff/RuleShape.java | 2 +-
.../joshua/decoder/ff/SourceDependentFF.java | 4 +-
.../apache/joshua/decoder/ff/SourcePathFF.java | 2 +-
.../apache/joshua/decoder/ff/TargetBigram.java | 13 +-
.../ff/fragmentlm/ConcatenationIterator.java | 10 +-
.../decoder/ff/fragmentlm/FragmentLMFF.java | 59 +-
.../ff/fragmentlm/PennTreebankReader.java | 17 +-
.../joshua/decoder/ff/fragmentlm/Tree.java | 56 +-
.../joshua/decoder/ff/fragmentlm/Trees.java | 8 +-
.../org/apache/joshua/decoder/ff/lm/KenLM.java | 26 +-
.../joshua/decoder/ff/lm/LanguageModelFF.java | 39 +-
.../ff/lm/berkeley_lm/LMGrammarBerkeley.java | 4 +-
.../ff/lm/bloomfilter_lm/BloomFilter.java | 2 +-
.../BloomFilterLanguageModel.java | 18 +-
.../joshua/decoder/ff/lm/buildin_lm/TrieLM.java | 25 +-
.../joshua/decoder/ff/phrase/Distortion.java | 2 +-
.../ff/similarity/EdgePhraseSimilarityFF.java | 17 +-
.../ff/state_maintenance/NgramDPState.java | 6 +-
.../joshua/decoder/ff/tm/AbstractGrammar.java | 12 +-
.../decoder/ff/tm/BasicRuleCollection.java | 2 +-
.../joshua/decoder/ff/tm/CreateGlueGrammar.java | 2 +-
.../joshua/decoder/ff/tm/GrammarReader.java | 2 +-
.../apache/joshua/decoder/ff/tm/OwnerMap.java | 2 +-
.../org/apache/joshua/decoder/ff/tm/Rule.java | 67 +-
.../decoder/ff/tm/SentenceFilteredGrammar.java | 12 +-
.../decoder/ff/tm/format/MosesFormatReader.java | 2 +-
.../ff/tm/hash_based/ExtensionIterator.java | 2 +-
.../tm/hash_based/MemoryBasedBatchGrammar.java | 8 +-
.../decoder/ff/tm/packed/PackedGrammar.java | 87 +-
.../ff/tm/packed/SliceAggregatingTrie.java | 4 +-
.../decoder/hypergraph/AlignedSourceTokens.java | 2 +-
.../decoder/hypergraph/AllSpansWalker.java | 19 +-
.../hypergraph/DefaultInsideOutside.java | 34 +-
.../joshua/decoder/hypergraph/ForestWalker.java | 10 +-
.../GrammarBuilderWalkerFunction.java | 14 +-
.../joshua/decoder/hypergraph/HGNode.java | 54 +-
.../joshua/decoder/hypergraph/HyperEdge.java | 6 +-
.../joshua/decoder/hypergraph/HyperGraph.java | 30 +-
.../decoder/hypergraph/HyperGraphPruning.java | 9 +-
.../decoder/hypergraph/KBestExtractor.java | 51 +-
.../hypergraph/OutputStringExtractor.java | 8 +-
.../hypergraph/StringToTreeConverter.java | 16 +-
.../decoder/hypergraph/ViterbiExtractor.java | 10 +-
.../hypergraph/WordAlignmentExtractor.java | 2 +-
.../decoder/hypergraph/WordAlignmentState.java | 8 +-
.../apache/joshua/decoder/io/JSONMessage.java | 18 +-
.../decoder/io/TranslationRequestStream.java | 6 +-
.../apache/joshua/decoder/phrase/Candidate.java | 38 +-
.../apache/joshua/decoder/phrase/Coverage.java | 2 +-
.../apache/joshua/decoder/phrase/Future.java | 8 +-
.../apache/joshua/decoder/phrase/Header.java | 87 +
.../joshua/decoder/phrase/Hypothesis.java | 5 +-
.../joshua/decoder/phrase/PhraseChart.java | 73 +-
.../joshua/decoder/phrase/PhraseNodes.java | 58 +
.../joshua/decoder/phrase/PhraseTable.java | 4 +-
.../org/apache/joshua/decoder/phrase/Stack.java | 13 +-
.../apache/joshua/decoder/phrase/Stacks.java | 25 +-
.../joshua/decoder/phrase/TargetPhrases.java | 87 -
.../decoder/segment_file/ConstraintRule.java | 4 +-
.../joshua/decoder/segment_file/Sentence.java | 18 +-
.../joshua/decoder/segment_file/Token.java | 9 +-
.../java/org/apache/joshua/pro/PROCore.java | 22 +-
.../org/apache/joshua/server/ServerThread.java | 9 +-
.../LMBerkeleySentenceProbablityTest.java | 4 +-
.../lm/berkeley_lm/LMGrammarBerkeleyTest.java | 10 +-
.../class_lm/ClassBasedLanguageModelTest.java | 4 +-
.../kbest_extraction/KBestExtractionTest.java | 4 +-
.../phrase/decode/PhraseDecodingTest.java | 14 +-
.../org/apache/joshua/system/KenLmTest.java | 2 +-
.../apache/joshua/system/LmOovFeatureTest.java | 13 +-
.../system/MultithreadedTranslationTests.java | 4 +-
.../joshua/system/StructuredOutputTest.java | 4 +-
.../system/StructuredTranslationTest.java | 4 +-
src/test/resources/berkeley_lm/lm | 16 +
src/test/resources/berkeley_lm/lm.berkeleylm | Bin 0 -> 4294 bytes
src/test/resources/berkeley_lm/lm.berkeleylm.gz | Bin 0 -> 1786 bytes
src/test/resources/berkeley_lm/lm.gz | Bin 0 -> 162 bytes
src/test/resources/grammar.glue | 4 +
.../resources/kbest_extraction/glue-grammar | 3 +
src/test/resources/kbest_extraction/grammar | 25 +
.../resources/kbest_extraction/joshua.config | 27 +
src/test/resources/kbest_extraction/lm.gz | Bin 0 -> 2466496 bytes
src/test/resources/kbest_extraction/output.gold | 3126 ++++++++++++++++++
.../kbest_extraction/output.scores.gold | 3126 ++++++++++++++++++
src/test/resources/kenlm/oilers.kenlm | Bin 0 -> 49011 bytes
src/test/resources/lm_oov/joshua.config | 17 +
src/test/resources/phrase_decoder/config | 29 +
.../resources/phrase_decoder/constrained.config | 28 +
.../phrase_decoder/constrained.output.gold | 5 +
src/test/resources/phrase_decoder/lm.1.gz | Bin 0 -> 2235 bytes
src/test/resources/phrase_decoder/output.gold | 1 +
src/test/resources/phrase_decoder/rules.1.gz | Bin 0 -> 2998042 bytes
src/test/resources/wa_grammar | 3 +
src/test/resources/wa_grammar.packed/config | 2 +
src/test/resources/wa_grammar.packed/encoding | Bin 0 -> 154 bytes
.../wa_grammar.packed/slice_00000.alignments | Bin 0 -> 45 bytes
.../wa_grammar.packed/slice_00000.features | Bin 0 -> 47 bytes
.../wa_grammar.packed/slice_00000.source | Bin 0 -> 204 bytes
.../wa_grammar.packed/slice_00000.target | Bin 0 -> 128 bytes
.../wa_grammar.packed/slice_00000.target.lookup | Bin 0 -> 32 bytes
src/test/resources/wa_grammar.packed/vocabulary | Bin 0 -> 238 bytes
176 files changed, 7633 insertions(+), 7619 deletions(-)
----------------------------------------------------------------------
[4/6] incubator-joshua git commit: updated format of corpus splits to
get berkeley aligner working again
Posted by mj...@apache.org.
updated format of corpus splits to get berkeley aligner working again
I'm not sure why, but the Berkeley Aligner broke. It seems that the jar
file that used to be included with Joshua was an old version, even though
the Berkeley Aligner itself hasn't been updated for almost a decade. This
change makes some minor adjustments that get it working again: each corpus
split now lives in its own directory (splits/<chunk>/corpus.<lang>), and the
pipeline reads the Berkeley output from training.<target>-<source>.align.
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/38eebb3b
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/38eebb3b
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/38eebb3b
Branch: refs/heads/7
Commit: 38eebb3b58375d0da584f470de66df68476ab938
Parents: fb5d35d
Author: Matt Post <po...@cs.jhu.edu>
Authored: Wed Aug 24 16:16:29 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Wed Aug 24 16:16:29 2016 -0400
----------------------------------------------------------------------
scripts/training/paralign.pl | 12 ++++++------
scripts/training/pipeline.pl | 17 +++++++----------
.../training/templates/alignment/word-align.conf | 2 +-
3 files changed, 14 insertions(+), 17 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/38eebb3b/scripts/training/paralign.pl
----------------------------------------------------------------------
diff --git a/scripts/training/paralign.pl b/scripts/training/paralign.pl
index 2f04fc1..8b0fd28 100755
--- a/scripts/training/paralign.pl
+++ b/scripts/training/paralign.pl
@@ -54,9 +54,9 @@ sub run_giza {
my ($chunkdir,$chunkno,$do_parallel) = @_;
my $parallel = ($do_parallel == 1) ? "-parallel" : "";
$cachepipe->cmd("giza-$chunkno",
- "rm -f $chunkdir/corpus.0-0.*; $args{giza_trainer} --root-dir $chunkdir -e $args{target}.$chunkno -f $args{source}.$chunkno -corpus $args{train_dir}/splits/corpus -merge $args{giza_merge} $parallel > $chunkdir/giza.log 2>&1",
- "$args{train_dir}/splits/corpus.$args{source}.$chunkno",
- "$args{train_dir}/splits/corpus.$args{target}.$chunkno",
+ "rm -f $chunkdir/corpus.0-0.*; $args{giza_trainer} --root-dir $chunkdir -e $args{target} -f $args{source} -corpus $args{train_dir}/splits/$chunkno/corpus -merge $args{giza_merge} $parallel > $chunkdir/giza.log 2>&1",
+ "$args{train_dir}/splits/$chunkno/corpus.$args{source}",
+ "$args{train_dir}/splits/$chunkno/corpus.$args{target}",
"$chunkdir/model/aligned.$args{giza_merge}");
}
@@ -67,8 +67,8 @@ sub run_berkeley_aligner {
open FROM, $aligner_conf or die "can't read berkeley alignment template";
open TO, ">", "alignments/$chunkno/word-align.conf" or die "can't write to 'alignments/$chunkno/word-align.conf'";
while (<FROM>) {
- s/<SOURCE>/$args{source}.$chunkno/g;
- s/<TARGET>/$args{target}.$chunkno/g;
+ s/<SOURCE>/$args{source}/g;
+ s/<TARGET>/$args{target}/g;
s/<CHUNK>/$chunkno/g;
s/<TRAIN_DIR>/$args{train_dir}/g;
print TO;
@@ -91,5 +91,5 @@ sub run_jacana_aligner {
# run the job
$cachepipe->cmd("jacana-aligner-chunk-$chunkno",
- "java -d64 -Xmx$args{aligner_mem} -DJACANA_HOME=$jacana_home -jar $JOSHUA/lib/jacana-xy.jar -m $jacana_home/resources/model/fr-en.model -src fr -tgt en -a $args{train_dir}/splits/corpus.$args{source}.$chunkno -b $args{train_dir}/splits/corpus.$args{target}.$chunkno -o $chunkdir/training.align");
+ "java -d64 -Xmx$args{aligner_mem} -DJACANA_HOME=$jacana_home -jar $JOSHUA/lib/jacana-xy.jar -m $jacana_home/resources/model/fr-en.model -src fr -tgt en -a $args{train_dir}/splits/$chunkno/corpus.$args{source} -b $args{train_dir}/splits/$chunkno/corpus.$args{target} -o $chunkdir/training.align");
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/38eebb3b/scripts/training/pipeline.pl
----------------------------------------------------------------------
diff --git a/scripts/training/pipeline.pl b/scripts/training/pipeline.pl
index 08933ec..c0e33d3 100755
--- a/scripts/training/pipeline.pl
+++ b/scripts/training/pipeline.pl
@@ -797,8 +797,11 @@ if (! defined $ALIGNMENT) {
if ($chunk != $lastchunk) {
close CHUNK_SOURCE;
close CHUNK_TARGET;
- open CHUNK_SOURCE, ">", "$DATA_DIRS{train}/splits/corpus.$SOURCE.$chunk" or die;
- open CHUNK_TARGET, ">", "$DATA_DIRS{train}/splits/corpus.$TARGET.$chunk" or die;
+
+ mkdir("$DATA_DIRS{train}/splits/$chunk");
+
+ open CHUNK_SOURCE, ">", "$DATA_DIRS{train}/splits/$chunk/corpus.$SOURCE" or die;
+ open CHUNK_TARGET, ">", "$DATA_DIRS{train}/splits/$chunk/corpus.$TARGET" or die;
$lastchunk = $chunk;
}
@@ -817,13 +820,7 @@ if (! defined $ALIGNMENT) {
# $max_aligner_threads /= 2;
# }
- # # With multi-threading, we can use a pool to set up concurrent GIZA jobs on the chunks.
- #
- # TODO: implement this. There appears to be a problem with calling system() in threads.
- #
- # my $pool = new Thread::Pool(Min => 1, Max => $max_aligner_threads);
-
- system("mkdir alignments") unless -d "alignments";
+ mkdir("alignments") unless -d "alignments";
my $aligner_cmd = (
"$SCRIPTDIR/training/paralign.pl "
@@ -875,7 +872,7 @@ if (! defined $ALIGNMENT) {
if ($ALIGNER eq "giza") {
@aligned_files = map { "alignments/$_/model/aligned.$GIZA_MERGE" } (0..$lastchunk);
} elsif ($ALIGNER eq "berkeley") {
- @aligned_files = map { "alignments/$_/training.align" } (0..$lastchunk);
+ @aligned_files = map { "alignments/$_/training.$TARGET-$SOURCE.align" } (0..$lastchunk);
} elsif ($ALIGNER eq "jacana") {
@aligned_files = map { "alignments/$_/training.align" } (0..$lastchunk);
}
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/38eebb3b/scripts/training/templates/alignment/word-align.conf
----------------------------------------------------------------------
diff --git a/scripts/training/templates/alignment/word-align.conf b/scripts/training/templates/alignment/word-align.conf
index 5fe3e0c..1622fb9 100644
--- a/scripts/training/templates/alignment/word-align.conf
+++ b/scripts/training/templates/alignment/word-align.conf
@@ -33,7 +33,7 @@ foreignSuffix <SOURCE>
englishSuffix <TARGET>
# Choose the training sources, which can either be directories or files that list files/directories
-trainSources <TRAIN_DIR>/splits/corpus
+trainSources <TRAIN_DIR>/splits/<CHUNK>
sentences MAX
testSources /dev/null
overwriteExecDir true
[6/6] incubator-joshua git commit: Merge branch 'master' into 7
Posted by mj...@apache.org.
Merge branch 'master' into 7
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/0b543276
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/0b543276
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/0b543276
Branch: refs/heads/7
Commit: 0b543276e6e39ab26fa0f8819f7e56945651fd6a
Parents: f90cf3e 2d106df
Author: Matt Post <po...@cs.jhu.edu>
Authored: Mon Aug 29 13:42:54 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Mon Aug 29 13:42:54 2016 -0400
----------------------------------------------------------------------
download-deps.sh | 4 ++++
scripts/training/paralign.pl | 14 +++++++-------
scripts/training/pipeline.pl | 17 +++++++----------
.../training/templates/alignment/word-align.conf | 18 +++++++++---------
4 files changed, 27 insertions(+), 26 deletions(-)
----------------------------------------------------------------------
[5/6] incubator-joshua git commit: Merge branch 'master' into
JOSHUA-304
Posted by mj...@apache.org.
Merge branch 'master' into JOSHUA-304
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/2d106df4
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/2d106df4
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/2d106df4
Branch: refs/heads/7
Commit: 2d106df46ca6f5c1130adf5c793041e33d8a7f59
Parents: 38eebb3 6d8f684
Author: Matt Post <po...@cs.jhu.edu>
Authored: Mon Aug 29 13:35:56 2016 -0400
Committer: Matt Post <po...@cs.jhu.edu>
Committed: Mon Aug 29 13:35:56 2016 -0400
----------------------------------------------------------------------
README.md | 1 +
.../java/org/apache/joshua/decoder/Decoder.java | 235 +++++--------------
.../org/apache/joshua/decoder/DecoderTask.java | 197 ++++++++++++++++
.../apache/joshua/decoder/DecoderThread.java | 201 ----------------
.../joshua/decoder/JoshuaConfiguration.java | 3 +
.../apache/joshua/decoder/JoshuaDecoder.java | 6 +-
.../org/apache/joshua/decoder/Translation.java | 2 +-
.../decoder/TranslationResponseStream.java | 176 ++++++++++++++
.../org/apache/joshua/decoder/Translations.java | 158 -------------
.../joshua/decoder/chart_parser/Chart.java | 2 +-
.../org/apache/joshua/server/ServerThread.java | 11 +-
.../system/MultithreadedTranslationTests.java | 51 +++-
12 files changed, 480 insertions(+), 563 deletions(-)
----------------------------------------------------------------------