You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/04/09 05:10:49 UTC

[40/44] incubator-joshua-site git commit: First attempt

http://git-wip-us.apache.org/repos/asf/incubator-joshua-site/blob/53cc3005/4.0/thrax.html
----------------------------------------------------------------------
diff --git a/4.0/thrax.html b/4.0/thrax.html
new file mode 100644
index 0000000..87ec518
--- /dev/null
+++ b/4.0/thrax.html
@@ -0,0 +1,264 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+  <head>
+    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+    <link rel="stylesheet" type="text/css" media="screen,print" href="../joshua4.css" />
+    <title>Joshua | Grammar extraction with Thrax</title>
+  </head>
+
+  <body>
+
+    <div id="navbar">
+      <a href="http://joshua-decoder.org/">
+        <img src="../images/joshua-logo-small.png" width="130px" 
+             alt="Joshua logo (picture of a Joshua tree)" />
+      </a>
+
+      <p class="infobox">
+        <b>Stable version</b><br />
+        4.1<br/><br/>
+        <b>Release date</b><br />
+        2013 January
+      </p>
+
+<!--       <div class="infobox"> -->
+<!--         <b>AUTO LINKS</b><br/> -->
+<!--         <ul> -->
+<!--            -->
+<!--           <li> Advanced features</li> -->
+<!--            -->
+<!--           <li> Advanced features</li> -->
+<!--            -->
+<!--           <li> Advanced features</li> -->
+<!--            -->
+<!--           <li> Building a language pack</li> -->
+<!--            -->
+<!--           <li> Building a language pack</li> -->
+<!--            -->
+<!--           <li> Bundling a configuration</li> -->
+<!--            -->
+<!--           <li> Contributors</li> -->
+<!--            -->
+<!--           <li> Decoder configuration parameters</li> -->
+<!--            -->
+<!--           <li> Decoder configuration parameters</li> -->
+<!--            -->
+<!--           <li> Decoder configuration parameters</li> -->
+<!--            -->
+<!--           <li> Decoder configuration parameters</li> -->
+<!--            -->
+<!--           <li> Frequently Asked Questions</li> -->
+<!--            -->
+<!--           <li> Common problems</li> -->
+<!--            -->
+<!--           <li> Frequently Asked Questions</li> -->
+<!--            -->
+<!--           <li> Common problems</li> -->
+<!--            -->
+<!--           <li> Features</li> -->
+<!--            -->
+<!--           <li> Features</li> -->
+<!--            -->
+<!--           <li> Features</li> -->
+<!--            -->
+<!--           <li> Features</li> -->
+<!--            -->
+<!--           <li> Joshua file formats</li> -->
+<!--            -->
+<!--           <li> Joshua file formats</li> -->
+<!--            -->
+<!--           <li> Joshua file formats</li> -->
+<!--            -->
+<!--           <li> Joshua file formats</li> -->
+<!--            -->
+<!--           <li> </li> -->
+<!--            -->
+<!--           <li> </li> -->
+<!--            -->
+<!--           <li> </li> -->
+<!--            -->
+<!--           <li> Fisher and CALLHOME Spanish English Speech Translation Corpus</li> -->
+<!--            -->
+<!--           <li> Indian Languages Parallel Corpora</li> -->
+<!--            -->
+<!--           <li> Joshua 4.0 User Documentation</li> -->
+<!--            -->
+<!--           <li> Language packs</li> -->
+<!--            -->
+<!--           <li> Paraphrase Packs</li> -->
+<!--            -->
+<!--           <li> Joshua releases</li> -->
+<!--            -->
+<!--           <li> Support</li> -->
+<!--            -->
+<!--           <li> Getting Started</li> -->
+<!--            -->
+<!--           <li> Welcome to Joshua</li> -->
+<!--            -->
+<!--           <li> Joshua documentation</li> -->
+<!--            -->
+<!--           <li> Joshua documentation</li> -->
+<!--            -->
+<!--           <li> Installation</li> -->
+<!--            -->
+<!--           <li> Installation</li> -->
+<!--            -->
+<!--           <li> Alignment with Jacana</li> -->
+<!--            -->
+<!--           <li> Alignment with Jacana</li> -->
+<!--            -->
+<!--           <li> Alignment with Jacana</li> -->
+<!--            -->
+<!--           <li> Building large LMs with SRILM</li> -->
+<!--            -->
+<!--           <li> Building large LMs with SRILM</li> -->
+<!--            -->
+<!--           <li> Building large LMs with SRILM</li> -->
+<!--            -->
+<!--           <li> Building large LMs with SRILM</li> -->
+<!--            -->
+<!--           <li> Lattice decoding</li> -->
+<!--            -->
+<!--           <li> Grammar Packing</li> -->
+<!--            -->
+<!--           <li> Grammar Packing</li> -->
+<!--            -->
+<!--           <li> Grammar Packing</li> -->
+<!--            -->
+<!--           <li> Grammar Packing</li> -->
+<!--            -->
+<!--           <li> The Joshua Pipeline</li> -->
+<!--            -->
+<!--           <li> The Joshua Pipeline</li> -->
+<!--            -->
+<!--           <li> The Joshua Pipeline</li> -->
+<!--            -->
+<!--           <li> The Joshua Pipeline</li> -->
+<!--            -->
+<!--           <li> Quick Start</li> -->
+<!--            -->
+<!--           <li> Quick Start</li> -->
+<!--            -->
+<!--           <li> Releases</li> -->
+<!--            -->
+<!--           <li> Server mode</li> -->
+<!--            -->
+<!--           <li> Server mode</li> -->
+<!--            -->
+<!--           <li> Server mode</li> -->
+<!--            -->
+<!--           <li> Installing and running the Joshua Decoder</li> -->
+<!--            -->
+<!--           <li> Grammar extraction with Thrax</li> -->
+<!--            -->
+<!--           <li> Grammar extraction with Thrax</li> -->
+<!--            -->
+<!--           <li> Grammar extraction with Thrax</li> -->
+<!--            -->
+<!--           <li> Grammar extraction with Thrax</li> -->
+<!--            -->
+<!--           <li> Building Translation Models</li> -->
+<!--            -->
+<!--           <li> Building Translation Models</li> -->
+<!--            -->
+<!--           <li> Building Translation Models</li> -->
+<!--            -->
+<!--           <li> Building Translation Models</li> -->
+<!--            -->
+<!--           <li> Pipeline tutorial</li> -->
+<!--            -->
+<!--           <li> Pipeline tutorial</li> -->
+<!--            -->
+<!--           <li> Pipeline tutorial</li> -->
+<!--            -->
+<!--           <li> What's New</li> -->
+<!--            -->
+<!--           <li> What's New</li> -->
+<!--            -->
+<!--           <li> Z-MERT</li> -->
+<!--            -->
+<!--           <li> Z-MERT</li> -->
+<!--            -->
+<!--           <li> Z-MERT</li> -->
+<!--            -->
+<!--           <li> Z-MERT</li> -->
+<!--            -->
+<!--           <li> </li> -->
+<!--            -->
+<!--           <li> </li> -->
+<!--            -->
+<!--           <li> </li> -->
+<!--            -->
+<!--         </ul> -->
+<!--       </div>   -->
+
+      <div class="infobox">
+
+        <b>Links</b><br />
+        <ul>
+          <li> <a href="../index.html">Main</a> </li>
+          <li> <a href="pipeline.html">Pipeline</a> </li>
+          <li> <a href="step-by-step-instructions.html">Manual walkthrough</a> </li>
+          <li> <a href="decoder.html">Decoder</a> </li>
+          <li> <a href="server.html">Decoder Server</a> </li>
+          <li> <a href="file-formats.html">File formats</a> </li>
+          <li> <a href="thrax.html">Grammar Extraction</a> </li>
+          <li> <a href="../releases.html">Releases</a> </li>
+        </ul>
+      </div>
+
+      <div class="infobox">
+        <b>Advanced</b><br />
+        <ul>
+<!--          <li> <a href="packing.html">Grammar packing</a> </li> -->
+          <li> <a href="large-lms.html">Building large LMs</a> </li>
+          <li> <a href="zmert.html">Running Z-MERT</a> </li>
+          <li> <a href="lattice.html">Lattices</a> </li>
+          <li> <a href="server.html">TCP/IP server</a> </li>
+          <li> <a href="bundle.html">Bundled configuration</a> </li>
+        </ul>
+      </div>
+
+      <div class="infobox">
+        <b>Help</b><br />
+        <ul>
+          <li> <a href="faq.html">Answers</a> </li>
+          <li> <a href="https://groups.google.com/d/forum/joshua_support">Archive</a> </li>
+        </ul>
+      </div>
+
+      <div class="footer">
+        Last updated on April 08, 2016
+      </div>
+
+    </div>
+
+    <div id="main">
+      <div id="title">
+        <h1>Grammar extraction with Thrax</h1>
+      </div>
+
+      <div id="content">
+        
+        <p>One day, this will hold Thrax documentation, including how to use Thrax, how to do grammar
+filtering, and details on the configuration file options.  It will also include details about our
+experience setting up and maintaining Hadoop cluster installations, knowledge wrought of hard-fought
+sweat and tears.</p>
+
+<p>In the meantime, please bother <a href="http://cs.jhu.edu/~jonny/">Jonny Weese</a> if there is something you
+need to do that you don’t understand.  You might also be able to dig up some information <a href="http://cs.jhu.edu/~jonny/thrax/">on the old
+Thrax page</a>.</p>
+
+
+      </div>
+    </div>
+
+  </body>
+</html>
+
+
+
+
+

http://git-wip-us.apache.org/repos/asf/incubator-joshua-site/blob/53cc3005/4.0/thrax.md
----------------------------------------------------------------------
diff --git a/4.0/thrax.md b/4.0/thrax.md
deleted file mode 100644
index 6b276b0..0000000
--- a/4.0/thrax.md
+++ /dev/null
@@ -1,14 +0,0 @@
----
-layout: default4
-category: advanced
-title: Grammar extraction with Thrax
----
-
-One day, this will hold Thrax documentation, including how to use Thrax, how to do grammar
-filtering, and details on the configuration file options.  It will also include details about our
-experience setting up and maintaining Hadoop cluster installations, knowledge wrought of hard-fought
-sweat and tears.
-
-In the meantime, please bother [Jonny Weese](http://cs.jhu.edu/~jonny/) if there is something you
-need to do that you don't understand.  You might also be able to dig up some information [on the old
-Thrax page](http://cs.jhu.edu/~jonny/thrax/).

http://git-wip-us.apache.org/repos/asf/incubator-joshua-site/blob/53cc3005/4.0/tms.html
----------------------------------------------------------------------
diff --git a/4.0/tms.html b/4.0/tms.html
new file mode 100644
index 0000000..1e38df8
--- /dev/null
+++ b/4.0/tms.html
@@ -0,0 +1,377 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+  <head>
+    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+    <link rel="stylesheet" type="text/css" media="screen,print" href="../joshua4.css" />
+    <title>Joshua | Building Translation Models</title>
+  </head>
+
+  <body>
+
+    <div id="navbar">
+      <a href="http://joshua-decoder.org/">
+        <img src="../images/joshua-logo-small.png" width="130px" 
+             alt="Joshua logo (picture of a Joshua tree)" />
+      </a>
+
+      <p class="infobox">
+        <b>Stable version</b><br />
+        4.1<br/><br/>
+        <b>Release date</b><br />
+        2013 January
+      </p>
+
+<!--       <div class="infobox"> -->
+<!--         <b>AUTO LINKS</b><br/> -->
+<!--         <ul> -->
+<!--            -->
+<!--           <li> Advanced features</li> -->
+<!--            -->
+<!--           <li> Advanced features</li> -->
+<!--            -->
+<!--           <li> Advanced features</li> -->
+<!--            -->
+<!--           <li> Building a language pack</li> -->
+<!--            -->
+<!--           <li> Building a language pack</li> -->
+<!--            -->
+<!--           <li> Bundling a configuration</li> -->
+<!--            -->
+<!--           <li> Contributors</li> -->
+<!--            -->
+<!--           <li> Decoder configuration parameters</li> -->
+<!--            -->
+<!--           <li> Decoder configuration parameters</li> -->
+<!--            -->
+<!--           <li> Decoder configuration parameters</li> -->
+<!--            -->
+<!--           <li> Decoder configuration parameters</li> -->
+<!--            -->
+<!--           <li> Frequently Asked Questions</li> -->
+<!--            -->
+<!--           <li> Common problems</li> -->
+<!--            -->
+<!--           <li> Frequently Asked Questions</li> -->
+<!--            -->
+<!--           <li> Common problems</li> -->
+<!--            -->
+<!--           <li> Features</li> -->
+<!--            -->
+<!--           <li> Features</li> -->
+<!--            -->
+<!--           <li> Features</li> -->
+<!--            -->
+<!--           <li> Features</li> -->
+<!--            -->
+<!--           <li> Joshua file formats</li> -->
+<!--            -->
+<!--           <li> Joshua file formats</li> -->
+<!--            -->
+<!--           <li> Joshua file formats</li> -->
+<!--            -->
+<!--           <li> Joshua file formats</li> -->
+<!--            -->
+<!--           <li> </li> -->
+<!--            -->
+<!--           <li> </li> -->
+<!--            -->
+<!--           <li> </li> -->
+<!--            -->
+<!--           <li> Fisher and CALLHOME Spanish English Speech Translation Corpus</li> -->
+<!--            -->
+<!--           <li> Indian Languages Parallel Corpora</li> -->
+<!--            -->
+<!--           <li> Joshua 4.0 User Documentation</li> -->
+<!--            -->
+<!--           <li> Language packs</li> -->
+<!--            -->
+<!--           <li> Paraphrase Packs</li> -->
+<!--            -->
+<!--           <li> Joshua releases</li> -->
+<!--            -->
+<!--           <li> Support</li> -->
+<!--            -->
+<!--           <li> Getting Started</li> -->
+<!--            -->
+<!--           <li> Welcome to Joshua</li> -->
+<!--            -->
+<!--           <li> Joshua documentation</li> -->
+<!--            -->
+<!--           <li> Joshua documentation</li> -->
+<!--            -->
+<!--           <li> Installation</li> -->
+<!--            -->
+<!--           <li> Installation</li> -->
+<!--            -->
+<!--           <li> Alignment with Jacana</li> -->
+<!--            -->
+<!--           <li> Alignment with Jacana</li> -->
+<!--            -->
+<!--           <li> Alignment with Jacana</li> -->
+<!--            -->
+<!--           <li> Building large LMs with SRILM</li> -->
+<!--            -->
+<!--           <li> Building large LMs with SRILM</li> -->
+<!--            -->
+<!--           <li> Building large LMs with SRILM</li> -->
+<!--            -->
+<!--           <li> Building large LMs with SRILM</li> -->
+<!--            -->
+<!--           <li> Lattice decoding</li> -->
+<!--            -->
+<!--           <li> Grammar Packing</li> -->
+<!--            -->
+<!--           <li> Grammar Packing</li> -->
+<!--            -->
+<!--           <li> Grammar Packing</li> -->
+<!--            -->
+<!--           <li> Grammar Packing</li> -->
+<!--            -->
+<!--           <li> The Joshua Pipeline</li> -->
+<!--            -->
+<!--           <li> The Joshua Pipeline</li> -->
+<!--            -->
+<!--           <li> The Joshua Pipeline</li> -->
+<!--            -->
+<!--           <li> The Joshua Pipeline</li> -->
+<!--            -->
+<!--           <li> Quick Start</li> -->
+<!--            -->
+<!--           <li> Quick Start</li> -->
+<!--            -->
+<!--           <li> Releases</li> -->
+<!--            -->
+<!--           <li> Server mode</li> -->
+<!--            -->
+<!--           <li> Server mode</li> -->
+<!--            -->
+<!--           <li> Server mode</li> -->
+<!--            -->
+<!--           <li> Installing and running the Joshua Decoder</li> -->
+<!--            -->
+<!--           <li> Grammar extraction with Thrax</li> -->
+<!--            -->
+<!--           <li> Grammar extraction with Thrax</li> -->
+<!--            -->
+<!--           <li> Grammar extraction with Thrax</li> -->
+<!--            -->
+<!--           <li> Grammar extraction with Thrax</li> -->
+<!--            -->
+<!--           <li> Building Translation Models</li> -->
+<!--            -->
+<!--           <li> Building Translation Models</li> -->
+<!--            -->
+<!--           <li> Building Translation Models</li> -->
+<!--            -->
+<!--           <li> Building Translation Models</li> -->
+<!--            -->
+<!--           <li> Pipeline tutorial</li> -->
+<!--            -->
+<!--           <li> Pipeline tutorial</li> -->
+<!--            -->
+<!--           <li> Pipeline tutorial</li> -->
+<!--            -->
+<!--           <li> What's New</li> -->
+<!--            -->
+<!--           <li> What's New</li> -->
+<!--            -->
+<!--           <li> Z-MERT</li> -->
+<!--            -->
+<!--           <li> Z-MERT</li> -->
+<!--            -->
+<!--           <li> Z-MERT</li> -->
+<!--            -->
+<!--           <li> Z-MERT</li> -->
+<!--            -->
+<!--           <li> </li> -->
+<!--            -->
+<!--           <li> </li> -->
+<!--            -->
+<!--           <li> </li> -->
+<!--            -->
+<!--         </ul> -->
+<!--       </div>   -->
+
+      <div class="infobox">
+
+        <b>Links</b><br />
+        <ul>
+          <li> <a href="../index.html">Main</a> </li>
+          <li> <a href="pipeline.html">Pipeline</a> </li>
+          <li> <a href="step-by-step-instructions.html">Manual walkthrough</a> </li>
+          <li> <a href="decoder.html">Decoder</a> </li>
+          <li> <a href="server.html">Decoder Server</a> </li>
+          <li> <a href="file-formats.html">File formats</a> </li>
+          <li> <a href="thrax.html">Grammar Extraction</a> </li>
+          <li> <a href="../releases.html">Releases</a> </li>
+        </ul>
+      </div>
+
+      <div class="infobox">
+        <b>Advanced</b><br />
+        <ul>
+<!--          <li> <a href="packing.html">Grammar packing</a> </li> -->
+          <li> <a href="large-lms.html">Building large LMs</a> </li>
+          <li> <a href="zmert.html">Running Z-MERT</a> </li>
+          <li> <a href="lattice.html">Lattices</a> </li>
+          <li> <a href="server.html">TCP/IP server</a> </li>
+          <li> <a href="bundle.html">Bundled configuration</a> </li>
+        </ul>
+      </div>
+
+      <div class="infobox">
+        <b>Help</b><br />
+        <ul>
+          <li> <a href="faq.html">Answers</a> </li>
+          <li> <a href="https://groups.google.com/d/forum/joshua_support">Archive</a> </li>
+        </ul>
+      </div>
+
+      <div class="footer">
+        Last updated on April 08, 2016
+      </div>
+
+    </div>
+
+    <div id="main">
+      <div id="title">
+        <h1>Building Translation Models</h1>
+      </div>
+
+      <div id="content">
+        
+        <h1 id="build-a-translation-model">Build a translation model</h1>
+
+<p>Extracting a grammar from a large amount of data is a multi-step process. The first requirement is parallel data. The Europarl, Call Home, and Fisher corpora all contain parallel translations of Spanish and English sentences.</p>
+
+<p>We will copy (or symlink) the parallel source text files into a subdirectory called <code class="highlighter-rouge">input/</code>.</p>
+
+<p>Then, we concatenate all the training files on each side. The pipeline script normally does tokenization and normalization, but in this instance we have a custom tokenizer we need to apply to the source side, so we have to do it manually and then skip that step using the <code class="highlighter-rouge">pipeline.pl</code> option <code class="highlighter-rouge">--first-step align</code>.</p>
+
+<ul>
+  <li>
+    <p>to tokenize the English data, do</p>
+
+    <div class="highlighter-rouge"><pre class="highlight"><code>cat callhome.en europarl.en fisher.en &gt; all.en | $JOSHUA/scripts/training/normalize-punctuation.pl en | $JOSHUA/scripts/training/penn-treebank-tokenizer.perl | $JOSHUA/scripts/lowercase.perl &gt; all.norm.tok.lc.en
+</code></pre>
+    </div>
+  </li>
+</ul>
+
+<p>The same can be done for the Spanish side of the input data:</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>cat callhome.es europarl.es fisher.es &gt; all.es | $JOSHUA/scripts/training/normalize-punctuation.pl es | $JOSHUA/scripts/training/penn-treebank-tokenizer.perl | $JOSHUA/scripts/lowercase.perl &gt; all.norm.tok.lc.es
+</code></pre>
+</div>
+
+<p>By the way, an alternative tokenizer is a Twitter tokenizer found in the <a href="http://github.com/vandurme/jerboa">Jerboa</a> project.</p>
+
+<p>The final step in the training data preparation is to remove all examples in which either of the language sides is a blank line.</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>paste all.norm.tok.lc.es all.norm.tok.lc.en | grep -Pv "^\t|\t$" \
+  | ./splittabs.pl all.norm.tok.lc.noblanks.es all.norm.tok.lc.noblanks.en
+</code></pre>
+</div>
+
+<p>Contents of <code class="highlighter-rouge">splittabs.pl</code>, by Matt Post:</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code><span class="c1">#!/usr/bin/perl</span>
+
+<span class="c1"># splits on tab, printing respective chunks to the list of files given</span>
+<span class="c1"># as script arguments</span>
+
+<span class="k">use</span> <span class="nv">FileHandle</span><span class="p">;</span>
+
+<span class="k">my</span> <span class="nv">@fh</span><span class="p">;</span>
+<span class="vg">$|</span> <span class="o">=</span> <span class="mi">1</span><span class="p">;</span>   <span class="c1"># don't buffer output</span>
+
+<span class="k">if</span> <span class="p">(</span><span class="nv">@ARGV</span> <span class="o">&lt;</span> <span class="mi">0</span><span class="p">)</span> <span class="p">{</span>
+  <span class="k">print</span> <span class="s">"Usage: splittabs.pl &lt; tabbed-file\n"</span><span class="p">;</span>
+  <span class="nb">exit</span><span class="p">;</span>
+<span class="p">}</span>
+
+<span class="k">my</span> <span class="nv">@fh</span> <span class="o">=</span> <span class="nb">map</span> <span class="p">{</span> <span class="nv">get_filehandle</span><span class="p">(</span><span class="nv">$_</span><span class="p">)</span> <span class="p">}</span> <span class="nv">@ARGV</span><span class="p">;</span>
+<span class="nv">@ARGV</span> <span class="o">=</span> <span class="p">();</span>
+
+<span class="k">while</span> <span class="p">(</span><span class="k">my</span> <span class="nv">$line</span> <span class="o">=</span> <span class="o">&lt;&gt;</span><span class="p">)</span> <span class="p">{</span>
+  <span class="nb">chomp</span><span class="p">(</span><span class="nv">$line</span><span class="p">);</span>
+  <span class="k">my</span> <span class="p">(</span><span class="nv">@fields</span><span class="p">)</span> <span class="o">=</span> <span class="nb">split</span><span class="p">(</span><span class="sr">/\t/</span><span class="p">,</span><span class="nv">$line</span><span class="p">,</span><span class="nb">scalar</span> <span class="nv">@fh</span><span class="p">);</span>
+
+  <span class="nb">map</span> <span class="p">{</span> <span class="k">print</span> <span class="p">{</span><span class="nv">$fh</span><span class="p">[</span><span class="nv">$_</span><span class="p">]}</span> <span class="s">"$fields[$_]\n"</span> <span class="p">}</span> <span class="p">(</span><span class="mi">0</span><span class="o">..</span><span class="nv">$#fields</span><span class="p">);</span>
+<span class="p">}</span>
+
+<span class="k">sub </span><span class="nf">get_filehandle</span> <span class="p">{</span>
+    <span class="k">my</span> <span class="nv">$file</span> <span class="o">=</span> <span class="nb">shift</span><span class="p">;</span>
+
+    <span class="k">if</span> <span class="p">(</span><span class="nv">$file</span> <span class="ow">eq</span> <span class="s">"-"</span><span class="p">)</span> <span class="p">{</span>
+        <span class="k">return</span> <span class="o">*</span><span class="bp">STDOUT</span><span class="p">;</span>
+    <span class="p">}</span> <span class="k">else</span> <span class="p">{</span>
+        <span class="nb">local</span> <span class="o">*</span><span class="nv">FH</span><span class="p">;</span>
+        <span class="nb">open</span> <span class="nv">FH</span><span class="p">,</span> <span class="s">"&gt;$file"</span> <span class="ow">or</span> <span class="nb">die</span> <span class="s">"can't open '$file' for writing"</span><span class="p">;</span>
+        <span class="k">return</span> <span class="o">*</span><span class="nv">FH</span><span class="p">;</span>
+    <span class="p">}</span>
+<span class="p">}</span>
+</code></pre>
+</div>
+
+<p>Now we can run the pipeline to extract the grammar. Run the following script:</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code><span class="c">#!/bin/bash</span>
+
+<span class="c"># this creates a grammar</span>
+
+<span class="c"># NEED:</span>
+<span class="c"># pair</span>
+<span class="c"># type</span>
+
+<span class="nb">set</span> -u
+
+<span class="nv">pair</span><span class="o">=</span>es-en
+<span class="nb">type</span><span class="o">=</span>hiero
+
+<span class="c">#. ~/.bashrc</span>
+
+<span class="c">#basedir=$(pwd)</span>
+
+<span class="nv">dir</span><span class="o">=</span>grammar-<span class="nv">$pair</span>-<span class="nv">$type</span>
+
+<span class="o">[[</span> ! -d <span class="nv">$dir</span> <span class="o">]]</span> <span class="o">&amp;&amp;</span> mkdir -p <span class="nv">$dir</span>
+<span class="nb">cd</span> <span class="nv">$dir</span>
+
+<span class="nb">source</span><span class="o">=</span><span class="k">$(</span><span class="nb">echo</span> <span class="nv">$pair</span> | cut -d- -f 1<span class="k">)</span>
+<span class="nv">target</span><span class="o">=</span><span class="k">$(</span><span class="nb">echo</span> <span class="nv">$pair</span> | cut -d- -f 2<span class="k">)</span>
+
+<span class="nv">$JOSHUA</span>/scripts/training/pipeline.pl <span class="se">\</span>
+  --source <span class="nv">$source</span> <span class="se">\</span>
+  --target <span class="nv">$target</span> <span class="se">\</span>
+  --corpus /home/hltcoe/lorland/expts/scale12/model1/input/all.norm.tok.lc.noblanks <span class="se">\</span>
+  --type <span class="nv">$type</span> <span class="se">\</span>
+  --joshua-mem 100g <span class="se">\</span>
+  --no-prepare <span class="se">\</span>
+  --first-step align <span class="se">\</span>
+  --last-step thrax <span class="se">\</span>
+  --hadoop <span class="nv">$HADOOP</span> <span class="se">\</span>
+  --threads 8 <span class="se">\</span>
+</code></pre>
+</div>
+
+
+      </div>
+    </div>
+
+  </body>
+</html>
+
+
+
+
+

http://git-wip-us.apache.org/repos/asf/incubator-joshua-site/blob/53cc3005/4.0/tms.md
----------------------------------------------------------------------
diff --git a/4.0/tms.md b/4.0/tms.md
deleted file mode 100644
index a86a311..0000000
--- a/4.0/tms.md
+++ /dev/null
@@ -1,106 +0,0 @@
----
-layout: default4
-category: advanced
-title: Building Translation Models
----
-
-# Build a translation model
-
-Extracting a grammar from a large amount of data is a multi-step process. The first requirement is parallel data. The Europarl, Call Home, and Fisher corpora all contain parallel translations of Spanish and English sentences.
-
-We will copy (or symlink) the parallel source text files in a subdirectory called `input/`.
-
-Then, we concatenate all the training files on each side. The pipeline script normally does tokenization and normalization, but in this instance we have a custom tokenizer we need to apply to the source side, so we have to do it manually and then skip that step using the `pipeline.pl` option `--first-step alignment`.
-
-* to tokenize the English data, do
-
-    cat callhome.en europarl.en fisher.en > all.en | $JOSHUA/scripts/training/normalize-punctuation.pl en | $JOSHUA/scripts/training/penn-treebank-tokenizer.perl | $JOSHUA/scripts/lowercase.perl > all.norm.tok.lc.en
-
-The same can be done for the Spanish side of the input data:
-
-    cat callhome.es europarl.es fisher.es > all.es | $JOSHUA/scripts/training/normalize-punctuation.pl es | $JOSHUA/scripts/training/penn-treebank-tokenizer.perl | $JOSHUA/scripts/lowercase.perl > all.norm.tok.lc.es
-
-By the way, an alternative tokenizer is a Twitter tokenizer found in the [Jerboa](http://github.com/vandurme/jerboa) project.
-
-The final step in the training data preparation is to remove all examples in which either of the language sides is a blank line.
-
-    paste all.norm.tok.lc.es all.norm.tok.lc.en | grep -Pv "^\t|\t$" \
-      | ./splittabs.pl all.norm.tok.lc.noblanks.es all.norm.tok.lc.noblanks.en
-
-contents of `splittabls.pl` by Matt Post:
-
-    #!/usr/bin/perl
-
-    # splits on tab, printing respective chunks to the list of files given
-    # as script arguments
-
-    use FileHandle;
-
-    my @fh;
-    $| = 1;   # don't buffer output
-
-    if (@ARGV < 0) {
-      print "Usage: splittabs.pl < tabbed-file\n";
-      exit;
-    }
-
-    my @fh = map { get_filehandle($_) } @ARGV;
-    @ARGV = ();
-
-    while (my $line = <>) {
-      chomp($line);
-      my (@fields) = split(/\t/,$line,scalar @fh);
-
-      map { print {$fh[$_]} "$fields[$_]\n" } (0..$#fields);
-    }
-
-    sub get_filehandle {
-        my $file = shift;
-
-        if ($file eq "-") {
-            return *STDOUT;
-        } else {
-            local *FH;
-            open FH, ">$file" or die "can't open '$file' for writing";
-            return *FH;
-        }
-    }
-
-Now we can run the pipeline to extract the grammar. Run the following script:
-
-    #!/bin/bash
-
-    # this creates a grammar
-
-    # NEED:
-    # pair
-    # type
-
-    set -u
-
-    pair=es-en
-    type=hiero
-
-    #. ~/.bashrc
-
-    #basedir=$(pwd)
-
-    dir=grammar-$pair-$type
-
-    [[ ! -d $dir ]] && mkdir -p $dir
-    cd $dir
-
-    source=$(echo $pair | cut -d- -f 1)
-    target=$(echo $pair | cut -d- -f 2)
-
-    $JOSHUA/scripts/training/pipeline.pl \
-      --source $source \
-      --target $target \
-      --corpus /home/hltcoe/lorland/expts/scale12/model1/input/all.norm.tok.lc.noblanks \
-      --type $type \
-      --joshua-mem 100g \
-      --no-prepare \
-      --first-step align \
-      --last-step thrax \
-      --hadoop $HADOOP \
-      --threads 8 \

http://git-wip-us.apache.org/repos/asf/incubator-joshua-site/blob/53cc3005/4.0/zmert.html
----------------------------------------------------------------------
diff --git a/4.0/zmert.html b/4.0/zmert.html
new file mode 100644
index 0000000..a589161
--- /dev/null
+++ b/4.0/zmert.html
@@ -0,0 +1,339 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+
+<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="en" lang="en">
+  <head>
+    <meta http-equiv="Content-Type" content="text/html; charset=UTF-8" />
+    <link rel="stylesheet" type="text/css" media="screen,print" href="../joshua4.css" />
+    <title>Joshua | Z-MERT</title>
+  </head>
+
+  <body>
+
+    <div id="navbar">
+      <a href="http://joshua-decoder.org/">
+        <img src="../images/joshua-logo-small.png" width="130px" 
+             alt="Joshua logo (picture of a Joshua tree)" />
+      </a>
+
+      <p class="infobox">
+        <b>Stable version</b><br />
+        4.1<br/><br/>
+        <b>Release date</b><br />
+        2013 January
+      </p>
+
+<!--       <div class="infobox"> -->
+<!--         <b>AUTO LINKS</b><br/> -->
+<!--         <ul> -->
+<!--            -->
+<!--           <li> Advanced features</li> -->
+<!--            -->
+<!--           <li> Advanced features</li> -->
+<!--            -->
+<!--           <li> Advanced features</li> -->
+<!--            -->
+<!--           <li> Building a language pack</li> -->
+<!--            -->
+<!--           <li> Building a language pack</li> -->
+<!--            -->
+<!--           <li> Bundling a configuration</li> -->
+<!--            -->
+<!--           <li> Contributors</li> -->
+<!--            -->
+<!--           <li> Decoder configuration parameters</li> -->
+<!--            -->
+<!--           <li> Decoder configuration parameters</li> -->
+<!--            -->
+<!--           <li> Decoder configuration parameters</li> -->
+<!--            -->
+<!--           <li> Decoder configuration parameters</li> -->
+<!--            -->
+<!--           <li> Frequently Asked Questions</li> -->
+<!--            -->
+<!--           <li> Common problems</li> -->
+<!--            -->
+<!--           <li> Frequently Asked Questions</li> -->
+<!--            -->
+<!--           <li> Common problems</li> -->
+<!--            -->
+<!--           <li> Features</li> -->
+<!--            -->
+<!--           <li> Features</li> -->
+<!--            -->
+<!--           <li> Features</li> -->
+<!--            -->
+<!--           <li> Features</li> -->
+<!--            -->
+<!--           <li> Joshua file formats</li> -->
+<!--            -->
+<!--           <li> Joshua file formats</li> -->
+<!--            -->
+<!--           <li> Joshua file formats</li> -->
+<!--            -->
+<!--           <li> Joshua file formats</li> -->
+<!--            -->
+<!--           <li> </li> -->
+<!--            -->
+<!--           <li> </li> -->
+<!--            -->
+<!--           <li> </li> -->
+<!--            -->
+<!--           <li> Fisher and CALLHOME Spanish English Speech Translation Corpus</li> -->
+<!--            -->
+<!--           <li> Indian Languages Parallel Corpora</li> -->
+<!--            -->
+<!--           <li> Joshua 4.0 User Documentation</li> -->
+<!--            -->
+<!--           <li> Language packs</li> -->
+<!--            -->
+<!--           <li> Paraphrase Packs</li> -->
+<!--            -->
+<!--           <li> Joshua releases</li> -->
+<!--            -->
+<!--           <li> Support</li> -->
+<!--            -->
+<!--           <li> Getting Started</li> -->
+<!--            -->
+<!--           <li> Welcome to Joshua</li> -->
+<!--            -->
+<!--           <li> Joshua documentation</li> -->
+<!--            -->
+<!--           <li> Joshua documentation</li> -->
+<!--            -->
+<!--           <li> Installation</li> -->
+<!--            -->
+<!--           <li> Installation</li> -->
+<!--            -->
+<!--           <li> Alignment with Jacana</li> -->
+<!--            -->
+<!--           <li> Alignment with Jacana</li> -->
+<!--            -->
+<!--           <li> Alignment with Jacana</li> -->
+<!--            -->
+<!--           <li> Building large LMs with SRILM</li> -->
+<!--            -->
+<!--           <li> Building large LMs with SRILM</li> -->
+<!--            -->
+<!--           <li> Building large LMs with SRILM</li> -->
+<!--            -->
+<!--           <li> Building large LMs with SRILM</li> -->
+<!--            -->
+<!--           <li> Lattice decoding</li> -->
+<!--            -->
+<!--           <li> Grammar Packing</li> -->
+<!--            -->
+<!--           <li> Grammar Packing</li> -->
+<!--            -->
+<!--           <li> Grammar Packing</li> -->
+<!--            -->
+<!--           <li> Grammar Packing</li> -->
+<!--            -->
+<!--           <li> The Joshua Pipeline</li> -->
+<!--            -->
+<!--           <li> The Joshua Pipeline</li> -->
+<!--            -->
+<!--           <li> The Joshua Pipeline</li> -->
+<!--            -->
+<!--           <li> The Joshua Pipeline</li> -->
+<!--            -->
+<!--           <li> Quick Start</li> -->
+<!--            -->
+<!--           <li> Quick Start</li> -->
+<!--            -->
+<!--           <li> Releases</li> -->
+<!--            -->
+<!--           <li> Server mode</li> -->
+<!--            -->
+<!--           <li> Server mode</li> -->
+<!--            -->
+<!--           <li> Server mode</li> -->
+<!--            -->
+<!--           <li> Installing and running the Joshua Decoder</li> -->
+<!--            -->
+<!--           <li> Grammar extraction with Thrax</li> -->
+<!--            -->
+<!--           <li> Grammar extraction with Thrax</li> -->
+<!--            -->
+<!--           <li> Grammar extraction with Thrax</li> -->
+<!--            -->
+<!--           <li> Grammar extraction with Thrax</li> -->
+<!--            -->
+<!--           <li> Building Translation Models</li> -->
+<!--            -->
+<!--           <li> Building Translation Models</li> -->
+<!--            -->
+<!--           <li> Building Translation Models</li> -->
+<!--            -->
+<!--           <li> Building Translation Models</li> -->
+<!--            -->
+<!--           <li> Pipeline tutorial</li> -->
+<!--            -->
+<!--           <li> Pipeline tutorial</li> -->
+<!--            -->
+<!--           <li> Pipeline tutorial</li> -->
+<!--            -->
+<!--           <li> What's New</li> -->
+<!--            -->
+<!--           <li> What's New</li> -->
+<!--            -->
+<!--           <li> Z-MERT</li> -->
+<!--            -->
+<!--           <li> Z-MERT</li> -->
+<!--            -->
+<!--           <li> Z-MERT</li> -->
+<!--            -->
+<!--           <li> Z-MERT</li> -->
+<!--            -->
+<!--           <li> </li> -->
+<!--            -->
+<!--           <li> </li> -->
+<!--            -->
+<!--           <li> </li> -->
+<!--            -->
+<!--         </ul> -->
+<!--       </div>   -->
+
+      <div class="infobox">
+
+        <b>Links</b><br />
+        <ul>
+          <li> <a href="../index.html">Main</a> </li>
+          <li> <a href="pipeline.html">Pipeline</a> </li>
+          <li> <a href="step-by-step-instructions.html">Manual walkthrough</a> </li>
+          <li> <a href="decoder.html">Decoder</a> </li>
+          <li> <a href="server.html">Decoder Server</a> </li>
+          <li> <a href="file-formats.html">File formats</a> </li>
+          <li> <a href="thrax.html">Grammar Extraction</a> </li>
+          <li> <a href="../releases.html">Releases</a> </li>
+        </ul>
+      </div>
+
+      <div class="infobox">
+        <b>Advanced</b><br />
+        <ul>
+<!--          <li> <a href="packing.html">Grammar packing</a> </li> -->
+          <li> <a href="large-lms.html">Building large LMs</a> </li>
+          <li> <a href="zmert.html">Running Z-MERT</a> </li>
+          <li> <a href="lattice.html">Lattices</a> </li>
+          <li> <a href="server.html">TCP/IP server</a> </li>
+          <li> <a href="bundle.html">Bundled configuration</a> </li>
+        </ul>
+      </div>
+
+      <div class="infobox">
+        <b>Help</b><br />
+        <ul>
+          <li> <a href="faq.html">Answers</a> </li>
+          <li> <a href="https://groups.google.com/d/forum/joshua_support">Archive</a> </li>
+        </ul>
+      </div>
+
+      <div class="footer">
+        Last updated on April 08, 2016
+      </div>
+
+    </div>
+
+    <div id="main">
+      <div id="title">
+        <h1>Z-MERT</h1>
+      </div>
+
+      <div id="content">
+        
+        <p>This document describes how to manually run the ZMERT module.  ZMERT is Joshua’s minimum error-rate
+training module, written by Omar F. Zaidan.  It is easily adapted to drop in different decoders, and
+was also written so as to work with different objective functions (other than BLEU).</p>
+
+<p>((Section (1) in <code class="highlighter-rouge">$JOSHUA/examples/ZMERT/README_ZMERT.txt</code> is an expanded version of this section))</p>
+
+<p>Z-MERT can be used by launching the driver program (<code class="highlighter-rouge">ZMERT.java</code>), which expects a config file as
+its main argument.  This config file can be used to specify any subset of Z-MERT’s 20-some
+parameters.  For a full list of those parameters, and their default values, run ZMERT with a single
+-h argument as follows:</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>java -cp $JOSHUA/bin joshua.zmert.ZMERT -h
+</code></pre>
+</div>
+
+<p>So what does a Z-MERT config file look like?</p>
+
+<p>Examine the file <code class="highlighter-rouge">examples/ZMERT/ZMERT_config_ex2.txt</code>.  You will find that it
+specifies the following “main” MERT parameters:</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>(*) -dir dirPrefix:         working directory
+(*) -s sourceFile:          source sentences (foreign sentences) of the MERT dataset
+(*) -r refFile:             target sentences (reference translations) of the MERT dataset
+(*) -rps refsPerSen:        number of reference translations per sentence
+(*) -p paramsFile:          file containing parameter names, initial values, and ranges
+(*) -maxIt maxMERTIts:      maximum number of MERT iterations
+(*) -ipi initsPerIt:        number of intermediate initial points per iteration
+(*) -cmd commandFile:       name of file containing commands to run the decoder
+(*) -decOut decoderOutFile: name of the output file produced by the decoder
+(*) -dcfg decConfigFile:    name of decoder config file
+(*) -N N:                   size of N-best list (per sentence) generated in each MERT iteration
+(*) -v verbosity:           output verbosity level (0-2; higher value =&gt; more verbose)
+(*) -seed seed:             seed used to initialize the random number generator
+</code></pre>
+</div>
+
+<p>(Note that the <code class="highlighter-rouge">-s</code> parameter is only used if Z-MERT is running Joshua as an
+ internal decoder.  If Joshua is run as an external decoder, as is the case in
+ this README, then this parameter is ignored.)</p>
+
+<p>To test Z-MERT on the 100-sentence test set of example2, provide this config
+file to Z-MERT as follows:</p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>java -cp bin joshua.zmert.ZMERT -maxMem 500 examples/ZMERT/ZMERT_config_ex2.txt &gt; examples/ZMERT/ZMERT_example/ZMERT.out
+</code></pre>
+</div>
+
+<p>This will run Z-MERT for a couple of iterations on the data from the example2
+folder.  (Notice that we have made copies of the source and reference files
+from example2 and renamed them as src.txt and ref.* in the ZMERT_example folder,
+just to have all the files needed by Z-MERT in one place.)  Once the Z-MERT run
+is complete, you should be able to inspect the log file to see what kinds of
+things it did.  If everything goes well, the run should take a few minutes, of
+which more than 95% is time spent by Z-MERT waiting on Joshua to finish
+decoding the sentences (once per iteration).</p>
+
+<p>The output file you get should be equivalent to <code class="highlighter-rouge">ZMERT.out.verbosity1</code>.  If you
+rerun the experiment with the verbosity (-v) argument set to 2 instead of 1,
+the output file you get should be equivalent to <code class="highlighter-rouge">ZMERT.out.verbosity2</code>, which has
+more interesting details about what Z-MERT does.</p>
+
+<p>Notice the additional <code class="highlighter-rouge">-maxMem</code> argument.  It tells Z-MERT that it should not
+keep holding on to memory while the decoder is running (during which time Z-MERT
+would be idle).  The 500 tells Z-MERT that it can only use a maximum of 500 MB.
+For more details on this issue, see section (4) in Z-MERT’s README.</p>
+
+<p>A quick note about Z-MERT’s interaction with the decoder.  If you examine the
+file <code class="highlighter-rouge">decoder_command_ex2.txt</code>, which is provided as the commandFile (<code class="highlighter-rouge">-cmd</code>)
+argument in Z-MERT’s config file, you’ll find it contains the command one would
+use to run the decoder.  Z-MERT launches the commandFile as an external
+process, and assumes that it will launch the decoder to produce translations.
+(Make sure that commandFile is executable.)  After launching this external
+process, Z-MERT waits for it to finish, then uses the resulting output file for
+parameter tuning (in addition to the output files from previous iterations).
+The command file here only has a single command, but your command file could
+have multiple lines.  Just make sure the command file itself is executable.</p>
+
+<p>Notice that the Z-MERT arguments <code class="highlighter-rouge">decConfigFile</code> and <code class="highlighter-rouge">decoderOutFile</code> (<code class="highlighter-rouge">-dcfg</code> and
+<code class="highlighter-rouge">-decOut</code>) must match the two Joshua arguments in the commandFile’s (<code class="highlighter-rouge">-cmd</code>) single
+command.  Also, the Z-MERT argument for N must match the value for <code class="highlighter-rouge">top_n</code> in
+Joshua’s config file, indicated by the Z-MERT argument decConfigFile (<code class="highlighter-rouge">-dcfg</code>).</p>
+
+<p>For more details on Z-MERT, refer to <code class="highlighter-rouge">$JOSHUA/examples/ZMERT/README_ZMERT.txt</code></p>
+
+
+      </div>
+    </div>
+
+  </body>
+</html>
+
+
+
+
+

http://git-wip-us.apache.org/repos/asf/incubator-joshua-site/blob/53cc3005/4.0/zmert.md
----------------------------------------------------------------------
diff --git a/4.0/zmert.md b/4.0/zmert.md
deleted file mode 100644
index 538a2ac..0000000
--- a/4.0/zmert.md
+++ /dev/null
@@ -1,83 +0,0 @@
----
-layout: default4
-category: advanced
-title: Z-MERT
----
-
-This document describes how to manually run the ZMERT module.  ZMERT is Joshua's minimum error-rate
-training module, written by Omar F. Zaidan.  It is easily adapted to drop in different decoders, and
-was also written so as to work with different objective functions (other than BLEU).
-
-((Section (1) in `$JOSHUA/examples/ZMERT/README_ZMERT.txt` is an expanded version of this section))
-
-Z-MERT can be used by launching the driver program (`ZMERT.java`), which expects a config file as
-its main argument.  This config file can be used to specify any subset of Z-MERT's 20-some
-parameters.  For a full list of those parameters, and their default values, run ZMERT with a single
--h argument as follows:
-
-    java -cp $JOSHUA/bin joshua.zmert.ZMERT -h
-
-So what does a Z-MERT config file look like?
-
-Examine the file `examples/ZMERT/ZMERT_config_ex2.txt`.  You will find that it
-specifies the following "main" MERT parameters:
-
-    (*) -dir dirPrefix:         working directory
-    (*) -s sourceFile:          source sentences (foreign sentences) of the MERT dataset
-    (*) -r refFile:             target sentences (reference translations) of the MERT dataset
-    (*) -rps refsPerSen:        number of reference translations per sentence
-    (*) -p paramsFile:          file containing parameter names, initial values, and ranges
-    (*) -maxIt maxMERTIts:      maximum number of MERT iterations
-    (*) -ipi initsPerIt:        number of intermediate initial points per iteration
-    (*) -cmd commandFile:       name of file containing commands to run the decoder
-    (*) -decOut decoderOutFile: name of the output file produced by the decoder
-    (*) -dcfg decConfigFile:    name of decoder config file
-    (*) -N N:                   size of N-best list (per sentence) generated in each MERT iteration
-    (*) -v verbosity:           output verbosity level (0-2; higher value => more verbose)
-    (*) -seed seed:             seed used to initialize the random number generator
-
-(Note that the `-s` parameter is only used if Z-MERT is running Joshua as an
- internal decoder.  If Joshua is run as an external decoder, as is the case in
- this README, then this parameter is ignored.)
-
-To test Z-MERT on the 100-sentence test set of example2, provide this config
-file to Z-MERT as follows:
-
-    java -cp bin joshua.zmert.ZMERT -maxMem 500 examples/ZMERT/ZMERT_config_ex2.txt > examples/ZMERT/ZMERT_example/ZMERT.out
-
-This will run Z-MERT for a couple of iterations on the data from the example2
-folder.  (Notice that we have made copies of the source and reference files
-from example2 and renamed them as src.txt and ref.* in the ZMERT_example folder,
-just to have all the files needed by Z-MERT in one place.)  Once the Z-MERT run
-is complete, you should be able to inspect the log file to see what kinds of
-things it did.  If everything goes well, the run should take a few minutes, of
-which more than 95% is time spent by Z-MERT waiting on Joshua to finish
-decoding the sentences (once per iteration).
-
-The output file you get should be equivalent to `ZMERT.out.verbosity1`.  If you
-rerun the experiment with the verbosity (-v) argument set to 2 instead of 1,
-the output file you get should be equivalent to `ZMERT.out.verbosity2`, which has
-more interesting details about what Z-MERT does.
-
-Notice the additional `-maxMem` argument.  It tells Z-MERT that it should not
-keep holding on to memory while the decoder is running (during which time Z-MERT
-would be idle).  The 500 tells Z-MERT that it can only use a maximum of 500 MB.
-For more details on this issue, see section (4) in Z-MERT's README.
-
-A quick note about Z-MERT's interaction with the decoder.  If you examine the
-file `decoder_command_ex2.txt`, which is provided as the commandFile (`-cmd`)
-argument in Z-MERT's config file, you'll find it contains the command one would
-use to run the decoder.  Z-MERT launches the commandFile as an external
-process, and assumes that it will launch the decoder to produce translations.
-(Make sure that commandFile is executable.)  After launching this external
-process, Z-MERT waits for it to finish, then uses the resulting output file for
-parameter tuning (in addition to the output files from previous iterations).
-The command file here only has a single command, but your command file could
-have multiple lines.  Just make sure the command file itself is executable.
-
-Notice that the Z-MERT arguments `decConfigFile` and `decoderOutFile` (`-dcfg` and
-`-decOut`) must match the two Joshua arguments in the commandFile's (`-cmd`) single
-command.  Also, the Z-MERT argument for N must match the value for `top_n` in
-Joshua's config file, indicated by the Z-MERT argument decConfigFile (`-dcfg`).
-
-For more details on Z-MERT, refer to `$JOSHUA/examples/ZMERT/README_ZMERT.txt`

http://git-wip-us.apache.org/repos/asf/incubator-joshua-site/blob/53cc3005/5.0/advanced.html
----------------------------------------------------------------------
diff --git a/5.0/advanced.html b/5.0/advanced.html
new file mode 100644
index 0000000..ad963e7
--- /dev/null
+++ b/5.0/advanced.html
@@ -0,0 +1,170 @@
+<!DOCTYPE html>
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <title>Joshua Documentation | Advanced features</title>
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <meta name="description" content="">
+    <meta name="author" content="">
+
+    <!-- Le styles -->
+    <link href="/bootstrap/css/bootstrap.css" rel="stylesheet">
+    <style>
+      body {
+        padding-top: 60px; /* 60px to make the container go all the way to the bottom of the topbar */
+      }
+      #download {
+          background-color: green;
+          font-size: 14pt;
+          font-weight: bold;
+          text-align: center;
+          color: white;
+          border-radius: 5px;
+          padding: 4px;
+      }
+
+      #download a:link {
+          color: white;
+      }
+
+      #download a:hover {
+          color: lightgrey;
+      }
+
+      #download a:visited {
+          color: white;
+      }
+
+      a.pdf {
+          font-variant: small-caps;
+          /* font-weight: bold; */
+          font-size: 10pt;
+          color: white;
+          background: brown;
+          padding: 2px;
+      }
+
+      a.bibtex {
+          font-variant: small-caps;
+          /* font-weight: bold; */
+          font-size: 10pt;
+          color: white;
+          background: orange;
+          padding: 2px;
+      }
+
+      img.sponsor {
+        height: 120px;
+        margin: 5px;
+      }
+    </style>
+    <link href="bootstrap/css/bootstrap-responsive.css" rel="stylesheet">
+
+    <!-- HTML5 shim, for IE6-8 support of HTML5 elements -->
+    <!--[if lt IE 9]>
+      <script src="bootstrap/js/html5shiv.js"></script>
+    <![endif]-->
+
+    <!-- Fav and touch icons -->
+    <link rel="apple-touch-icon-precomposed" sizes="144x144" href="bootstrap/ico/apple-touch-icon-144-precomposed.png">
+    <link rel="apple-touch-icon-precomposed" sizes="114x114" href="bootstrap/ico/apple-touch-icon-114-precomposed.png">
+      <link rel="apple-touch-icon-precomposed" sizes="72x72" href="bootstrap/ico/apple-touch-icon-72-precomposed.png">
+                    <link rel="apple-touch-icon-precomposed" href="bootstrap/ico/apple-touch-icon-57-precomposed.png">
+                                   <link rel="shortcut icon" href="bootstrap/ico/favicon.png">
+  </head>
+
+  <body>
+
+    <div class="navbar navbar-inverse navbar-fixed-top">
+      <div class="navbar-inner">
+        <div class="container">
+          <button type="button" class="btn btn-navbar" data-toggle="collapse" data-target=".nav-collapse">
+            <span class="icon-bar"></span>
+            <span class="icon-bar"></span>
+            <span class="icon-bar"></span>
+          </button>
+          <a class="brand" href="/">Joshua</a>
+          <div class="nav-collapse collapse">
+            <ul class="nav">
+              <li><a href="index.html">Documentation</a></li>
+              <li><a href="pipeline.html">Pipeline</a></li>
+              <li><a href="tutorial.html">Tutorial</a></li>
+              <li><a href="decoder.html">Decoder</a></li>
+              <li><a href="thrax.html">Thrax</a></li>
+              <li><a href="file-formats.html">File formats</a></li>
+              <!-- <li><a href="advanced.html">Advanced</a></li> -->
+              <li><a href="faq.html">FAQ</a></li>
+            </ul>
+          </div><!--/.nav-collapse -->
+        </div>
+      </div>
+    </div>
+
+    <div class="container">
+
+      <div class="row">
+        <div class="span2">
+          <img src="/images/joshua-logo-small.png" 
+               alt="Joshua logo (picture of a Joshua tree)" />
+        </div>
+        <div class="span10">
+          <h1>Joshua Documentation</h1>
+          <h2>Advanced features</h2>
+          <span id="download">
+            <a href="http://cs.jhu.edu/~post/files/joshua-v5.0.tgz">Download</a>
+          </span>
+          &nbsp; (version 5.0, released 16 August 2013)
+        </div>
+      </div>
+      
+      <hr />
+
+      <div class="row">
+        <div class="span8">
+
+          
+
+
+        </div>
+      </div>
+    </div> <!-- /container -->
+
+    <!-- Le javascript
+    ================================================== -->
+    <!-- Placed at the end of the document so the pages load faster -->
+    <script src="bootstrap/js/jquery.js"></script>
+    <script src="bootstrap/js/bootstrap-transition.js"></script>
+    <script src="bootstrap/js/bootstrap-alert.js"></script>
+    <script src="bootstrap/js/bootstrap-modal.js"></script>
+    <script src="bootstrap/js/bootstrap-dropdown.js"></script>
+    <script src="bootstrap/js/bootstrap-scrollspy.js"></script>
+    <script src="bootstrap/js/bootstrap-tab.js"></script>
+    <script src="bootstrap/js/bootstrap-tooltip.js"></script>
+    <script src="bootstrap/js/bootstrap-popover.js"></script>
+    <script src="bootstrap/js/bootstrap-button.js"></script>
+    <script src="bootstrap/js/bootstrap-collapse.js"></script>
+    <script src="bootstrap/js/bootstrap-carousel.js"></script>
+    <script src="bootstrap/js/bootstrap-typeahead.js"></script>
+
+    <!-- Start of StatCounter Code for Default Guide -->
+    <script type="text/javascript">
+      var sc_project=8264132; 
+      var sc_invisible=1; 
+      var sc_security="4b97fe2d"; 
+    </script>
+    <script type="text/javascript" src="http://www.statcounter.com/counter/counter.js"></script>
+    <noscript>
+      <div class="statcounter">
+        <a title="hit counter joomla" 
+           href="http://statcounter.com/joomla/"
+           target="_blank">
+          <img class="statcounter"
+               src="http://c.statcounter.com/8264132/0/4b97fe2d/1/"
+               alt="hit counter joomla" />
+        </a>
+      </div>
+    </noscript>
+    <!-- End of StatCounter Code for Default Guide -->
+
+  </body>
+</html>

http://git-wip-us.apache.org/repos/asf/incubator-joshua-site/blob/53cc3005/5.0/advanced.md
----------------------------------------------------------------------
diff --git a/5.0/advanced.md b/5.0/advanced.md
deleted file mode 100644
index 174041e..0000000
--- a/5.0/advanced.md
+++ /dev/null
@@ -1,7 +0,0 @@
----
-layout: default
-category: links
-title: Advanced features
----
-
-

http://git-wip-us.apache.org/repos/asf/incubator-joshua-site/blob/53cc3005/5.0/bundle.html
----------------------------------------------------------------------
diff --git a/5.0/bundle.html b/5.0/bundle.html
new file mode 100644
index 0000000..4977a8f
--- /dev/null
+++ b/5.0/bundle.html
@@ -0,0 +1,189 @@
+<!DOCTYPE html>
+<html lang="en">
+  <head>
+    <meta charset="utf-8">
+    <title>Joshua Documentation | Bundling a configuration</title>
+    <meta name="viewport" content="width=device-width, initial-scale=1.0">
+    <meta name="description" content="">
+    <meta name="author" content="">
+
+    <!-- Le styles -->
+    <link href="/bootstrap/css/bootstrap.css" rel="stylesheet">
+    <style>
+      body {
+        padding-top: 60px; /* 60px to make the container go all the way to the bottom of the topbar */
+      }
+      #download {
+          background-color: green;
+          font-size: 14pt;
+          font-weight: bold;
+          text-align: center;
+          color: white;
+          border-radius: 5px;
+          padding: 4px;
+      }
+
+      #download a:link {
+          color: white;
+      }
+
+      #download a:hover {
+          color: lightgrey;
+      }
+
+      #download a:visited {
+          color: white;
+      }
+
+      a.pdf {
+          font-variant: small-caps;
+          /* font-weight: bold; */
+          font-size: 10pt;
+          color: white;
+          background: brown;
+          padding: 2px;
+      }
+
+      a.bibtex {
+          font-variant: small-caps;
+          /* font-weight: bold; */
+          font-size: 10pt;
+          color: white;
+          background: orange;
+          padding: 2px;
+      }
+
+      img.sponsor {
+        height: 120px;
+        margin: 5px;
+      }
+    </style>
+    <link href="bootstrap/css/bootstrap-responsive.css" rel="stylesheet">
+
+    <!-- HTML5 shim, for IE6-8 support of HTML5 elements -->
+    <!--[if lt IE 9]>
+      <script src="bootstrap/js/html5shiv.js"></script>
+    <![endif]-->
+
+    <!-- Fav and touch icons -->
+    <link rel="apple-touch-icon-precomposed" sizes="144x144" href="bootstrap/ico/apple-touch-icon-144-precomposed.png">
+    <link rel="apple-touch-icon-precomposed" sizes="114x114" href="bootstrap/ico/apple-touch-icon-114-precomposed.png">
+      <link rel="apple-touch-icon-precomposed" sizes="72x72" href="bootstrap/ico/apple-touch-icon-72-precomposed.png">
+                    <link rel="apple-touch-icon-precomposed" href="bootstrap/ico/apple-touch-icon-57-precomposed.png">
+                                   <link rel="shortcut icon" href="bootstrap/ico/favicon.png">
+  </head>
+
+  <body>
+
+    <div class="navbar navbar-inverse navbar-fixed-top">
+      <div class="navbar-inner">
+        <div class="container">
+          <button type="button" class="btn btn-navbar" data-toggle="collapse" data-target=".nav-collapse">
+            <span class="icon-bar"></span>
+            <span class="icon-bar"></span>
+            <span class="icon-bar"></span>
+          </button>
+          <a class="brand" href="/">Joshua</a>
+          <div class="nav-collapse collapse">
+            <ul class="nav">
+              <li><a href="index.html">Documentation</a></li>
+              <li><a href="pipeline.html">Pipeline</a></li>
+              <li><a href="tutorial.html">Tutorial</a></li>
+              <li><a href="decoder.html">Decoder</a></li>
+              <li><a href="thrax.html">Thrax</a></li>
+              <li><a href="file-formats.html">File formats</a></li>
+              <!-- <li><a href="advanced.html">Advanced</a></li> -->
+              <li><a href="faq.html">FAQ</a></li>
+            </ul>
+          </div><!--/.nav-collapse -->
+        </div>
+      </div>
+    </div>
+
+    <div class="container">
+
+      <div class="row">
+        <div class="span2">
+          <img src="/images/joshua-logo-small.png" 
+               alt="Joshua logo (picture of a Joshua tree)" />
+        </div>
+        <div class="span10">
+          <h1>Joshua Documentation</h1>
+          <h2>Bundling a configuration</h2>
+          <span id="download">
+            <a href="http://cs.jhu.edu/~post/files/joshua-v5.0.tgz">Download</a>
+          </span>
+          &nbsp; (version 5.0, released 16 August 2013)
+        </div>
+      </div>
+      
+      <hr />
+
+      <div class="row">
+        <div class="span8">
+
+          <p>A <em>bundled configuration</em> is a minimal set of configuration, resource, and script files. A script, <code class="highlighter-rouge">$JOSHUA/scripts/support/run-bundler.py</code> can be used to package up the run bundle. The resulting bundle can easily be transferred and shared.</p>
+
+<p><strong>Example invocation:</strong></p>
+
+<div class="highlighter-rouge"><pre class="highlight"><code>./run-bundler.py \
+  --force \
+  /path/to/rundir/runs/5/test/1/joshua.config \
+  /path/to/rundir/runs/5 \
+  bundled-configurations \
+    "-top-n 1 \
+    -output-format %S \
+    -mark-oovs false \
+    -server-port 5674 \
+    -tm/pt \"thrax pt 20 /path/to/rundir/runs/5/test/1/grammar.gz\""
+</code></pre>
+</div>
+
+<p>A new directory <code class="highlighter-rouge">./bundled-configurations</code> will be created, and all the bundled files will be copied or created in it.  To use the configuration with Joshua, run the executable file <code class="highlighter-rouge">./bundled-configurations/bundle-runner.sh</code>.</p>
+
+<p>Note, the additional options between the pair of quotation marks are passed as arguments to the <code class="highlighter-rouge">$JOSHUA/scripts/copy-config.pl</code> script. That script has some special parameters, especially the <code class="highlighter-rouge">-tm/..</code> option.</p>
+
+
+        </div>
+      </div>
+    </div> <!-- /container -->
+
+    <!-- Le javascript
+    ================================================== -->
+    <!-- Placed at the end of the document so the pages load faster -->
+    <script src="bootstrap/js/jquery.js"></script>
+    <script src="bootstrap/js/bootstrap-transition.js"></script>
+    <script src="bootstrap/js/bootstrap-alert.js"></script>
+    <script src="bootstrap/js/bootstrap-modal.js"></script>
+    <script src="bootstrap/js/bootstrap-dropdown.js"></script>
+    <script src="bootstrap/js/bootstrap-scrollspy.js"></script>
+    <script src="bootstrap/js/bootstrap-tab.js"></script>
+    <script src="bootstrap/js/bootstrap-tooltip.js"></script>
+    <script src="bootstrap/js/bootstrap-popover.js"></script>
+    <script src="bootstrap/js/bootstrap-button.js"></script>
+    <script src="bootstrap/js/bootstrap-collapse.js"></script>
+    <script src="bootstrap/js/bootstrap-carousel.js"></script>
+    <script src="bootstrap/js/bootstrap-typeahead.js"></script>
+
+    <!-- Start of StatCounter Code for Default Guide -->
+    <script type="text/javascript">
+      var sc_project=8264132; 
+      var sc_invisible=1; 
+      var sc_security="4b97fe2d"; 
+    </script>
+    <script type="text/javascript" src="http://www.statcounter.com/counter/counter.js"></script>
+    <noscript>
+      <div class="statcounter">
+        <a title="hit counter joomla" 
+           href="http://statcounter.com/joomla/"
+           target="_blank">
+          <img class="statcounter"
+               src="http://c.statcounter.com/8264132/0/4b97fe2d/1/"
+               alt="hit counter joomla" />
+        </a>
+      </div>
+    </noscript>
+    <!-- End of StatCounter Code for Default Guide -->
+
+  </body>
+</html>

http://git-wip-us.apache.org/repos/asf/incubator-joshua-site/blob/53cc3005/5.0/bundle.md
----------------------------------------------------------------------
diff --git a/5.0/bundle.md b/5.0/bundle.md
deleted file mode 100644
index c3874ab..0000000
--- a/5.0/bundle.md
+++ /dev/null
@@ -1,24 +0,0 @@
----
-layout: default
-category: links
-title: Bundling a configuration
----
-
-A *bundled configuration* is a minimal set of configuration, resource, and script files. A script, `$JOSHUA/scripts/support/run-bundler.py` can be used to package up the run bundle. The resulting bundle can easily be transferred and shared.
-
-**Example invocation:**
-
-    ./run-bundler.py \
-      --force \
-      /path/to/rundir/runs/5/test/1/joshua.config \
-      /path/to/rundir/runs/5 \
-      bundled-configurations \
-        "-top-n 1 \
-        -output-format %S \
-        -mark-oovs false \
-        -server-port 5674 \
-        -tm/pt \"thrax pt 20 /path/to/rundir/runs/5/test/1/grammar.gz\""
-
-A new directory `./bundled-configurations` will be created, and all the bundled files will be copied or created in it.  To use the configuration with Joshua, run the executable file `./bundled-configurations/bundle-runner.sh`.
-
-Note, the additional options between the pair of quotation marks are passed as arguments to the `$JOSHUA/scripts/copy-config.pl` script. That script has some special parameters, especially the `-tm/..` option.