Posted to commits@joshua.apache.org by mj...@apache.org on 2016/08/23 22:17:45 UTC

[28/50] [abbrv] incubator-joshua git commit: Merge branch 'master' into 7

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dc756709/joshua-core/src/main/java/org/apache/joshua/server/ServerThread.java
----------------------------------------------------------------------
diff --cc joshua-core/src/main/java/org/apache/joshua/server/ServerThread.java
index 6cfea6c,0000000..e9f9c62
mode 100644,000000..100644
--- a/joshua-core/src/main/java/org/apache/joshua/server/ServerThread.java
+++ b/joshua-core/src/main/java/org/apache/joshua/server/ServerThread.java
@@@ -1,297 -1,0 +1,300 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *  http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +package org.apache.joshua.server;
 +
 +import static org.apache.joshua.decoder.ff.FeatureMap.hashFeature;
 +
 +import java.io.BufferedReader;
 +import java.io.IOException;
 +import java.io.InputStreamReader;
 +import java.io.OutputStream;
 +import java.io.StringReader;
 +import java.io.UnsupportedEncodingException;
 +import java.net.Socket;
 +import java.net.SocketException;
 +import java.net.URLDecoder;
 +import java.nio.charset.Charset;
 +import java.util.ArrayList;
 +import java.util.Arrays;
 +import java.util.HashMap;
 +
 +import org.apache.joshua.decoder.Decoder;
 +import org.apache.joshua.decoder.JoshuaConfiguration;
 +import org.apache.joshua.decoder.Translation;
 +import org.apache.joshua.decoder.Translations;
 +import org.apache.joshua.decoder.ff.tm.Rule;
 +import org.apache.joshua.decoder.ff.tm.Trie;
 +import org.apache.joshua.decoder.ff.tm.format.HieroFormatReader;
 +import org.apache.joshua.decoder.io.JSONMessage;
 +import org.apache.joshua.decoder.io.TranslationRequestStream;
 +import org.slf4j.Logger;
 +import org.slf4j.LoggerFactory;
 +
 +import com.sun.net.httpserver.HttpExchange;
 +import com.sun.net.httpserver.HttpHandler;
 +
 +/**
 + * This class handles a concurrent request for translations from a newly opened socket, for
 + * both raw TCP/IP connections and HTTP connections.
 + * 
 + */
 +public class ServerThread extends Thread implements HttpHandler {
 +
 +  private static final Logger LOG = LoggerFactory.getLogger(ServerThread.class);
 +  private static final Charset FILE_ENCODING = Charset.forName("UTF-8");
 +  
 +  private final JoshuaConfiguration joshuaConfiguration;
 +  private Socket socket = null;
 +  private final Decoder decoder;
 +
 +  /**
 +   * Creates a new TcpServerThread that can run a set of translations.
 +   * 
 +   * @param socket the socket representing the input/output streams
 +   * @param decoder the configured decoder that handles performing translations
 +   * @param joshuaConfiguration a populated {@link org.apache.joshua.decoder.JoshuaConfiguration}
 +   */
 +  public ServerThread(Socket socket, Decoder decoder, JoshuaConfiguration joshuaConfiguration) {
 +    this.joshuaConfiguration = joshuaConfiguration;
 +    this.socket = socket;
 +    this.decoder = decoder;
 +  }
 +
 +  /**
 +   * Reads the input from the socket, submits the input to the decoder, transforms the resulting
 +   * translations into the required output format, writes out the formatted output, then closes the
 +   * socket.
 +   */
 +  @Override
 +  public void run() {
 +
 +    //TODO: use try-with-resources block
 +    try {
 +      BufferedReader reader = new BufferedReader(new InputStreamReader(socket.getInputStream(), FILE_ENCODING));
 +
 +      TranslationRequestStream request = new TranslationRequestStream(reader, joshuaConfiguration);
 +
 +      try {
 +        Translations translations = decoder.decodeAll(request);
 +        
 +        OutputStream out = socket.getOutputStream();
 +        
 +        for (Translation translation: translations) {
 +          out.write(translation.toString().getBytes(FILE_ENCODING));
 +        }
 +        
 +      } catch (SocketException e) {
 +        LOG.error(" Socket interrupted", e);
 +        request.shutdown();
 +      } finally {
 +        reader.close();
 +        socket.close();
 +      }
 +    } catch (IOException e) {
 +      LOG.error(e.getMessage(), e);
 +    }
 +  }
 +  
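 +  /**
 +   * Splits a URL query string into a key/value map, URL-decoding the values.
 +   * For example (illustrative): "q=el+chico&meta=get_weights" yields
 +   * {q="el chico", meta="get_weights"}.
 +   * 
 +   * @param query the raw query string from the request URI
 +   * @return a map from parameter names to their decoded values
 +   * @throws UnsupportedEncodingException if UTF-8 is unsupported (cannot happen on a compliant JVM)
 +   */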
 +  public HashMap<String, String> queryToMap(String query) throws UnsupportedEncodingException {
 +    HashMap<String, String> result = new HashMap<String, String>();
 +    for (String param : query.split("&")) {
 +      String[] pair = param.split("=");
 +      if (pair.length > 1) {
 +        result.put(pair[0], URLDecoder.decode(pair[1], "UTF-8"));
 +      } else {
 +        result.put(pair[0], "");
 +      }
 +    }
 +    return result;
 +  } 
 +
 +  private class HttpWriter extends OutputStream {
 +
 +    private HttpExchange client = null;
 +    private OutputStream out = null;
 +    
 +    public HttpWriter(HttpExchange client) {
 +      this.client = client;
 +      client.getResponseHeaders().add("Access-Control-Allow-Origin", "*");
 +    }
 +    
 +    @Override
 +    public void write(byte[] response) throws IOException {
 +      client.sendResponseHeaders(200, response.length);
 +      out = client.getResponseBody();
 +      out.write(response);
 +      out.close();
 +    }
 +
 +    @Override
 +    public void write(int b) throws IOException {
 +      out.write(b);
 +    }
 +  }
 +
 +  /**
 +   * Called to handle an HTTP connection. Metadata commands found in the URL's query string
 +   * are processed if present, and a JSON-formatted response is returned to the caller.
 +   * 
 +   * @param client the client connection
 +   */
 +  @Override
 +  public synchronized void handle(HttpExchange client) throws IOException {
 +
 +    HashMap<String, String> params = queryToMap(client.getRequestURI().getQuery());
 +    String query = params.get("q");
 +    String meta = params.get("meta");
 +    
 +    BufferedReader reader = new BufferedReader(new StringReader(query));
 +    TranslationRequestStream request = new TranslationRequestStream(reader, joshuaConfiguration);
 +    
 +    Translations translations = decoder.decodeAll(request);
 +    JSONMessage message = new JSONMessage();
 +    if (meta != null && ! meta.isEmpty())
 +      handleMetadata(meta, message);
 +
 +    for (Translation translation: translations) {
 +      LOG.info("TRANSLATION: '{}' with {} k-best items", translation, translation.getStructuredTranslations().size());
 +      message.addTranslation(translation);
 +    }
 +
 +    OutputStream out = new HttpWriter(client);
 +    out.write(message.toString().getBytes(FILE_ENCODING));
 +    if (LOG.isDebugEnabled())
 +      LOG.debug(message.toString());
 +    out.close();
 +    
 +    reader.close();
 +  }
 +  
 +  /**
 +   * Processes metadata commands received in the HTTP request. Commands that produce output
 +   * append it to the JSON message rather than returning it.
 +   *
 +   * @param meta the metadata request
 +   * @param message the JSON response object that results are appended to
 +   */
 +  private void handleMetadata(String meta, JSONMessage message) {
 +    String[] tokens = meta.split("\\s+", 2);
 +    String type = tokens[0];
 +    String args = tokens.length > 1 ? tokens[1] : "";
 +    
 +    if (type.equals("get_weight")) {
 +      String weight = args;
 +      LOG.info("WEIGHT: {} = {}", weight, Decoder.weights.getOrDefault(hashFeature(weight)));
 +
 +    } else if (type.equals("set_weights")) {
 +      // Change a decoder weight
 +      String[] argTokens = args.split("\\s+");
 +      for (int i = 0; i < argTokens.length; i += 2) {
 +        String feature = argTokens[i];
 +        int featureId = hashFeature(feature);
 +        String newValue = argTokens[i+1];
 +        float old_weight = Decoder.weights.getOrDefault(featureId);
 +        Decoder.weights.put(featureId, Float.parseFloat(newValue));
 +        LOG.info("set_weights: {} {} -> {}", feature, old_weight, Decoder.weights.getOrDefault(featureId));
 +      }
 +      
 +      message.addMetaData("weights " + Decoder.weights.toString());
 +      
 +    } else if (type.equals("get_weights")) {
 +      message.addMetaData("weights " + Decoder.weights.toString());
 +      
 +    } else if (type.equals("add_rule")) {
 +      String[] argTokens = args.split(" \\|\\|\\| ");
 +  
 +      if (argTokens.length < 3) {
 +        LOG.error("* INVALID RULE '{}'", meta);
 +        return;
 +      }
 +      
 +      String lhs = argTokens[0];
 +      String source = argTokens[1];
 +      String target = argTokens[2];
 +      String featureStr = "";
++      String alignmentStr = "";
 +      if (argTokens.length > 3) 
 +        featureStr = argTokens[3];
-           
++      if (argTokens.length > 4)
++        alignmentStr = " ||| " + argTokens[4];
++      
 +      /* Prepend source and target side nonterminals for phrase-based decoding. Probably better
 +       * handled in each grammar type's addRule() function.
 +       */
 +      String ruleString = (joshuaConfiguration.search_algorithm.equals("stack"))
-           ? String.format("%s ||| [X,1] %s ||| [X,1] %s ||| custom=1 %s", lhs, source, target, featureStr)
-           : String.format("%s ||| %s ||| %s ||| custom=1 %s", lhs, source, target, featureStr);
++          ? String.format("%s ||| [X,1] %s ||| [X,1] %s ||| -1 %s %s", lhs, source, target, featureStr, alignmentStr)
++          : String.format("%s ||| %s ||| %s ||| -1 %s %s", lhs, source, target, featureStr, alignmentStr);
 +      
 +      Rule rule = new HieroFormatReader(decoder.getCustomPhraseTable().getOwner()).parseLine(ruleString);
 +      decoder.addCustomRule(rule);
 +      
 +      LOG.info("Added custom rule {}", rule.toString());
 +  
 +    } else if (type.equals("list_rules")) {
 +  
 +      LOG.info("list_rules");
 +      
 +      // Walk the grammar trie
 +      ArrayList<Trie> nodes = new ArrayList<Trie>();
 +      nodes.add(decoder.getCustomPhraseTable().getTrieRoot());
 +  
 +      while (nodes.size() > 0) {
 +        Trie trie = nodes.remove(0);
 +  
 +        if (trie == null)
 +          continue;
 +  
 +        if (trie.hasRules()) {
 +          for (Rule rule: trie.getRuleCollection().getRules()) {
 +            message.addRule(rule.toString());
 +            LOG.debug("Found rule: " + rule);
 +          }
 +        }
 +  
 +        if (trie.getExtensions() != null)
 +          nodes.addAll(trie.getExtensions());
 +      }
 +  
 +    } else if (type.equals("remove_rule")) {
 +      
 +      Rule rule = new HieroFormatReader(decoder.getCustomPhraseTable().getOwner()).parseLine(args);
 +      
 +      LOG.info("remove_rule " + rule);
 +  
 +      Trie trie = decoder.getCustomPhraseTable().getTrieRoot();
 +      int[] sourceTokens = rule.getSource();
 +      for (int i = 0; i < sourceTokens.length; i++) {
 +        Trie nextTrie = trie.match(sourceTokens[i]);
 +        if (nextTrie == null)
 +          return;
 +        
 +        trie = nextTrie;
 +      }
 +
 +      if (trie.hasRules()) {
 +        for (Rule ruleCand: trie.getRuleCollection().getRules()) {
 +          if (Arrays.equals(rule.getTarget(), ruleCand.getTarget())) {
 +            trie.getRuleCollection().getRules().remove(ruleCand);
 +            break;
 +          }
 +        }
 +        return;
 +      }
 +    }
 +  }
 +}
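
A note on usage: the handle() method above expects a query string with "q" (the input
sentence) and an optional "meta" command (get_weight, set_weights, get_weights, add_rule,
list_rules, remove_rule). The following minimal client sketch exercises that path; the
host, port, and example rule are illustrative assumptions, not part of this commit:

    // Hypothetical client for the HTTP interface implemented by ServerThread.handle().
    // Assumptions: a Joshua server is listening on localhost:5674; the rule is made up.
    import java.io.BufferedReader;
    import java.io.InputStreamReader;
    import java.net.HttpURLConnection;
    import java.net.URL;
    import java.net.URLEncoder;

    public class JoshuaHttpClientSketch {
      public static void main(String[] args) throws Exception {
        String q = URLEncoder.encode("el chico", "UTF-8");
        // add_rule format (see handleMetadata): lhs ||| source ||| target [||| features [||| alignments]]
        String meta = URLEncoder.encode("add_rule [X] ||| el chico ||| the boy", "UTF-8");
        URL url = new URL("http://localhost:5674/?q=" + q + "&meta=" + meta);
        HttpURLConnection conn = (HttpURLConnection) url.openConnection();
        try (BufferedReader in = new BufferedReader(
            new InputStreamReader(conn.getInputStream(), "UTF-8"))) {
          String line;
          while ((line = in.readLine()) != null) {
            System.out.println(line); // JSON message with translations and any metadata
          }
        }
      }
    }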

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dc756709/joshua-core/src/main/java/org/apache/joshua/tools/GrammarPacker.java
----------------------------------------------------------------------
diff --cc joshua-core/src/main/java/org/apache/joshua/tools/GrammarPacker.java
index 6c02d19,0000000..5861052
mode 100644,000000..100644
--- a/joshua-core/src/main/java/org/apache/joshua/tools/GrammarPacker.java
+++ b/joshua-core/src/main/java/org/apache/joshua/tools/GrammarPacker.java
@@@ -1,932 -1,0 +1,936 @@@
 +/**
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *  http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +package org.apache.joshua.tools;
 +
 +import static org.apache.joshua.decoder.ff.tm.OwnerMap.UNKNOWN_OWNER_ID;
 +import static org.apache.joshua.decoder.ff.tm.packed.PackedGrammar.VOCABULARY_FILENAME;
 +
 +import java.io.BufferedOutputStream;
 +import java.io.DataOutputStream;
 +import java.io.File;
 +import java.io.FileOutputStream;
 +import java.io.FileWriter;
 +import java.io.IOException;
 +import java.io.PrintWriter;
 +import java.nio.ByteBuffer;
 +import java.util.ArrayList;
 +import java.util.LinkedList;
 +import java.util.List;
 +import java.util.Map.Entry;
 +import java.util.Queue;
 +import java.util.TreeMap;
 +
 +import org.apache.joshua.corpus.Vocabulary;
 +import org.apache.joshua.decoder.ff.tm.Rule;
 +import org.apache.joshua.decoder.ff.tm.RuleFactory;
 +import org.apache.joshua.decoder.ff.tm.format.HieroFormatReader;
 +import org.apache.joshua.decoder.ff.tm.format.MosesFormatReader;
 +import org.apache.joshua.util.FormatUtils;
 +import org.apache.joshua.util.encoding.EncoderConfiguration;
 +import org.apache.joshua.util.encoding.FeatureTypeAnalyzer;
 +import org.apache.joshua.util.encoding.IntEncoder;
 +import org.apache.joshua.util.io.LineReader;
 +import org.slf4j.Logger;
 +import org.slf4j.LoggerFactory;
 +
 +
 +public class GrammarPacker {
 +
 +  private static final Logger LOG = LoggerFactory.getLogger(GrammarPacker.class);
 +
 +  /**
 +   * The packed grammar version number. Increment this any time you add new features, and update
 +   * the documentation.
 +   * 
 +   * Version history:
 +   * 
 +   * - 3 (May 2016). This was the first version that was marked. It removed the special phrase-
 +   * table packing that packed phrases without the [X,1] on the source and target sides, which
 +   * then required special handling in the decoder to use for phrase-based decoding.
 +   * 
-    * 
++   * - 4 (August 2016). Phrase-based decoding rewritten to represent phrases without a builtin
++   * nonterminal. Instead, cost-less glue rules are used in phrase-based decoding. This eliminates
++   * the need for special handling of phrase grammars (except for having to add a LHS), and lets
++   * phrase grammars be used in both hierarchical and phrase-based decoding without conversion.
++   *
 +   */
-   public static final int VERSION = 3;
++  public static final int VERSION = 4;
 +  
 +  // Size limit for slice in bytes.
 +  private static int DATA_SIZE_LIMIT = (int) (Integer.MAX_VALUE * 0.8);
 +  // Estimated average number of feature entries for one rule.
 +  private static int DATA_SIZE_ESTIMATE = 20;
 +
 +  private static final String SOURCE_WORDS_SEPARATOR = " ||| ";
 +
 +  // Output directory name.
 +  private String output;
 +
 +  // Input grammar to be packed.
 +  private String grammar;
 +
 +  public String getGrammar() {
 +    return grammar;
 +  }
 +
 +  public String getOutputDirectory() {
 +    return output;
 +  }
 +
 +  // Approximate maximum size of a slice in number of rules
 +  private int approximateMaximumSliceSize;
 +
 +  private boolean labeled;
 +
 +  private boolean packAlignments;
 +  private boolean grammarAlignments;
 +  private String alignments;
 +
 +  private FeatureTypeAnalyzer types;
 +  private EncoderConfiguration encoderConfig;
 +
 +  private String dump;
 +
 +  private int max_source_len;
 +
 +  public GrammarPacker(String grammar_filename, String config_filename, String output_filename,
 +      String alignments_filename, String featuredump_filename, boolean grammar_alignments,
 +      int approximateMaximumSliceSize)
 +      throws IOException {
 +    this.labeled = true;
 +    this.grammar = grammar_filename;
 +    this.output = output_filename;
 +    this.dump = featuredump_filename;
 +    this.grammarAlignments = grammar_alignments;
 +    this.approximateMaximumSliceSize = approximateMaximumSliceSize;
 +    this.max_source_len = 0;
 +
 +    // TODO: Always open encoder config? This is debatable.
 +    this.types = new FeatureTypeAnalyzer(true);
 +
 +    this.alignments = alignments_filename;
 +    packAlignments = grammarAlignments || (alignments != null);
 +    if (!packAlignments) {
 +      LOG.info("No alignments file or grammar specified, skipping.");
 +    } else if (alignments != null && !new File(alignments_filename).exists()) {
 +      throw new RuntimeException("Alignments file does not exist: " + alignments);
 +    }
 +
 +    if (config_filename != null) {
 +      readConfig(config_filename);
 +      types.readConfig(config_filename);
 +    } else {
 +      LOG.info("No config specified. Attempting auto-detection of feature types.");
 +    }
 +    LOG.info("Approximate maximum slice size (in # of rules) set to {}", approximateMaximumSliceSize);
 +
 +    File working_dir = new File(output);
 +    working_dir.mkdir();
 +    if (!working_dir.exists()) {
 +      throw new RuntimeException("Failed creating output directory.");
 +    }
 +  }
 +
 +  private void readConfig(String config_filename) throws IOException {
 +    LineReader reader = new LineReader(config_filename);
 +    while (reader.hasNext()) {
 +      // Clean up line, chop comments off and skip if the result is empty.
 +      String line = reader.next().trim();
 +      if (line.indexOf('#') != -1)
 +        line = line.substring(0, line.indexOf('#'));
 +      if (line.isEmpty())
 +        continue;
 +      String[] fields = line.split("[\\s]+");
 +
 +      if (fields.length < 2) {
 +        throw new RuntimeException("Incomplete line in config.");
 +      }
 +      if ("slice_size".equals(fields[0])) {
 +        // Number of records to concurrently load into memory for sorting.
 +        approximateMaximumSliceSize = Integer.parseInt(fields[1]);
 +      }
 +    }
 +    reader.close();
 +  }
 +
 +  /**
 +   * Executes the packing.
 +   * 
 +   * @throws IOException if there is an error reading the grammar
 +   */
 +  public void pack() throws IOException {
 +    LOG.info("Beginning exploration pass.");
 +
 +    // Explore pass. Learn vocabulary and feature value histograms.
 +    LOG.info("Exploring: {}", grammar);
 +
 +    HieroFormatReader grammarReader = getGrammarReader();
 +    explore(grammarReader);
 +
 +    LOG.info("Exploration pass complete. Freezing vocabulary and finalizing encoders.");
 +    if (dump != null) {
 +      PrintWriter dump_writer = new PrintWriter(dump);
 +      dump_writer.println(types.toString());
 +      dump_writer.close();
 +    }
 +
 +    types.inferTypes(this.labeled);
 +    LOG.info("Type inference complete.");
 +
 +    LOG.info("Finalizing encoding.");
 +
 +    LOG.info("Writing encoding.");
 +    types.write(output + File.separator + "encoding");
 +
 +    writeVocabulary();
 +
 +    String configFile = output + File.separator + "config";
 +    LOG.info("Writing config to '{}'", configFile);
 +    // Write config options
 +    FileWriter config = new FileWriter(configFile);
 +    config.write(String.format("version = %d\n", VERSION));
 +    config.write(String.format("max-source-len = %d\n", max_source_len));
 +    config.close();
 +
 +    // Read previously written encoder configuration to match up to changed
 +    // vocabulary id's.
 +    LOG.info("Reading encoding.");
 +    encoderConfig = new EncoderConfiguration();
 +    encoderConfig.load(output + File.separator + "encoding");
 +
 +    LOG.info("Beginning packing pass.");
 +    // Actual binarization pass. Slice and pack source, target and data.
 +    grammarReader = getGrammarReader();
 +    LineReader alignment_reader = null;
 +    if (packAlignments && !grammarAlignments)
 +      alignment_reader = new LineReader(alignments);
 +    binarize(grammarReader, alignment_reader);
 +    LOG.info("Packing complete.");
 +
 +    LOG.info("Packed grammar in: {}", output);
 +    LOG.info("Done.");
 +  }
 +
 +  /**
 +   * Returns a reader that turns whatever file format is found into unowned Hiero grammar rules.
 +   * That is, features are NOT prepended with an owner string at packing time.
 +   * 
 +   * @return a grammar reader matching the detected format (Hiero or Moses)
 +   * @throws IOException if the grammar file cannot be read
 +   */
 +  private HieroFormatReader getGrammarReader() throws IOException {
 +    LineReader reader = new LineReader(grammar);
 +    String line = reader.next();
 +    if (line.startsWith("[")) {
 +      return new HieroFormatReader(grammar, UNKNOWN_OWNER_ID);
 +    } else {
 +      return new MosesFormatReader(grammar, UNKNOWN_OWNER_ID);
 +    }
 +  }
 +
 +  /**
 +   * This first pass over the grammar learns the vocabulary and observes feature value
 +   * histograms so that feature types and encoders can be inferred before packing.
 +   * 
 +   * @param reader the grammar reader to iterate over
 +   */
 +  private void explore(HieroFormatReader reader) {
 +
 +    // We always assume a labeled grammar. Unlabeled features are assumed to be dense and to always
 +    // appear in the same order. They are assigned numeric names in order of appearance.
 +    this.types.setLabeled(true);
 +
 +    for (Rule rule : reader) {
 +
 +      max_source_len = Math.max(max_source_len, rule.getSource().length);
 +
 +      /* Add symbols to vocabulary.
 +       * NOTE: In case of nonterminals, we add both stripped versions ("[X]")
 +       * and "[X,1]" to the vocabulary.
 +       * 
 +       * TODO: MJP May 2016: Is it necessary to add [X,1]? This is currently being done in
 +       * {@link HieroFormatReader}, which is called by {@link MosesFormatReader}. 
 +       */
 +
 +      // pass the value through the appropriate encoder.
 +      for (final Entry<Integer, Float> entry : rule.getFeatureVector().entrySet()) {
 +        types.observe(entry.getKey(), entry.getValue());
 +      }
 +    }
 +  }
 +
 +  /**
 +   * Returns a String encoding the first two source words.
 +   * If there is only one source word, the empty string is used for the second.
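 +   * For example (illustrative): ["el", "chico", "bueno"] yields "el ||| chico", and ["el"] yields "el ||| ".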
 +   */
 +  private String getFirstTwoSourceWords(final String[] source_words) {
 +    return source_words[0] + SOURCE_WORDS_SEPARATOR + ((source_words.length > 1) ? source_words[1] : "");
 +  }
 +
 +  private void binarize(HieroFormatReader grammarReader, LineReader alignment_reader) throws IOException {
 +    int counter = 0;
 +    int slice_counter = 0;
 +    int num_slices = 0;
 +
 +    boolean ready_to_flush = false;
 +    // to determine when flushing is possible
 +    String prev_first_two_source_words = null;
 +
 +    PackingTrie<SourceValue> source_trie = new PackingTrie<SourceValue>();
 +    PackingTrie<TargetValue> target_trie = new PackingTrie<TargetValue>();
 +    FeatureBuffer feature_buffer = new FeatureBuffer();
 +
 +    AlignmentBuffer alignment_buffer = null;
 +    if (packAlignments)
 +      alignment_buffer = new AlignmentBuffer();
 +
 +    TreeMap<Integer, Float> features = new TreeMap<Integer, Float>();
 +    for (Rule rule : grammarReader) {
 +      counter++;
 +      slice_counter++;
 +
 +      String lhs_word = Vocabulary.word(rule.getLHS());
 +      String[] source_words = rule.getSourceWords().split("\\s+");
 +      String[] target_words = rule.getTargetWords().split("\\s+");
 +
 +      // Reached slice limit size, indicate that we're closing up.
 +      if (!ready_to_flush
 +          && (slice_counter > approximateMaximumSliceSize
 +              || feature_buffer.overflowing()
 +              || (packAlignments && alignment_buffer.overflowing()))) {
 +        ready_to_flush = true;
 +        // store the first two source words when slice size limit was reached
 +        prev_first_two_source_words = getFirstTwoSourceWords(source_words);
 +      }
 +      // ready to flush
 +      if (ready_to_flush) {
 +        final String first_two_source_words = getFirstTwoSourceWords(source_words);
 +        // the grammar can only be partitioned at the level of first two source word changes.
 +        // Thus, we can only flush if the current first two source words differ from the ones
 +        // when the slice size limit was reached.
 +        if (!first_two_source_words.equals(prev_first_two_source_words)) {
 +          LOG.warn("ready to flush and first two words have changed ({} vs. {})",
 +              prev_first_two_source_words, first_two_source_words);
 +          LOG.info("flushing {} rules to slice.", slice_counter);
 +          flush(source_trie, target_trie, feature_buffer, alignment_buffer, num_slices);
 +          source_trie.clear();
 +          target_trie.clear();
 +          feature_buffer.clear();
 +          if (packAlignments)
 +            alignment_buffer.clear();
 +
 +          num_slices++;
 +          slice_counter = 0;
 +          ready_to_flush = false;
 +        }
 +      }
 +
 +      int alignment_index = -1;
 +      // If present, process alignments.
 +      if (packAlignments) {
 +        byte[] alignments = null;
 +        if (grammarAlignments) {
 +          alignments = rule.getAlignment();
 +        } else {
 +          if (!alignment_reader.hasNext()) {
 +            LOG.error("No more alignments starting in line {}", counter);
 +            throw new RuntimeException("No more alignments starting in line " + counter);
 +          }
 +          alignments = RuleFactory.parseAlignmentString(alignment_reader.next().trim());
 +        }
 +        alignment_index = alignment_buffer.add(alignments);
 +      }
 +
 +      // Process features.
 +      // Implicitly sort via TreeMap, write to data buffer, remember position
 +      // to pass on to the source trie node.
 +      features.clear();
 +      for (Entry<Integer, Float> entry : rule.getFeatureVector().entrySet()) {
 +        int featureId = entry.getKey();
 +        float featureValue = entry.getValue();
 +        if (featureValue != 0f) {
 +          features.put(encoderConfig.innerId(featureId), featureValue);
 +        }
 +      }
 +
 +      int features_index = feature_buffer.add(features);
 +
 +      // Sanity check on the data block index.
 +      if (packAlignments && features_index != alignment_index) {
 +        LOG.error("Block index mismatch between features ({}) and alignments ({}).",
 +            features_index, alignment_index);
 +        throw new RuntimeException("Data block index mismatch.");
 +      }
 +
 +      // Process source side.
 +      SourceValue sv = new SourceValue(Vocabulary.id(lhs_word), features_index);
 +      int[] source = new int[source_words.length];
 +      for (int i = 0; i < source_words.length; i++) {
 +        if (FormatUtils.isNonterminal(source_words[i]))
 +          source[i] = Vocabulary.id(FormatUtils.stripNonTerminalIndex(source_words[i]));
 +        else
 +          source[i] = Vocabulary.id(source_words[i]);
 +      }
 +      source_trie.add(source, sv);
 +
 +      // Process target side.
 +      TargetValue tv = new TargetValue(sv);
 +      int[] target = new int[target_words.length];
 +      for (int i = 0; i < target_words.length; i++) {
 +        if (FormatUtils.isNonterminal(target_words[i])) {
 +          target[target_words.length - (i + 1)] = -FormatUtils.getNonterminalIndex(target_words[i]);
 +        } else {
 +          target[target_words.length - (i + 1)] = Vocabulary.id(target_words[i]);
 +        }
 +      }
 +      target_trie.add(target, tv);
 +    }
 +    // flush last slice and clear buffers
 +    flush(source_trie, target_trie, feature_buffer, alignment_buffer, num_slices);
 +  }
 +
 +  /**
 +   * Serializes the source, target and feature data structures into interlinked binary files. Target
 +   * is written first, into a skeletal (nodes don't carry any data) upward-pointing trie, updating
 +   * the linking source trie nodes with the position once it is known. Source and feature data are
 +   * written simultaneously. The source structure is written into a downward-pointing trie and
 +   * stores the rule's lhs as well as links to the target and feature stream. The feature stream is
 +   * prompted to write out a block each time a source value that links to it is serialized.
 +   * 
 +   * @param source_trie the downward-pointing source trie for this slice
 +   * @param target_trie the upward-pointing target trie for this slice
 +   * @param feature_buffer the buffered feature data for this slice
 +   * @param alignment_buffer the buffered alignment data (may be null if alignments are not packed)
 +   * @param id the slice number, used to name the output files
 +   * @throws IOException if writing a slice file fails
 +   */
 +  private void flush(PackingTrie<SourceValue> source_trie,
 +      PackingTrie<TargetValue> target_trie, FeatureBuffer feature_buffer,
 +      AlignmentBuffer alignment_buffer, int id) throws IOException {
 +    // Make a slice object for this piece of the grammar.
 +    PackingFileTuple slice = new PackingFileTuple("slice_" + String.format("%05d", id));
 +    // Pull out the streams for source, target and data output.
 +    DataOutputStream source_stream = slice.getSourceOutput();
 +    DataOutputStream target_stream = slice.getTargetOutput();
 +    DataOutputStream target_lookup_stream = slice.getTargetLookupOutput();
 +    DataOutputStream feature_stream = slice.getFeatureOutput();
 +    DataOutputStream alignment_stream = slice.getAlignmentOutput();
 +
 +    Queue<PackingTrie<TargetValue>> target_queue;
 +    Queue<PackingTrie<SourceValue>> source_queue;
 +
 +    // The number of bytes both written into the source stream and
 +    // buffered in the source queue.
 +    int source_position;
 +    // The number of bytes written into the target stream.
 +    int target_position;
 +
 +    // Add trie root into queue, set target position to 0 and set cumulated
 +    // size to size of trie root.
 +    target_queue = new LinkedList<PackingTrie<TargetValue>>();
 +    target_queue.add(target_trie);
 +    target_position = 0;
 +
 +    // Target lookup table for trie levels.
 +    int current_level_size = 1;
 +    int next_level_size = 0;
 +    ArrayList<Integer> target_lookup = new ArrayList<Integer>();
 +
 +    // Packing loop for upwards-pointing target trie.
 +    while (!target_queue.isEmpty()) {
 +      // Pop top of queue.
 +      PackingTrie<TargetValue> node = target_queue.poll();
 +      // Register that this is where we're writing the node to.
 +      node.address = target_position;
 +      // Tell source nodes that we're writing to this position in the file.
 +      for (TargetValue tv : node.values)
 +        tv.parent.target = node.address;
 +      // Write link to parent.
 +      if (node.parent != null)
 +        target_stream.writeInt(node.parent.address);
 +      else
 +        target_stream.writeInt(-1);
 +      target_stream.writeInt(node.symbol);
 +      // Enqueue children.
 +      for (int k : node.children.descendingKeySet()) {
 +        PackingTrie<TargetValue> child = node.children.get(k);
 +        target_queue.add(child);
 +      }
 +      target_position += node.size(false, true);
 +      next_level_size += node.children.descendingKeySet().size();
 +
 +      current_level_size--;
 +      if (current_level_size == 0) {
 +        target_lookup.add(target_position);
 +        current_level_size = next_level_size;
 +        next_level_size = 0;
 +      }
 +    }
 +    target_lookup_stream.writeInt(target_lookup.size());
 +    for (int i : target_lookup)
 +      target_lookup_stream.writeInt(i);
 +    target_lookup_stream.close();
 +
 +    // Setting up for source and data writing.
 +    source_queue = new LinkedList<PackingTrie<SourceValue>>();
 +    source_queue.add(source_trie);
 +    source_position = source_trie.size(true, false);
 +    source_trie.address = target_position;
 +
 +    // Ready data buffers for writing.
 +    feature_buffer.initialize();
 +    if (packAlignments)
 +      alignment_buffer.initialize();
 +
 +    // Packing loop for downwards-pointing source trie.
 +    while (!source_queue.isEmpty()) {
 +      // Pop top of queue.
 +      PackingTrie<SourceValue> node = source_queue.poll();
 +      // Write number of children.
 +      source_stream.writeInt(node.children.size());
 +      // Write links to children.
 +      for (int k : node.children.descendingKeySet()) {
 +        PackingTrie<SourceValue> child = node.children.get(k);
 +        // Enqueue child.
 +        source_queue.add(child);
 +        // Child's address will be at the current end of the queue.
 +        child.address = source_position;
 +        // Advance cumulated size by child's size.
 +        source_position += child.size(true, false);
 +        // Write the link.
 +        source_stream.writeInt(k);
 +        source_stream.writeInt(child.address);
 +      }
 +      // Write number of data items.
 +      source_stream.writeInt(node.values.size());
 +      // Write lhs and links to target and data.
 +      for (SourceValue sv : node.values) {
 +        int feature_block_index = feature_buffer.write(sv.data);
 +        if (packAlignments) {
 +          int alignment_block_index = alignment_buffer.write(sv.data);
 +          if (alignment_block_index != feature_block_index) {
 +            LOG.error("Block index mismatch.");
 +            throw new RuntimeException("Block index mismatch: alignment (" + alignment_block_index
 +                + ") and features (" + feature_block_index + ") don't match.");
 +          }
 +        }
 +        source_stream.writeInt(sv.lhs);
 +        source_stream.writeInt(sv.target);
 +        source_stream.writeInt(feature_block_index);
 +      }
 +    }
 +    // Flush the data stream.
 +    feature_buffer.flush(feature_stream);
 +    if (packAlignments)
 +      alignment_buffer.flush(alignment_stream);
 +
 +    target_stream.close();
 +    source_stream.close();
 +    feature_stream.close();
 +    if (packAlignments)
 +      alignment_stream.close();
 +  }
 +
 +  public void writeVocabulary() throws IOException {
 +    final String vocabularyFilename = output + File.separator + VOCABULARY_FILENAME;
 +    LOG.info("Writing vocabulary to {}", vocabularyFilename);
 +    Vocabulary.write(vocabularyFilename);
 +  }
 +
 +  /**
 +   * Integer-labeled, doubly-linked trie with some provisions for packing.
 +   * 
 +   * @author Juri Ganitkevitch
 +   * 
 +   * @param <D> The trie's value type.
 +   */
 +  class PackingTrie<D extends PackingTrieValue> {
 +    int symbol;
 +    PackingTrie<D> parent;
 +
 +    TreeMap<Integer, PackingTrie<D>> children;
 +    List<D> values;
 +
 +    int address;
 +
 +    PackingTrie() {
 +      address = -1;
 +
 +      symbol = 0;
 +      parent = null;
 +
 +      children = new TreeMap<Integer, PackingTrie<D>>();
 +      values = new ArrayList<D>();
 +    }
 +
 +    PackingTrie(PackingTrie<D> parent, int symbol) {
 +      this();
 +      this.parent = parent;
 +      this.symbol = symbol;
 +    }
 +
 +    void add(int[] path, D value) {
 +      add(path, 0, value);
 +    }
 +
 +    private void add(int[] path, int index, D value) {
 +      if (index == path.length)
 +        this.values.add(value);
 +      else {
 +        PackingTrie<D> child = children.get(path[index]);
 +        if (child == null) {
 +          child = new PackingTrie<D>(this, path[index]);
 +          children.put(path[index], child);
 +        }
 +        child.add(path, index + 1, value);
 +      }
 +    }
 +
 +    /**
 +     * Calculate the size (in ints) of a packed trie node. Distinguishes downwards pointing (parent
 +     * points to children) from upwards pointing (children point to parent) tries, as well as
 +     * skeletal (no data, just the labeled links) and non-skeletal (nodes have a data block)
 +     * packing.
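 +     * 
 +     * Worked example (illustrative): a downward-pointing, non-skeletal node with two
 +     * children and one SourceValue (whose size() is 3) takes 1 + 2*2 + 1 + 1*3 = 9 ints.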
 +     * 
 +     * @param downwards Are we packing into a downwards-pointing trie?
 +     * @param skeletal Are we packing into a skeletal trie?
 +     * 
 +     * @return Number of ints the trie node would occupy.
 +     */
 +    int size(boolean downwards, boolean skeletal) {
 +      int size = 0;
 +      if (downwards) {
 +        // Number of children and links to children.
 +        size = 1 + 2 * children.size();
 +      } else {
 +        // Link to parent.
 +        size += 2;
 +      }
 +      // Non-skeletal packing: number of data items.
 +      if (!skeletal)
 +        size += 1;
 +      // Non-skeletal packing: write size taken up by data items.
 +      if (!skeletal && !values.isEmpty())
 +        size += values.size() * values.get(0).size();
 +
 +      return size;
 +    }
 +
 +    void clear() {
 +      children.clear();
 +      values.clear();
 +    }
 +  }
 +
 +  interface PackingTrieValue {
 +    int size();
 +  }
 +
 +  class SourceValue implements PackingTrieValue {
 +    int lhs;
 +    int data;
 +    int target;
 +
 +    public SourceValue() {
 +    }
 +
 +    SourceValue(int lhs, int data) {
 +      this.lhs = lhs;
 +      this.data = data;
 +    }
 +
 +    void setTarget(int target) {
 +      this.target = target;
 +    }
 +
 +    public int size() {
 +      return 3;
 +    }
 +  }
 +
 +  class TargetValue implements PackingTrieValue {
 +    SourceValue parent;
 +
 +    TargetValue(SourceValue parent) {
 +      this.parent = parent;
 +    }
 +
 +    public int size() {
 +      return 0;
 +    }
 +  }
 +
 +  abstract class PackingBuffer<T> {
 +    private byte[] backing;
 +    protected ByteBuffer buffer;
 +
 +    protected ArrayList<Integer> memoryLookup;
 +    protected int totalSize;
 +    protected ArrayList<Integer> onDiskOrder;
 +
 +    PackingBuffer() throws IOException {
 +      allocate();
 +      memoryLookup = new ArrayList<Integer>();
 +      onDiskOrder = new ArrayList<Integer>();
 +      totalSize = 0;
 +    }
 +
 +    abstract int add(T item);
 +
 +    // Allocate a reasonably-sized buffer for the feature data.
 +    private void allocate() {
 +      backing = new byte[approximateMaximumSliceSize * DATA_SIZE_ESTIMATE];
 +      buffer = ByteBuffer.wrap(backing);
 +    }
 +
 +    // Reallocate the backing array and buffer, copies data over.
 +    protected void reallocate() {
 +      if (backing.length == Integer.MAX_VALUE)
 +        return;
 +      long attempted_length = backing.length * 2L;
 +      int new_length;
 +      // Detect overflow.
 +      if (attempted_length >= Integer.MAX_VALUE)
 +        new_length = Integer.MAX_VALUE;
 +      else
 +        new_length = (int) attempted_length;
 +      byte[] new_backing = new byte[new_length];
 +      System.arraycopy(backing, 0, new_backing, 0, backing.length);
 +      int old_position = buffer.position();
 +      ByteBuffer new_buffer = ByteBuffer.wrap(new_backing);
 +      new_buffer.position(old_position);
 +      buffer = new_buffer;
 +      backing = new_backing;
 +    }
 +
 +    /**
 +     * Prepare the data buffer for disk writing.
 +     */
 +    void initialize() {
 +      onDiskOrder.clear();
 +    }
 +
 +    /**
 +     * Enqueue a data block for later writing.
 +     * 
 +     * @param block_index The index of the data block to add to writing queue.
 +     * @return The to-be-written block's output index.
 +     */
 +    int write(int block_index) {
 +      onDiskOrder.add(block_index);
 +      return onDiskOrder.size() - 1;
 +    }
 +
 +    /**
 +     * Performs the actual writing to disk in the order specified by calls to write() since the last
 +     * call to initialize().
 +     * 
 +     * @param out
 +     * @throws IOException
 +     */
 +    void flush(DataOutputStream out) throws IOException {
 +      writeHeader(out);
 +      int size;
 +      int block_address;
 +      for (int block_index : onDiskOrder) {
 +        block_address = memoryLookup.get(block_index);
 +        size = blockSize(block_index);
 +        out.write(backing, block_address, size);
 +      }
 +    }
 +
 +    void clear() {
 +      buffer.clear();
 +      memoryLookup.clear();
 +      onDiskOrder.clear();
 +    }
 +
 +    boolean overflowing() {
 +      return (buffer.position() >= DATA_SIZE_LIMIT);
 +    }
 +
 +    private void writeHeader(DataOutputStream out) throws IOException {
 +      if (out.size() == 0) {
 +        out.writeInt(onDiskOrder.size());
 +        out.writeInt(totalSize);
 +        int disk_position = headerSize();
 +        for (int block_index : onDiskOrder) {
 +          out.writeInt(disk_position);
 +          disk_position += blockSize(block_index);
 +        }
 +      } else {
 +        throw new RuntimeException("Got a used stream for header writing.");
 +      }
 +    }
 +
 +    private int headerSize() {
 +      // One integer for each data block, plus number of blocks and total size.
 +      return 4 * (onDiskOrder.size() + 2);
 +    }
 +
 +    private int blockSize(int block_index) {
 +      int block_address = memoryLookup.get(block_index);
 +      return (block_index < memoryLookup.size() - 1 ? memoryLookup.get(block_index + 1) : totalSize)
 +          - block_address;
 +    }
 +  }
 +
 +  class FeatureBuffer extends PackingBuffer<TreeMap<Integer, Float>> {
 +
 +    private IntEncoder idEncoder;
 +
 +    FeatureBuffer() throws IOException {
 +      super();
 +      idEncoder = types.getIdEncoder();
 +      LOG.info("Encoding feature ids in: {}", idEncoder.getKey());
 +    }
 +
 +    /**
 +     * Add a block of features to the buffer.
 +     * 
 +     * @param features TreeMap with the features for one rule.
 +     * @return The index of the resulting data block.
 +     */
 +    int add(TreeMap<Integer, Float> features) {
 +      int data_position = buffer.position();
 +
 +      // Over-estimate how much room this addition will need: for each
 +      // feature (ID_SIZE for label, "upper bound" of 4 for the value), plus ID_SIZE for
 +      // the number of features. If this won't fit, reallocate the buffer.
 +      int size_estimate = (4 + EncoderConfiguration.ID_SIZE) * features.size()
 +          + EncoderConfiguration.ID_SIZE;
 +      if (buffer.capacity() - buffer.position() <= size_estimate)
 +        reallocate();
 +
 +      // Write features to buffer.
 +      idEncoder.write(buffer, features.size());
 +      for (Integer k : features.descendingKeySet()) {
 +        float v = features.get(k);
 +        // Sparse features.
 +        if (v != 0.0) {
 +          idEncoder.write(buffer, k);
 +          encoderConfig.encoder(k).write(buffer, v);
 +        }
 +      }
 +      // Store position the block was written to.
 +      memoryLookup.add(data_position);
 +      // Update total size (in bytes).
 +      totalSize = buffer.position();
 +
 +      // Return block index.
 +      return memoryLookup.size() - 1;
 +    }
 +  }
 +
 +  class AlignmentBuffer extends PackingBuffer<byte[]> {
 +
 +    AlignmentBuffer() throws IOException {
 +      super();
 +    }
 +
 +    /**
 +     * Add a rule's alignments to the buffer.
 +     * 
 +     * @param alignments a byte array with the alignment points for one rule.
 +     * @return The index of the resulting data block.
 +     */
 +    int add(byte[] alignments) {
 +      int data_position = buffer.position();
 +      int size_estimate = alignments.length + 1;
 +      if (buffer.capacity() - buffer.position() <= size_estimate)
 +        reallocate();
 +
 +      // Write alignment points to buffer.
 +      buffer.put((byte) (alignments.length / 2));
 +      buffer.put(alignments);
 +
 +      // Store position the block was written to.
 +      memoryLookup.add(data_position);
 +      // Update total size (in bytes).
 +      totalSize = buffer.position();
 +      // Return block index.
 +      return memoryLookup.size() - 1;
 +    }
 +  }
 +
 +  class PackingFileTuple implements Comparable<PackingFileTuple> {
 +    private File sourceFile;
 +    private File targetLookupFile;
 +    private File targetFile;
 +
 +    private File featureFile;
 +    private File alignmentFile;
 +
 +    PackingFileTuple(String prefix) {
 +      sourceFile = new File(output + File.separator + prefix + ".source");
 +      targetFile = new File(output + File.separator + prefix + ".target");
 +      targetLookupFile = new File(output + File.separator + prefix + ".target.lookup");
 +      featureFile = new File(output + File.separator + prefix + ".features");
 +
 +      alignmentFile = null;
 +      if (packAlignments)
 +        alignmentFile = new File(output + File.separator + prefix + ".alignments");
 +
 +      LOG.info("Allocated slice: {}", sourceFile.getAbsolutePath());
 +    }
 +
 +    DataOutputStream getSourceOutput() throws IOException {
 +      return getOutput(sourceFile);
 +    }
 +
 +    DataOutputStream getTargetOutput() throws IOException {
 +      return getOutput(targetFile);
 +    }
 +
 +    DataOutputStream getTargetLookupOutput() throws IOException {
 +      return getOutput(targetLookupFile);
 +    }
 +
 +    DataOutputStream getFeatureOutput() throws IOException {
 +      return getOutput(featureFile);
 +    }
 +
 +    DataOutputStream getAlignmentOutput() throws IOException {
 +      if (alignmentFile != null)
 +        return getOutput(alignmentFile);
 +      return null;
 +    }
 +
 +    private DataOutputStream getOutput(File file) throws IOException {
 +      if (file.createNewFile()) {
 +        return new DataOutputStream(new BufferedOutputStream(new FileOutputStream(file)));
 +      } else {
 +        throw new RuntimeException("File doesn't exist: " + file.getName());
 +      }
 +    }
 +
 +    long getSize() {
 +      return sourceFile.length() + targetFile.length() + featureFile.length();
 +    }
 +
 +    @Override
 +    public int compareTo(PackingFileTuple o) {
 +      if (getSize() > o.getSize()) {
 +        return -1;
 +      } else if (getSize() < o.getSize()) {
 +        return 1;
 +      } else {
 +        return 0;
 +      }
 +    }
 +  }
 +}
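
For orientation, the two-pass flow above (explore, then binarize into slices) is driven
entirely by the constructor and pack(). A minimal usage sketch follows; the file names
are placeholders, not part of this commit:

    // Sketch only: paths are hypothetical. Packs a grammar with embedded alignments
    // (grammar_alignments = true) and auto-detected feature types (config = null).
    import java.io.IOException;
    import org.apache.joshua.tools.GrammarPacker;

    public class PackGrammarSketch {
      public static void main(String[] args) throws IOException {
        GrammarPacker packer = new GrammarPacker(
            "grammar.filtered.gz", // grammar_filename
            null,                  // config_filename: auto-detect feature types
            "grammar.packed",      // output_filename: the output directory
            null,                  // alignments_filename: none, alignments are in the grammar
            null,                  // featuredump_filename: no feature dump requested
            true,                  // grammar_alignments
            1000000);              // approximate maximum slice size, in rules
        packer.pack();
      }
    }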

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dc756709/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMBerkeleySentenceProbablityTest.java
----------------------------------------------------------------------
diff --cc joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMBerkeleySentenceProbablityTest.java
index 7752785,0000000..cbe6a7f
mode 100644,000000..100644
--- a/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMBerkeleySentenceProbablityTest.java
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMBerkeleySentenceProbablityTest.java
@@@ -1,64 -1,0 +1,64 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *  http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +package org.apache.joshua.decoder.ff.lm.berkeley_lm;
 +
 +import edu.berkeley.nlp.lm.ArrayEncodedNgramLanguageModel;
 +
 +import org.apache.joshua.corpus.Vocabulary;
 +import org.testng.annotations.AfterMethod;
 +import org.testng.annotations.Test;
 +
 +import static org.testng.Assert.assertTrue;
 +import static org.testng.Assert.assertFalse;
 +
 +import static org.testng.Assert.assertEquals;
 +
 +public class LMBerkeleySentenceProbablityTest {
 +
 +  @Test
 +  public void verifySentenceLogProbability() {
-     LMGrammarBerkeley grammar = new LMGrammarBerkeley(2, "resources/berkeley_lm/lm");
++    LMGrammarBerkeley grammar = new LMGrammarBerkeley(2, "src/test/resources/berkeley_lm/lm");
 +    grammar.registerWord("the", 2);
 +    grammar.registerWord("chat-rooms", 3);
 +    grammar.registerWord("<unk>", 0);
 +
 +    ArrayEncodedNgramLanguageModel<String> lm = grammar.getLM();
 +    float expected =
 +        lm.getLogProb(new int[] {}, 0, 0)
 +        + lm.getLogProb(new int[] {0}, 0, 1)
 +        + lm.getLogProb(new int[] {0, 2}, 0, 2)
 +        + lm.getLogProb(new int[] {2, 3}, 0, 2)
 +        + lm.getLogProb(new int[] {3, 0}, 0, 2);
 +
 +    float result = grammar.sentenceLogProbability(new int[] {0, 2, 3, 0}, 2, 0);
 +    assertEquals(expected, result, 0.0);
 +  }
 +  
 +  @Test
 +  public void givenUnknownWord_whenIsOov_thenCorrectlyDetected() {
-     LMGrammarBerkeley lm = new LMGrammarBerkeley(2, "resources/berkeley_lm/lm");
++    LMGrammarBerkeley lm = new LMGrammarBerkeley(2, "src/test/resources/berkeley_lm/lm");
 +    assertTrue(lm.isOov(Vocabulary.id("UNKNOWN_WORD")));
 +    assertFalse(lm.isOov(Vocabulary.id("chat-rooms")));
 +  }
 +  
 +  @AfterMethod
 +  public void tearDown() {
 +    Vocabulary.clear();
 +  }
 +}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dc756709/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeleyTest.java
----------------------------------------------------------------------
diff --cc joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeleyTest.java
index cc4a94c,0000000..cf04a3d
mode 100644,000000..100644
--- a/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeleyTest.java
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/berkeley_lm/LMGrammarBerkeleyTest.java
@@@ -1,83 -1,0 +1,83 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *  http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +package org.apache.joshua.decoder.ff.lm.berkeley_lm;
 +
 +import org.apache.joshua.decoder.Decoder;
 +import org.apache.joshua.decoder.JoshuaConfiguration;
 +import org.apache.joshua.decoder.Translation;
 +import org.apache.joshua.decoder.segment_file.Sentence;
 +import org.testng.annotations.AfterMethod;
 +import org.testng.annotations.DataProvider;
 +import org.testng.annotations.Test;
 +
 +import static org.testng.Assert.assertEquals;
 +
 +/**
 + * Replacement for test/lm/berkeley/test.sh regression test
 + */
 +
 +public class LMGrammarBerkeleyTest {
 +
 +  private static final String INPUT = "the chat-rooms";
 +  private static final String EXPECTED_OUTPUT = "glue_0=-2.000000 lm_0=-7.152632\n";
 +  private static final String EXPECTED_OUTPUT_WITH_OOV = "glue_0=-2.000000 lm_0=-7.152632 lm_0_oov=0.000000\n";
 +  private static final String[] OPTIONS = "-v 1 -output-format %f".split(" ");
 +
 +  private JoshuaConfiguration joshuaConfig;
 +  private Decoder decoder;
 +
 +  @DataProvider(name = "languageModelFiles")
 +  public Object[][] lmFiles() {
-     return new Object[][]{{"resources/berkeley_lm/lm"},
-             {"resources/berkeley_lm/lm.gz"},
-             {"resources/berkeley_lm/lm.berkeleylm"},
-             {"resources/berkeley_lm/lm.berkeleylm.gz"}};
++    return new Object[][]{{"src/test/resources/berkeley_lm/lm"},
++            {"src/test/resources/berkeley_lm/lm.gz"},
++            {"src/test/resources/berkeley_lm/lm.berkeleylm"},
++            {"src/test/resources/berkeley_lm/lm.berkeleylm.gz"}};
 +  }
 +
 +  @AfterMethod
 +  public void tearDown() throws Exception {
 +    decoder.cleanUp();
 +  }
 +
 +  @Test(dataProvider = "languageModelFiles")
 +  public void verifyLM(String lmFile) {
 +    joshuaConfig = new JoshuaConfiguration();
 +    joshuaConfig.processCommandLineOptions(OPTIONS);
 +    joshuaConfig.features.add("LanguageModel -lm_type berkeleylm -lm_order 2 -lm_file " + lmFile);
 +    decoder = new Decoder(joshuaConfig, null);
 +    final String translation = decode(INPUT).toString();
 +    assertEquals(translation, EXPECTED_OUTPUT);
 +  }
 +
 +  private Translation decode(String input) {
 +    final Sentence sentence = new Sentence(input, 0, joshuaConfig);
 +    return decoder.decode(sentence);
 +  }
 +
 +  @Test
 +  public void givenLmWithOovFeature_whenDecoder_thenCorrectFeaturesReturned() {
 +    joshuaConfig = new JoshuaConfiguration();
 +    joshuaConfig.processCommandLineOptions(OPTIONS);
-     joshuaConfig.features.add("LanguageModel -lm_type berkeleylm -oov_feature -lm_order 2 -lm_file resources/berkeley_lm/lm");
++    joshuaConfig.features.add("LanguageModel -lm_type berkeleylm -oov_feature -lm_order 2 -lm_file src/test/resources/berkeley_lm/lm");
 +    decoder = new Decoder(joshuaConfig, null);
 +    final String translation = decode(INPUT).toString();
 +    assertEquals(translation, EXPECTED_OUTPUT_WITH_OOV);
 +  }
 +
 +}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dc756709/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/class_lm/ClassBasedLanguageModelTest.java
----------------------------------------------------------------------
diff --cc joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/class_lm/ClassBasedLanguageModelTest.java
index 0d7a9c4,0000000..2067f30
mode 100644,000000..100644
--- a/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/class_lm/ClassBasedLanguageModelTest.java
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/ff/lm/class_lm/ClassBasedLanguageModelTest.java
@@@ -1,77 -1,0 +1,77 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *  http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +package org.apache.joshua.decoder.ff.lm.class_lm;
 +
 +import static org.testng.Assert.assertEquals;
 +import static org.testng.Assert.assertTrue;
 +
 +import org.apache.joshua.corpus.Vocabulary;
 +import org.apache.joshua.decoder.Decoder;
 +import org.apache.joshua.decoder.JoshuaConfiguration;
 +import org.apache.joshua.decoder.ff.FeatureMap;
 +import org.apache.joshua.decoder.ff.FeatureVector;
 +import org.apache.joshua.decoder.ff.lm.LanguageModelFF;
 +import org.apache.joshua.decoder.ff.tm.OwnerMap;
 +import org.apache.joshua.decoder.ff.tm.Rule;
 +import org.apache.joshua.util.io.KenLmTestUtil;
 +import org.testng.annotations.AfterMethod;
 +import org.testng.annotations.BeforeMethod;
 +import org.testng.annotations.Test;
 +
 +/**
 + * This unit test relies on KenLM.  If the KenLM library is not found when the test is run all tests will be skipped.
 + */
 +public class ClassBasedLanguageModelTest {
 +
 +  private static final float WEIGHT = 0.5f;
 +
 +  private LanguageModelFF ff;
 +
 +  @BeforeMethod
 +  public void setUp() {
 +    Decoder.resetGlobalState();
 +
 +    FeatureVector weights = new FeatureVector(1);
 +    weights.put(FeatureMap.hashFeature("lm_0"), WEIGHT);
 +    String[] args = { "-lm_type", "kenlm", "-lm_order", "9",
-       "-lm_file", "./src/test/resources/lm/class_lm/class_lm_9gram.gz",
-       "-class_map", "./src/test/resources/lm/class_lm/class.map" };
++      "-lm_file", "src/test/resources/lm/class_lm/class_lm_9gram.gz",
++      "-class_map", "src/test/resources/lm/class_lm/class.map" };
 +
 +    JoshuaConfiguration config = new JoshuaConfiguration();
 +    KenLmTestUtil.Guard(() -> ff = new LanguageModelFF(weights, args, config));
 +  }
 +
 +  @AfterMethod
 +  public void tearDown() {
 +    Decoder.resetGlobalState();
 +  }
 +
 +  @Test
 +  public void givenLmDefinition_whenInitialized_thenInitializationIsCorrect() {
 +    assertTrue(ff.isClassLM());
 +    assertTrue(ff.isStateful());
 +  }
 +
 +  @Test
 +  public void givenRuleWithSingleWord_whenGetRuleId_thenIsMappedToClass() {
 +    final int[] target = Vocabulary.addAll(new String[] { "professionalism" });
 +    final Rule rule = new Rule(0, null, target, 0, new FeatureVector(0), null, OwnerMap.register(OwnerMap.UNKNOWN_OWNER));
 +    assertEquals(Vocabulary.word(ff.getRuleIds(rule)[0]), "13");
 +  }
 +}
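
The assertion above relies on the -class_map file to send the rule word
"professionalism" to class "13" before the class-based LM is queried. A minimal
sketch of that word-to-class lookup, assuming a hypothetical one-"word class"-pair-per-line
map format (illustrative only; not Joshua's actual loader):

    import java.io.BufferedReader;
    import java.io.IOException;
    import java.nio.file.Files;
    import java.nio.file.Paths;
    import java.util.HashMap;
    import java.util.Map;

    /** Hypothetical sketch: map surface words to LM classes for a class-based LM. */
    public class ClassMapSketch {
      private final Map<String, String> wordToClass = new HashMap<>();

      /** Assumes one whitespace-separated "word class" pair per line (illustrative format). */
      public ClassMapSketch(String path) throws IOException {
        try (BufferedReader in = Files.newBufferedReader(Paths.get(path))) {
          String line;
          while ((line = in.readLine()) != null) {
            String[] fields = line.trim().split("\\s+");
            if (fields.length == 2) wordToClass.put(fields[0], fields[1]);
          }
        }
      }

      /** In this sketch, unknown words fall back to the surface form. */
      public String classOf(String word) {
        return wordToClass.getOrDefault(word, word);
      }

      public static void main(String[] args) throws IOException {
        ClassMapSketch map = new ClassMapSketch("src/test/resources/lm/class_lm/class.map");
        System.out.println(map.classOf("professionalism")); // expected "13" per the test above
      }
    }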

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dc756709/joshua-core/src/test/java/org/apache/joshua/decoder/kbest_extraction/KBestExtractionTest.java
----------------------------------------------------------------------
diff --cc joshua-core/src/test/java/org/apache/joshua/decoder/kbest_extraction/KBestExtractionTest.java
index 41569aa,0000000..f2cbe7f
mode 100644,000000..100644
--- a/joshua-core/src/test/java/org/apache/joshua/decoder/kbest_extraction/KBestExtractionTest.java
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/kbest_extraction/KBestExtractionTest.java
@@@ -1,83 -1,0 +1,83 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *  http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +package org.apache.joshua.decoder.kbest_extraction;
 +
 +import org.apache.joshua.decoder.Decoder;
 +import org.apache.joshua.decoder.JoshuaConfiguration;
 +import org.apache.joshua.decoder.Translation;
 +import org.apache.joshua.decoder.segment_file.Sentence;
 +import org.apache.joshua.util.io.KenLmTestUtil;
 +import org.testng.annotations.AfterMethod;
 +import org.testng.annotations.BeforeMethod;
 +import org.testng.annotations.Test;
 +import org.testng.reporters.Files;
 +
 +import java.io.File;
 +import java.io.IOException;
 +import java.nio.file.Path;
 +import java.nio.file.Paths;
 +
 +import static com.google.common.base.Charsets.UTF_8;
 +import static java.nio.file.Files.readAllBytes;
 +import static org.testng.Assert.assertEquals;
 +
 +/**
 + * Reimplements the kbest extraction regression test.
 + * TODO (fhieber): this test strangely only works with StateMinimizing KenLM;
 + * this is to be investigated.
 + */
 +
 +public class KBestExtractionTest {
 +
-   private static final String CONFIG = "resources/kbest_extraction/joshua.config";
++  private static final String CONFIG = "src/test/resources/kbest_extraction/joshua.config";
 +  private static final String INPUT = "a b c d e";
-   private static final Path GOLD_PATH = Paths.get("resources/kbest_extraction/output.scores.gold");
++  private static final Path GOLD_PATH = Paths.get("src/test/resources/kbest_extraction/output.scores.gold");
 +
 +  private JoshuaConfiguration joshuaConfig = null;
 +  private Decoder decoder = null;
 +
 +  @BeforeMethod
 +  public void setUp() throws Exception {
 +    // BROKEN: only works with StateMinimizing KenLM; see the TODO in the class javadoc.
 +    joshuaConfig = new JoshuaConfiguration();
 +    joshuaConfig.readConfigFile(CONFIG);
 +    joshuaConfig.outputFormat = "%i ||| %s ||| %c";
 +    KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, ""));
 +  }
 +
 +  @AfterMethod
 +  public void tearDown() throws Exception {
 +    decoder.cleanUp();
 +    decoder = null;
 +  }
 +
 +  @Test
 +  public void givenInput_whenKbestExtraction_thenOutputIsAsExpected() throws IOException {
 +    final String translation = decode(INPUT).toString();
 +    final String gold = new String(readAllBytes(GOLD_PATH), UTF_8);
 +    Files.writeFile(translation, new File("src/test/resources/kbest_extraction/output.actual"));
 +    assertEquals(translation, gold);
 +  }
 +
 +  private Translation decode(String input) {
 +    final Sentence sentence = new Sentence(input, 0, joshuaConfig);
 +    return decoder.decode(sentence);
 +  }
 +
 +}
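
With outputFormat set to "%i ||| %s ||| %c" as above, each k-best line carries
the sentence id, the hypothesis, and the model cost. A minimal sketch of pulling
those three fields apart, assuming exactly that " ||| "-delimited layout
(hypothetical parser with illustrative values; not Joshua API code):

    /** Hypothetical parser for one "%i ||| %s ||| %c" k-best line. */
    public class KBestLineSketch {
      public static void main(String[] args) {
        String line = "0 ||| a b c d e ||| -7.496"; // illustrative values
        String[] fields = line.split(" \\|\\|\\| ");
        int sentenceId = Integer.parseInt(fields[0].trim());
        String hypothesis = fields[1];
        float cost = Float.parseFloat(fields[2].trim());
        System.out.printf("id=%d hyp='%s' cost=%.3f%n", sentenceId, hypothesis, cost);
      }
    }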

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dc756709/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
----------------------------------------------------------------------
diff --cc joshua-core/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
index f2fc6a7,0000000..625fe0c
mode 100644,000000..100644
--- a/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
+++ b/joshua-core/src/test/java/org/apache/joshua/decoder/phrase/decode/PhraseDecodingTest.java
@@@ -1,75 -1,0 +1,90 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *  http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +package org.apache.joshua.decoder.phrase.decode;
 +
- import static com.google.common.base.Charsets.UTF_8;
- import static java.nio.file.Files.readAllBytes;
 +import static org.testng.Assert.assertEquals;
 +
 +import java.io.IOException;
- import java.nio.file.Path;
- import java.nio.file.Paths;
 +
 +import org.apache.joshua.decoder.Decoder;
 +import org.apache.joshua.decoder.JoshuaConfiguration;
 +import org.apache.joshua.decoder.Translation;
 +import org.apache.joshua.decoder.segment_file.Sentence;
 +import org.apache.joshua.util.io.KenLmTestUtil;
 +import org.testng.annotations.AfterMethod;
 +import org.testng.annotations.BeforeMethod;
 +import org.testng.annotations.Test;
 +
 +/**
 + * Reimplements the constrained phrase decoding test
 + */
 +public class PhraseDecodingTest {
 +
-   private static final String CONFIG = "resources/phrase_decoder/config";
++  private static final String CONFIG = "src/test/resources/phrase_decoder/config";
 +  private static final String INPUT = "una estrategia republicana para obstaculizar la reelección de Obama";
-   private static final Path GOLD_PATH = Paths.get("resources/phrase_decoder/output.gold");
- 
++  private static final String OUTPUT = "0 ||| a strategy republican to hinder reelection Obama ||| tm_pt_0=-9.702 tm_pt_1=-10.800 tm_pt_2=-7.543 tm_pt_3=-8.555 lm_0=-19.117 OOVPenalty=0.000 WordPenalty=-3.040 Distortion=0.000 PhrasePenalty=5.000 ||| -7.496";
++  private static final String OUTPUT_WITH_ALIGNMENTS = "0 ||| a strategy |0-1| republican |2-2| to hinder |3-4| reelection |5-6| Obama |7-8| ||| tm_pt_0=-9.702 tm_pt_1=-10.800 tm_pt_2=-7.543 tm_pt_3=-8.555 lm_0=-19.117 OOVPenalty=0.000 WordPenalty=-3.040 Distortion=0.000 PhrasePenalty=5.000 ||| -7.496";
++  
 +  private JoshuaConfiguration joshuaConfig = null;
 +  private Decoder decoder = null;
 +
 +  @BeforeMethod
 +  public void setUp() throws Exception {
 +    joshuaConfig = new JoshuaConfiguration();
 +    joshuaConfig.readConfigFile(CONFIG);
 +    KenLmTestUtil.Guard(() -> decoder = new Decoder(joshuaConfig, ""));
 +  }
 +
 +  @AfterMethod
 +  public void tearDown() throws Exception {
 +    decoder.cleanUp();
 +    decoder = null;
 +  }
 +
-   @Test(enabled = false)
++  @Test(enabled = true)
 +  public void givenInput_whenPhraseDecoding_thenOutputIsAsExpected() throws IOException {
-     final String translation = decode(INPUT).toString();
-     final String gold = new String(readAllBytes(GOLD_PATH), UTF_8);
-     assertEquals(gold, translation);
++    final String translation = decode(INPUT).toString().trim();
++    final String gold = OUTPUT;
++    assertEquals(translation, gold);
++  }
++  
++  @Test(enabled = false)
++  public void givenInput_whenPhraseDecodingWithAlignments_thenOutputHasAlignments() throws IOException {
++    final String translation = decode(INPUT).toString().trim();
++    final String gold = OUTPUT_WITH_ALIGNMENTS;
++    assertEquals(translation, gold);
++  }
++  
++  @Test(enabled = true)
++  public void givenInput_whenPhraseDecoding_thenInputCanBeRetrieved() throws IOException {
++    String outputFormat = joshuaConfig.outputFormat;
++    joshuaConfig.outputFormat = "%e";
++    final String translation = decode(INPUT).toString().trim();
++    joshuaConfig.outputFormat = outputFormat;
++    final String gold = INPUT;
++    assertEquals(translation, gold);
 +  }
 +
 +  private Translation decode(String input) {
 +    final Sentence sentence = new Sentence(input, 0, joshuaConfig);
++    // Uncomment to raise decoder verbosity when debugging: joshuaConfig.setVerbosity(2);
 +    return decoder.decode(sentence);
 +  }
 +
 +}
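
OUTPUT_WITH_ALIGNMENTS interleaves phrase alignments such as |0-1| with the
target words. A minimal sketch of separating the markers from the text, assuming
the |i-j| span format shown in that constant (a hypothetical helper, not the
decoder's own reader):

    import java.util.ArrayList;
    import java.util.List;
    import java.util.regex.Matcher;
    import java.util.regex.Pattern;

    /** Hypothetical helper: split a translation with |i-j| markers into text and spans. */
    public class AlignmentMarkupSketch {
      private static final Pattern SPAN = Pattern.compile("\\|(\\d+)-(\\d+)\\|");

      public static void main(String[] args) {
        String output = "a strategy |0-1| republican |2-2| to hinder |3-4| reelection |5-6| Obama |7-8|";

        // Plain translation: drop the markers and collapse the leftover whitespace.
        String plain = SPAN.matcher(output).replaceAll("").trim().replaceAll("\\s+", " ");
        System.out.println(plain); // a strategy republican to hinder reelection Obama

        // Source spans, in target phrase order.
        List<int[]> spans = new ArrayList<>();
        Matcher m = SPAN.matcher(output);
        while (m.find())
          spans.add(new int[] { Integer.parseInt(m.group(1)), Integer.parseInt(m.group(2)) });
        System.out.println(spans.size() + " aligned phrases"); // 5
      }
    }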

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dc756709/joshua-core/src/test/java/org/apache/joshua/system/KenLmTest.java
----------------------------------------------------------------------
diff --cc joshua-core/src/test/java/org/apache/joshua/system/KenLmTest.java
index 40514cd,0000000..74baef3
mode 100644,000000..100644
--- a/joshua-core/src/test/java/org/apache/joshua/system/KenLmTest.java
+++ b/joshua-core/src/test/java/org/apache/joshua/system/KenLmTest.java
@@@ -1,100 -1,0 +1,100 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *  http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +package org.apache.joshua.system;
 +
 +import org.apache.joshua.corpus.Vocabulary;
 +import org.apache.joshua.decoder.ff.lm.KenLM;
 +import org.apache.joshua.util.io.KenLmTestUtil;
 +import org.testng.annotations.AfterMethod;
 +import org.testng.annotations.BeforeMethod;
 +import org.testng.annotations.Test;
 +
 +import static org.apache.joshua.corpus.Vocabulary.registerLanguageModel;
 +import static org.apache.joshua.corpus.Vocabulary.unregisterLanguageModels;
 +import static org.testng.Assert.assertTrue;
 +import static org.testng.AssertJUnit.assertEquals;
 +import static org.testng.AssertJUnit.assertFalse;
 +
 +/**
 + * KenLM JNI interface tests.
 + * Loads libken.{so,dylib}.
 + * If run in Eclipse, add -Djava.library.path=./lib to JVM arguments
 + * of the run configuration.
 + */
 +
 +public class KenLmTest {
 +
-   private static final String LANGUAGE_MODEL_PATH = "resources/kenlm/oilers.kenlm";
++  private static final String LANGUAGE_MODEL_PATH = "src/test/resources/kenlm/oilers.kenlm";
 +  private KenLM kenLm;
 +
 +  @Test
 +  public void givenKenLm_whenQueryingForNgramProbability_thenProbIsCorrect() {
 +    // GIVEN
 +    KenLmTestUtil.Guard(() -> kenLm = new KenLM(3, LANGUAGE_MODEL_PATH));
 +
 +    int[] words = Vocabulary.addAll("Wayne Gretzky");
 +    registerLanguageModel(kenLm);
 +
 +    // WHEN
 +    float probability = kenLm.prob(words);
 +
 +    // THEN
 +    assertEquals("Found the wrong probability for 2-gram \"Wayne Gretzky\"", -0.99f, probability,
 +            Float.MIN_VALUE);
 +  }
 +
 +  @Test
 +  public void givenKenLm_whenQueryingForNgramProbability_thenIdAndStringMethodsReturnTheSame() {
 +    // GIVEN
 +    KenLmTestUtil.Guard(() -> kenLm = new KenLM(LANGUAGE_MODEL_PATH));
 +
 +    registerLanguageModel(kenLm);
 +    String sentence = "Wayne Gretzky";
 +    String[] words = sentence.split("\\s+");
 +    int[] ids = Vocabulary.addAll(sentence);
 +
 +    // WHEN
 +    float prob_string = kenLm.prob(words);
 +    float prob_id = kenLm.prob(ids);
 +
 +    // THEN
 +    assertEquals("ngram probabilities differ for word and id based n-gram query", prob_string, prob_id,
 +            Float.MIN_VALUE);
 +
 +  }
 +
 +  @Test
 +  public void givenKenLm_whenIsKnownWord_thenReturnValuesAreCorrect() {
 +    KenLmTestUtil.Guard(() -> kenLm = new KenLM(LANGUAGE_MODEL_PATH));
 +    assertTrue(kenLm.isKnownWord("Wayne"));
 +    assertFalse(kenLm.isKnownWord("Wayne2222"));
 +  }
 +
 +  @BeforeMethod
 +  public void setUp() throws Exception {
 +    Vocabulary.clear();
 +    unregisterLanguageModels();
 +  }
 +
 +  @AfterMethod
 +  public void tearDown() throws Exception {
 +    Vocabulary.clear();
 +    unregisterLanguageModels();
 +  }
 +}
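
Per the class javadoc, these tests load libken.{so,dylib} over JNI and are
skipped when the library cannot be found. One plausible shape for a guard like
KenLmTestUtil.Guard, assuming it turns a failed native load into a TestNG skip
(a sketch; the actual utility may differ):

    import org.testng.SkipException;

    /** Sketch: skip (rather than fail) TestNG tests when a native library is absent. */
    public class NativeLibGuardSketch {
      public static void guard(Runnable setup) {
        try {
          setup.run();
        } catch (UnsatisfiedLinkError e) {
          // Raised when the JNI library is not on java.library.path.
          throw new SkipException("KenLM native library not available: " + e.getMessage());
        }
      }
    }

Callers would wrap the construction that touches native code, e.g.
NativeLibGuardSketch.guard(() -> kenLm = new KenLM(LANGUAGE_MODEL_PATH)), so a
missing library surfaces as a skipped test instead of an error.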

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dc756709/joshua-core/src/test/java/org/apache/joshua/system/LmOovFeatureTest.java
----------------------------------------------------------------------
diff --cc joshua-core/src/test/java/org/apache/joshua/system/LmOovFeatureTest.java
index 9e2f622,0000000..84789ce
mode 100644,000000..100644
--- a/joshua-core/src/test/java/org/apache/joshua/system/LmOovFeatureTest.java
+++ b/joshua-core/src/test/java/org/apache/joshua/system/LmOovFeatureTest.java
@@@ -1,76 -1,0 +1,76 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *  http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +package org.apache.joshua.system;
 +
 +import static org.testng.Assert.assertEquals;
 +
 +import java.io.IOException;
 +
 +import org.apache.joshua.decoder.Decoder;
 +import org.apache.joshua.decoder.JoshuaConfiguration;
 +import org.apache.joshua.decoder.Translation;
 +import org.apache.joshua.decoder.segment_file.Sentence;
 +import org.testng.annotations.AfterMethod;
 +import org.testng.annotations.BeforeMethod;
 +import org.testng.annotations.Test;
 +
 +public class LmOovFeatureTest {
 +
-   private static final String CONFIG = "resources/lm_oov/joshua.config";
++  private static final String CONFIG = "src/test/resources/lm_oov/joshua.config";
 +  private static final String INPUT = "a chat-rooms full";
 +  // expecting 2 LM OOVs ('a' & 'full'), 2 grammar OOVs ('chat-rooms' & 'full'), and a score of -198.000
 +  private static final String EXPECTED_FEATURES = "lm_0=-206.718124 lm_0_oov=2.000000 OOVPenalty=-200.000000 pt_0=2.000000 glue_0=-3.000000 | -198.000";
 +
 +  private JoshuaConfiguration joshuaConfig = null;
 +  private Decoder decoder = null;
 +
 +  @BeforeMethod
 +  public void setUp() throws Exception {
 +    joshuaConfig = new JoshuaConfiguration();
 +    joshuaConfig.readConfigFile(CONFIG);
 +    joshuaConfig.outputFormat = "%f | %c";
 +    decoder = new Decoder(joshuaConfig, "");
 +  }
 +
 +  @AfterMethod
 +  public void tearDown() throws Exception {
 +    decoder.cleanUp();
 +    decoder = null;
 +  }
 +
 +  @Test
 +  public void givenInputWithDifferentOovTypes_whenDecode_thenFeaturesAreAsExpected() throws IOException {
 +    final String translation = decode(INPUT).toString().trim();
 +    System.out.println(translation);
 +    assertEquals(translation, EXPECTED_FEATURES);
 +  }
 +
 +  private Translation decode(String input) {
 +    final Sentence sentence = new Sentence(input, 0, joshuaConfig);
 +    return decoder.decode(sentence);
 +  }
 +  
 +  public static void main(String[] args) throws Exception {
 +    
 +    LmOovFeatureTest test = new LmOovFeatureTest();
 +    test.setUp();
 +    test.givenInputWithDifferentOovTypes_whenDecode_thenFeaturesAreAsExpected();
 +    test.tearDown();
 +  }
 +}
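
The "%f | %c" format used above prints name=value feature pairs, then the model
cost after a " | " separator. A minimal sketch of parsing such a line back into
a feature map plus score, assuming exactly the layout of EXPECTED_FEATURES
(a hypothetical parser, not Joshua API code):

    import java.util.LinkedHashMap;
    import java.util.Map;

    /** Hypothetical parser for the "%f | %c" line asserted in the test above. */
    public class FeatureLineSketch {
      public static void main(String[] args) {
        String line = "lm_0=-206.718124 lm_0_oov=2.000000 OOVPenalty=-200.000000 "
            + "pt_0=2.000000 glue_0=-3.000000 | -198.000";
        String[] halves = line.split(" \\| ");
        Map<String, Float> features = new LinkedHashMap<>();
        for (String pair : halves[0].split("\\s+")) {
          String[] kv = pair.split("=");
          features.put(kv[0], Float.parseFloat(kv[1]));
        }
        float score = Float.parseFloat(halves[1]);
        System.out.println(features.get("lm_0_oov") + " LM OOVs, score " + score);
      }
    }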

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dc756709/joshua-core/src/test/java/org/apache/joshua/system/MultithreadedTranslationTests.java
----------------------------------------------------------------------
diff --cc joshua-core/src/test/java/org/apache/joshua/system/MultithreadedTranslationTests.java
index 092dbc6,0000000..7b1c47f
mode 100644,000000..100644
--- a/joshua-core/src/test/java/org/apache/joshua/system/MultithreadedTranslationTests.java
+++ b/joshua-core/src/test/java/org/apache/joshua/system/MultithreadedTranslationTests.java
@@@ -1,155 -1,0 +1,155 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *  http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +package org.apache.joshua.system;
 +
 +import java.io.BufferedReader;
 +import java.io.ByteArrayInputStream;
 +import java.io.ByteArrayOutputStream;
 +import java.io.IOException;
 +import java.io.InputStreamReader;
 +import java.nio.charset.Charset;
 +import java.util.ArrayList;
 +
 +import org.apache.joshua.decoder.Decoder;
 +import org.apache.joshua.decoder.JoshuaConfiguration;
 +import org.apache.joshua.decoder.Translation;
 +import org.apache.joshua.decoder.Translations;
 +import org.apache.joshua.decoder.io.TranslationRequestStream;
 +import org.testng.annotations.AfterMethod;
 +import org.testng.annotations.BeforeMethod;
 +import org.testng.annotations.Test;
 +
 +import static org.testng.Assert.assertTrue;
 +
 +/**
 + * Integration test for multithreaded Joshua decoder tests. Grammar used is a
 + * toy packed grammar.
 + *
 + * @author kellens
 + */
 +public class MultithreadedTranslationTests {
 +
 +  private JoshuaConfiguration joshuaConfig = null;
 +  private Decoder decoder = null;
 +  private static final String INPUT = "A K B1 U Z1 Z2 B2 C";
 +  private int previousLogLevel;
 +  private final static long NANO_SECONDS_PER_SECOND = 1_000_000_000;
 +
 +  @BeforeMethod
 +  public void setUp() throws Exception {
 +    joshuaConfig = new JoshuaConfiguration();
 +    joshuaConfig.search_algorithm = "cky";
 +    joshuaConfig.mark_oovs = false;
 +    joshuaConfig.pop_limit = 100;
 +    joshuaConfig.use_unique_nbest = false;
 +    joshuaConfig.include_align_index = false;
 +    joshuaConfig.topN = 0;
-     joshuaConfig.tms.add("thrax -owner pt -maxspan 20 -path resources/wa_grammar.packed");
-     joshuaConfig.tms.add("thrax -owner glue -maxspan -1 -path resources/grammar.glue");
++    joshuaConfig.tms.add("thrax -owner pt -maxspan 20 -path src/test/resources/wa_grammar.packed");
++    joshuaConfig.tms.add("thrax -owner glue -maxspan -1 -path src/test/resources/grammar.glue");
 +    joshuaConfig.goal_symbol = "[GOAL]";
 +    joshuaConfig.default_non_terminal = "[X]";
 +    joshuaConfig.features.add("OOVPenalty");
 +    joshuaConfig.weights.add("tm_pt_0 1");
 +    joshuaConfig.weights.add("tm_pt_1 1");
 +    joshuaConfig.weights.add("tm_pt_2 1");
 +    joshuaConfig.weights.add("tm_pt_3 1");
 +    joshuaConfig.weights.add("tm_pt_4 1");
 +    joshuaConfig.weights.add("tm_pt_5 1");
 +    joshuaConfig.weights.add("tm_glue_0 1");
 +    joshuaConfig.weights.add("OOVPenalty 2");
 +    joshuaConfig.num_parallel_decoders = 500; // This will enable 500 parallel
 +                                              // decoders to run at once.
 +                                              // Useful to help flush out
 +                                              // concurrency errors in
 +                                              // underlying
 +                                              // data-structures.
 +    this.decoder = new Decoder(joshuaConfig, ""); // Second argument
 +                                                  // (configFile)
 +                                                  // is not even used by the
 +                                                  // constructor/initialize.
 +
 +    previousLogLevel = Decoder.VERBOSE;
 +    Decoder.VERBOSE = 0;
 +  }
 +
 +  @AfterMethod
 +  public void tearDown() throws Exception {
 +    this.decoder.cleanUp();
 +    this.decoder = null;
 +    Decoder.VERBOSE = previousLogLevel;
 +  }
 +
 +
 +
 +  // This test was created specifically to reproduce a multithreaded issue
 +  // related to mapped byte array access in the PackedGrammar getAlignmentArray
 +  // function.
 +
 +  // We'll test the decoding engine using N = 10,000 identical inputs. This
 +  // should be sufficient to induce concurrent data access for many shared
 +  // data structures.
 +
 +  @Test
 +  public void givenPackedGrammar_whenNTranslationsCalledConcurrently_thenReturnNResults() throws IOException {
 +    // GIVEN
 +
 +    int inputLines = 10000;
 +    joshuaConfig.use_structured_output = true; // Enabled alignments.
 +    StringBuilder sb = new StringBuilder();
 +    for (int i = 0; i < inputLines; i++) {
 +      sb.append(INPUT + "\n");
 +    }
 +
 +    // Append a large string together to simulate N requests to the decoding
 +    // engine.
 +    TranslationRequestStream req = new TranslationRequestStream(
 +        new BufferedReader(new InputStreamReader(new ByteArrayInputStream(sb.toString()
 +        .getBytes(Charset.forName("UTF-8"))))), joshuaConfig);
 +    
 +    ByteArrayOutputStream output = new ByteArrayOutputStream();
 +
 +    // WHEN
 +    // Translate all spans in parallel.
 +    Translations translations = this.decoder.decodeAll(req);
 +
 +    ArrayList<Translation> translationResults = new ArrayList<Translation>();
 +
 +
 +    final long translationStartTime = System.nanoTime();
 +    try {
 +      for (Translation t: translations)
 +        translationResults.add(t);
 +    } finally {
 +      if (output != null) {
 +        try {
 +          output.close();
 +        } catch (IOException e) {
 +          e.printStackTrace();
 +        }
 +      }
 +    }
 +
 +    final long translationEndTime = System.nanoTime();
 +    final double pipelineLoadDurationInSeconds = (translationEndTime - translationStartTime) / ((double)NANO_SECONDS_PER_SECOND);
 +    System.err.println(String.format("%.2f seconds", pipelineLoadDurationInSeconds));
 +
 +    // THEN
 +    assertTrue(translationResults.size() == inputLines);
 +  }
 +}

http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/dc756709/joshua-core/src/test/java/org/apache/joshua/system/StructuredOutputTest.java
----------------------------------------------------------------------
diff --cc joshua-core/src/test/java/org/apache/joshua/system/StructuredOutputTest.java
index 0cc8721,0000000..1c9a6fe
mode 100644,000000..100644
--- a/joshua-core/src/test/java/org/apache/joshua/system/StructuredOutputTest.java
+++ b/joshua-core/src/test/java/org/apache/joshua/system/StructuredOutputTest.java
@@@ -1,114 -1,0 +1,114 @@@
 +/*
 + * Licensed to the Apache Software Foundation (ASF) under one
 + * or more contributor license agreements.  See the NOTICE file
 + * distributed with this work for additional information
 + * regarding copyright ownership.  The ASF licenses this file
 + * to you under the Apache License, Version 2.0 (the
 + * "License"); you may not use this file except in compliance
 + * with the License.  You may obtain a copy of the License at
 + *
 + *  http://www.apache.org/licenses/LICENSE-2.0
 + *
 + * Unless required by applicable law or agreed to in writing,
 + * software distributed under the License is distributed on an
 + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 + * KIND, either express or implied.  See the License for the
 + * specific language governing permissions and limitations
 + * under the License.
 + */
 +package org.apache.joshua.system;
 +
 +import java.util.Arrays;
 +import java.util.List;
 +
 +import org.apache.joshua.decoder.Decoder;
 +import org.apache.joshua.decoder.JoshuaConfiguration;
 +import org.apache.joshua.decoder.Translation;
 +import org.apache.joshua.decoder.segment_file.Sentence;
 +import org.testng.Assert;
 +import org.testng.annotations.AfterMethod;
 +import org.testng.annotations.BeforeMethod;
 +import org.testng.annotations.Test;
 +
 +/**
 + * Integration test for the complete Joshua decoder using a toy grammar that translates
 + * a bunch of capital letters to lowercase letters. Rules in the test grammar
 + * drop and generate additional words and simulate reordering of rules, so that
 + * proper extraction of word alignments can be tested.
 + *
 + * @author fhieber
 + */
 +public class StructuredOutputTest {
 +
 +  private JoshuaConfiguration joshuaConfig = null;
 +  private Decoder decoder = null;
 +  private Translation translation = null;
 +  private static final String input = "A K B1 U Z1 Z2 B2 C";
 +  private static final String expectedTranslation = "a b n1 u z c1 k1 k2 k3 n1 n2 n3 c2";
 +  private static final String expectedWordAlignmentString = "0-0 2-1 6-1 3-3 4-4 5-4 7-5 1-6 1-7 1-8 7-12";
 +  private static final List<List<Integer>> expectedWordAlignment = Arrays.asList(
 +      Arrays.asList(0), Arrays.asList(2, 6), Arrays.asList(), Arrays.asList(3),
 +      Arrays.asList(4, 5), Arrays.asList(7), Arrays.asList(1),
 +      Arrays.asList(1), Arrays.asList(1), Arrays.asList(), Arrays.asList(),
 +      Arrays.asList(), Arrays.asList(7));
 +  private static final double expectedScore = -17.0;
 +
 +  @BeforeMethod
 +  public void setUp() throws Exception {
 +    joshuaConfig = new JoshuaConfiguration();
 +    joshuaConfig.search_algorithm = "cky";
 +    joshuaConfig.mark_oovs = false;
 +    joshuaConfig.pop_limit = 100;
 +    joshuaConfig.use_unique_nbest = false;
 +    joshuaConfig.include_align_index = false;
 +    joshuaConfig.topN = 0;
-     joshuaConfig.tms.add("thrax -owner pt -maxspan 20 -path resources/wa_grammar");
-     joshuaConfig.tms.add("thrax -owner glue -maxspan -1 -path resources/grammar.glue");
++    joshuaConfig.tms.add("thrax -owner pt -maxspan 20 -path src/test/resources/wa_grammar");
++    joshuaConfig.tms.add("thrax -owner glue -maxspan -1 -path src/test/resources/grammar.glue");
 +    joshuaConfig.goal_symbol = "[GOAL]";
 +    joshuaConfig.default_non_terminal = "[X]";
 +    joshuaConfig.features.add("OOVPenalty");
 +    joshuaConfig.weights.add("pt_0 -1");
 +    joshuaConfig.weights.add("pt_1 -1");
 +    joshuaConfig.weights.add("pt_2 -1");
 +    joshuaConfig.weights.add("pt_3 -1");
 +    joshuaConfig.weights.add("pt_4 -1");
 +    joshuaConfig.weights.add("pt_5 -1");
 +    joshuaConfig.weights.add("glue_0 -1");
 +    joshuaConfig.weights.add("OOVPenalty 2");
 +    decoder = new Decoder(joshuaConfig, ""); // second argument (configFile)
 +                                             // is not even used by the
 +                                             // constructor/initialize
 +  }
 +
 +  @AfterMethod
 +  public void tearDown() throws Exception {
 +    decoder.cleanUp();
 +    decoder = null;
 +    translation = null;
 +  }
 +
 +  private Translation decode(String input) {
 +    Sentence sentence = new Sentence(input, 0, joshuaConfig);
 +    return decoder.decode(sentence);
 +  }
 +
 +  @Test
 +  public void test() {
 +
 +    // test standard output
 +    joshuaConfig.use_structured_output = false;
 +    joshuaConfig.outputFormat = "%s | %a ";
 +    translation = decode(input);
 +    Assert.assertEquals(translation.toString().trim(), expectedTranslation + " | " + expectedWordAlignmentString);
 +
 +    // test structured output
 +    joshuaConfig.use_structured_output = true; // set structured output creation to true
 +    translation = decode(input);
 +    Assert.assertEquals(translation.getStructuredTranslations().get(0).getTranslationString(), expectedTranslation);
 +    Assert.assertEquals(translation.getStructuredTranslations().get(0).getTranslationTokens(), Arrays.asList(expectedTranslation.split("\\s+")));
 +    Assert.assertEquals(translation.getStructuredTranslations().get(0).getTranslationScore(), expectedScore, 0.00001);
 +    Assert.assertEquals(translation.getStructuredTranslations().get(0).getTranslationWordAlignments(), expectedWordAlignment);
 +    Assert.assertEquals(translation.getStructuredTranslations().get(0).getTranslationWordAlignments().size(), translation
 +        .getStructuredTranslations().get(0).getTranslationTokens().size());
 +  }
 +}
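
expectedWordAlignment above is the per-target-token view of
expectedWordAlignmentString: each "src-tgt" pair contributes the source index
src to the list for target position tgt. A minimal sketch of that expansion,
grounded in the two constants (a hypothetical helper, not the decoder's own
alignment code):

    import java.util.ArrayList;
    import java.util.List;

    /** Sketch: expand "src-tgt" pairs into one source-index list per target token. */
    public class AlignmentExpansionSketch {
      public static List<List<Integer>> expand(String pairs, int targetLength) {
        List<List<Integer>> byTarget = new ArrayList<>();
        for (int i = 0; i < targetLength; i++) byTarget.add(new ArrayList<>());
        for (String pair : pairs.trim().split("\\s+")) {
          String[] st = pair.split("-");
          byTarget.get(Integer.parseInt(st[1])).add(Integer.parseInt(st[0]));
        }
        return byTarget;
      }

      public static void main(String[] args) {
        String alignment = "0-0 2-1 6-1 3-3 4-4 5-4 7-5 1-6 1-7 1-8 7-12";
        // 13 target tokens, as in "a b n1 u z c1 k1 k2 k3 n1 n2 n3 c2".
        System.out.println(expand(alignment, 13));
        // [[0], [2, 6], [], [3], [4, 5], [7], [1], [1], [1], [], [], [], [7]]
      }
    }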