You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@joshua.apache.org by mj...@apache.org on 2016/04/02 00:56:30 UTC
[07/10] incubator-joshua git commit: Clean up Slice constructor, Fully loading source tries, lazy loading other structures
Clean up Slice constructor, Fully loading source tries, lazy loading other structures
Project: http://git-wip-us.apache.org/repos/asf/incubator-joshua/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-joshua/commit/9448ba55
Tree: http://git-wip-us.apache.org/repos/asf/incubator-joshua/tree/9448ba55
Diff: http://git-wip-us.apache.org/repos/asf/incubator-joshua/diff/9448ba55
Branch: refs/heads/master
Commit: 9448ba552cd03bacad81eb4b9b5e900db360c00e
Parents: 2cc9996
Author: Kellen Sunderland <ke...@amazon.com>
Authored: Tue Mar 29 17:23:23 2016 +0200
Committer: Kellen Sunderland <ke...@amazon.com>
Committed: Thu Mar 31 10:44:43 2016 +0200
----------------------------------------------------------------------
.../decoder/ff/tm/packed/PackedGrammar.java | 147 ++++++++++---------
1 file changed, 75 insertions(+), 72 deletions(-)
----------------------------------------------------------------------
http://git-wip-us.apache.org/repos/asf/incubator-joshua/blob/9448ba55/src/joshua/decoder/ff/tm/packed/PackedGrammar.java
----------------------------------------------------------------------
diff --git a/src/joshua/decoder/ff/tm/packed/PackedGrammar.java b/src/joshua/decoder/ff/tm/packed/PackedGrammar.java
index 18aa60e..792a7ad 100644
--- a/src/joshua/decoder/ff/tm/packed/PackedGrammar.java
+++ b/src/joshua/decoder/ff/tm/packed/PackedGrammar.java
@@ -38,15 +38,13 @@ package joshua.decoder.ff.tm.packed;
import static java.util.Collections.sort;
-import java.io.BufferedInputStream;
-import java.io.DataInputStream;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.IOException;
import java.io.InputStream;
-import java.io.RandomAccessFile;
import java.nio.BufferUnderflowException;
+import java.nio.ByteBuffer;
import java.nio.IntBuffer;
import java.nio.MappedByteBuffer;
import java.nio.channels.FileChannel;
@@ -75,7 +73,6 @@ import joshua.decoder.ff.tm.Rule;
import joshua.decoder.ff.tm.RuleCollection;
import joshua.decoder.ff.tm.Trie;
import joshua.decoder.ff.tm.hash_based.ExtensionIterator;
-import joshua.decoder.ff.tm.packed.SliceAggregatingTrie;
import joshua.util.encoding.EncoderConfiguration;
import joshua.util.encoding.FloatEncoder;
import joshua.util.io.LineReader;
@@ -322,20 +319,15 @@ public class PackedGrammar extends AbstractGrammar {
private final String name;
private final int[] source;
+ private final IntBuffer target;
+ private final ByteBuffer features;
+ private final ByteBuffer alignments;
- private final int[] target;
private final int[] targetLookup;
-
- private MappedByteBuffer features;
private int featureSize;
private int[] featureLookup;
- private RandomAccessFile featureFile;
-
private float[] estimated;
private float[] precomputable;
-
- private RandomAccessFile alignmentFile;
- private MappedByteBuffer alignments;
private int[] alignmentLookup;
/**
@@ -352,81 +344,92 @@ public class PackedGrammar extends AbstractGrammar {
File feature_file = new File(prefix + ".features");
File alignment_file = new File(prefix + ".alignments");
- // Get the channels etc.
- FileInputStream source_fis = new FileInputStream(source_file);
- FileChannel source_channel = source_fis.getChannel();
- int source_size = (int) source_channel.size();
-
- FileInputStream target_fis = new FileInputStream(target_file);
- FileChannel target_channel = target_fis.getChannel();
- int target_size = (int) target_channel.size();
+ source = fullyLoadFileToArray(source_file);
+ // First int specifies the size of this file, load from 1st int on
+ targetLookup = fullyLoadFileToArray(target_lookup_file, 1);
- featureFile = new RandomAccessFile(feature_file, "r");
- FileChannel feature_channel = featureFile.getChannel();
- int feature_size = (int) feature_channel.size();
+ target = associateMemoryMappedFile(target_file).asIntBuffer();
+ features = associateMemoryMappedFile(feature_file);
+ initializeFeatureStructures();
- IntBuffer source_buffer = source_channel.map(MapMode.READ_ONLY, 0, source_size).asIntBuffer();
- source = new int[source_size / 4];
- source_buffer.get(source);
- source_fis.close();
-
- IntBuffer target_buffer = target_channel.map(MapMode.READ_ONLY, 0, target_size).asIntBuffer();
- target = new int[target_size / 4];
- target_buffer.get(target);
- target_fis.close();
-
- features = feature_channel.map(MapMode.READ_ONLY, 0, feature_size);
- features.load();
-
if (alignment_file.exists()) {
- alignmentFile = new RandomAccessFile(alignment_file, "r");
- FileChannel alignment_channel = alignmentFile.getChannel();
- int alignment_size = (int) alignment_channel.size();
- alignments = alignment_channel.map(MapMode.READ_ONLY, 0, alignment_size);
- alignments.load();
-
- int num_blocks = alignments.getInt(0);
- alignmentLookup = new int[num_blocks];
- int header_pos = 8;
- for (int i = 0; i < num_blocks; i++) {
- alignmentLookup[i] = alignments.getInt(header_pos);
- header_pos += 4;
- }
+ alignments = associateMemoryMappedFile(alignment_file);
+ alignmentLookup = parseLookups(alignments);
} else {
alignments = null;
}
+ tries = new HashMap<Integer, PackedTrie>();
+ }
+
+ /**
+ * Sets up the per-block feature structures from the header of the features
+ * buffer. Invoked from the constructor right after features has been mapped.
+ */
+ private void initializeFeatureStructures(){
int num_blocks = features.getInt(0);
- featureLookup = new int[num_blocks];
estimated = new float[num_blocks];
precomputable = new float[num_blocks];
+ Arrays.fill(estimated, Float.NEGATIVE_INFINITY); // every entry starts at -infinity (presumably "not yet computed" - confirm)
+ Arrays.fill(precomputable, Float.NEGATIVE_INFINITY); // same starting value as estimated
+ featureLookup = parseLookups(features); // one int per block, read from byte offset 8 onwards
featureSize = features.getInt(4);
- int header_pos = 8;
- for (int i = 0; i < num_blocks; i++) {
- featureLookup[i] = features.getInt(header_pos);
- estimated[i] = Float.NEGATIVE_INFINITY;
- precomputable[i] = Float.NEGATIVE_INFINITY;
- header_pos += 4;
+ }
+
+ // TODO: (kellens) see if we can remove these lookups as they're addressed
+ // predictably into already present data structures. Are they redundant?
+ /**
+ * Builds the lookup array for a buffer (features / alignments) by copying the
+ * per-block int entries out of the buffer's header into a new int array.
+ *
+ * @param buffer
+ * buffer whose int at byte 0 is the entry count; entries start at byte 8
+ * @return an int array holding one lookup value per block, in header order.
+ */
+ private int[] parseLookups(ByteBuffer buffer) {
+ int numBlocks = buffer.getInt(0); // entry count is the first int of the buffer
+ int[] result = new int[numBlocks];
+ int headerPosition = 8; // bytes 4-7 are skipped here; for features that int is read separately as featureSize
+ for (int i = 0; i < numBlocks; i++) {
+ result[i] = buffer.getInt(headerPosition); // absolute read: the buffer's own position is not moved
+ headerPosition += 4; // step to the next 4-byte int
}
+ return result;
+ }
- DataInputStream target_lookup_stream = new DataInputStream(new BufferedInputStream(
- new FileInputStream(target_lookup_file)));
- targetLookup = new int[target_lookup_stream.readInt()];
- for (int i = 0; i < targetLookup.length; i++)
- targetLookup[i] = target_lookup_stream.readInt();
- target_lookup_stream.close();
+ private int[] fullyLoadFileToArray(File file) throws IOException {
+ return fullyLoadFileToArray(file, 0); // start index 0: skip nothing, load every int in the file
+ }
- tries = new HashMap<Integer, PackedTrie>();
+ /**
+ * Memory-maps the file, views it as ints, and bulk-copies every int from
+ * startIndex to the end of the file into a newly allocated array.
+ *
+ * @param file
+ * File that will be read from disk.
+ * @param startIndex
+ * number of leading ints (not bytes) to skip before copying.
+ * @return an int array of (length(file) - 4 * startIndex) / 4 ints read from
+ * the file, beginning at int index startIndex.
+ * @throws IOException if associateMemoryMappedFile cannot open or map the file.
+ */
+ private int[] fullyLoadFileToArray(File file, int startIndex) throws IOException {
+ IntBuffer buffer = associateMemoryMappedFile(file).asIntBuffer();
+ int size = (int) (file.length() - (4 * startIndex))/4; // NOTE(review): the cast binds before the /4, so the byte count is narrowed to int first - confirm files stay small enough
+ int[] result = new int[size];
+ buffer.position(startIndex); // IntBuffer positions count ints, not bytes
+ buffer.get(result, 0, size); // relative bulk get starting at the position set above
+ return result;
+ }
- @SuppressWarnings("unused")
- private final Object guardian = new Object() {
- @Override
- // Finalizer object to ensure feature file handle get closed upon slice's dismissal.
- protected void finalize() throws Throwable {
- featureFile.close();
+ private ByteBuffer associateMemoryMappedFile(File file) throws IOException { // maps the whole file read-only and returns the mapping
+ try(FileInputStream fileInputStream = new FileInputStream(file)) { // the stream is closed when this try block exits
+ FileChannel fileChannel = fileInputStream.getChannel();
+ int size = (int) fileChannel.size(); // NOTE(review): narrows the long channel size to int - confirm files always stay below 2 GiB
+ MappedByteBuffer result = fileChannel.map(MapMode.READ_ONLY, 0, size);
+ return result; // NOTE(review): returned after the stream closes; FileChannel.map documents that a mapping does not depend on its channel - confirm
}
- };
+ }
private final int[] getTarget(int pointer) {
// Figure out level.
@@ -437,9 +440,9 @@ public class PackedGrammar extends AbstractGrammar {
int index = 0;
int parent;
do {
- parent = target[pointer];
+ parent = target.get(pointer);
if (parent != -1)
- tgt[index++] = target[pointer + 1];
+ tgt[index++] = target.get(pointer + 1);
pointer = parent;
} while (pointer != -1);
return tgt;