You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by fr...@apache.org on 2017/06/29 14:59:16 UTC
svn commit: r1800290 -
/jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarReader.java
Author: frm
Date: Thu Jun 29 14:59:16 2017
New Revision: 1800290
URL: http://svn.apache.org/viewvc?rev=1800290&view=rev
Log:
OAK-6405 - Improve JavaDoc in TarReader
Modified:
jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarReader.java
Modified: jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarReader.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarReader.java?rev=1800290&r1=1800289&r2=1800290&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarReader.java (original)
+++ jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/tar/TarReader.java Thu Jun 29 14:59:16 2017
@@ -70,7 +70,6 @@ import org.slf4j.LoggerFactory;
class TarReader implements Closeable {
- /** Logger instance */
private static final Logger log = LoggerFactory.getLogger(TarReader.class);
/**
@@ -95,19 +94,22 @@ class TarReader implements Closeable {
}
/**
- * Creates a TarReader instance for reading content from a tar file.
- * If there exist multiple generations of the same tar file, they are
- * all passed to this method. The latest generation with a valid tar
- * index (which is a good indication of general validity of the file)
- * is opened and the other generations are removed to clean things up.
- * If none of the generations has a valid index, then something must have
- * gone wrong and we'll try recover as much content as we can from the
- * existing tar generations.
- *
- * @param files
- * @param memoryMapping
- * @return
- * @throws IOException
+ * Creates a {@link TarReader} instance for reading content from a tar file.
+ * If there exist multiple generations of the same tar file, they are all
+ * passed to this method. The latest generation with a valid tar index
+ * (which is a good indication of general validity of the file) is opened
+ * and the other generations are removed to clean things up. If none of the
+ * generations has a valid index, then something must have gone wrong and
+ * we'll try to recover as much content as we can from the existing tar
+ * generations.
+ *
+ * @param files The generations of the same TAR file.
+ * @param memoryMapping If {@code true}, opens the TAR file with memory
+ * mapping enabled.
+ * @param recovery Strategy for recovering a damaged TAR file.
+ * @param ioMonitor Callbacks to track internal operations for the open
+ * TAR file.
+ * @return An instance of {@link TarReader}.
*/
static TarReader open(Map<Character, File> files, boolean memoryMapping, TarRecovery recovery, IOMonitor ioMonitor) throws IOException {
SortedMap<Character, File> sorted = newTreeMap();
@@ -171,15 +173,13 @@ class TarReader implements Closeable {
/**
* Collects all entries from the given file and optionally backs-up the
* file, by renaming it to a ".bak" extension
- *
- * @param file
- * @param entries
- * @param backup
- * @throws IOException
+ *
+ * @param file The TAR file.
+ * @param entries The map where the recovered entries will be collected
+ * into.
+ * @param backup If {@code true}, performs a backup of the TAR file.
*/
- private static void collectFileEntries(File file,
- LinkedHashMap<UUID, byte[]> entries, boolean backup)
- throws IOException {
+ private static void collectFileEntries(File file, LinkedHashMap<UUID, byte[]> entries, boolean backup) throws IOException {
log.info("Recovering segments from tar file {}", file);
try {
RandomAccessFile access = new RandomAccessFile(file, "r");
@@ -199,10 +199,13 @@ class TarReader implements Closeable {
/**
* Regenerates a tar file from a list of entries.
- *
- * @param entries
- * @param file
- * @throws IOException
+ *
+ * @param entries Map of entries to recover. The entries will be recovered
+ * in the iteration order of this {@link LinkedHashMap}.
+ * @param file The output file that will contain the recovered
+ * entries.
+ * @param recovery The recovery strategy to execute.
+ * @param ioMonitor An instance of {@link IOMonitor}.
*/
private static void generateTarFile(LinkedHashMap<UUID, byte[]> entries, File file, TarRecovery recovery, IOMonitor ioMonitor) throws IOException {
log.info("Regenerating tar file {}", file);
@@ -236,12 +239,11 @@ class TarReader implements Closeable {
}
/**
- * Backup this tar file for manual inspection. Something went
- * wrong earlier so we want to prevent the data from being
- * accidentally removed or overwritten.
+ * Backup this tar file for manual inspection. Something went wrong earlier
+ * so we want to prevent the data from being accidentally removed or
+ * overwritten.
*
- * @param file
- * @throws IOException
+ * @param file File to backup.
*/
private static void backupSafely(File file) throws IOException {
File backup = findAvailGen(file, ".bak");
@@ -259,8 +261,9 @@ class TarReader implements Closeable {
/**
* Fine next available generation number so that a generated file doesn't
* overwrite another existing file.
- *
- * @param file
+ *
+ * @param file The file to backup.
+ * @param ext The extension of the backed up file.
*/
private static File findAvailGen(File file, String ext) {
File parent = file.getParentFile();
@@ -331,17 +334,16 @@ class TarReader implements Closeable {
/**
* Tries to read an existing index from the given tar file. The index is
- * returned if it is found and looks valid (correct checksum, passes
- * sanity checks).
+ * returned if it is found and looks valid (correct checksum, passes sanity
+ * checks).
*
- * @param file tar file
- * @param name name of the tar file, for logging purposes
- * @return tar index, or {@code null} if not found or not valid
- * @throws IOException if the tar file could not be read
+ * @param file The TAR file.
+ * @param name Name of the TAR file, for logging purposes.
+ * @return An instance of {@link ByteBuffer} populated with the content of
+ * the index. If the TAR doesn't contain a valid index, {@code null} is returned
+ * instead.
*/
- private static ByteBuffer loadAndValidateIndex(
- RandomAccessFile file, String name)
- throws IOException {
+ private static ByteBuffer loadAndValidateIndex(RandomAccessFile file, String name) throws IOException {
long length = file.length();
if (length % BLOCK_SIZE != 0
|| length < 6 * BLOCK_SIZE
@@ -420,11 +422,13 @@ class TarReader implements Closeable {
/**
* Scans through the tar file, looking for all segment entries.
*
- * @throws IOException if the tar file could not be read
+ * @param file The path of the TAR file.
+ * @param access The contents of the TAR file.
+ * @param entries The map that will contain the recovered entries. The
+ * entries are inserted in the {@link LinkedHashMap} in the
+ * order they appear in the TAR file.
*/
- private static void recoverEntries(
- File file, RandomAccessFile access,
- LinkedHashMap<UUID, byte[]> entries) throws IOException {
+ private static void recoverEntries(File file, RandomAccessFile access, LinkedHashMap<UUID, byte[]> entries) throws IOException {
byte[] header = new byte[BLOCK_SIZE];
while (access.getFilePointer() + BLOCK_SIZE <= access.length()) {
// read the tar header block
@@ -558,6 +562,12 @@ class TarReader implements Closeable {
}
}
+ /**
+ * Reads and returns the identifier of every segment included in the index
+ * of this TAR file.
+ *
+ * @return An instance of {@link Set}.
+ */
Set<UUID> getUUIDs() {
Set<UUID> uuids = newHashSetWithExpectedSize(index.remaining() / TarEntry.SIZE);
int position = index.position();
@@ -570,6 +580,14 @@ class TarReader implements Closeable {
return uuids;
}
+ /**
+ * Check if the requested entry exists in this TAR file.
+ *
+ * @param msb The most significant bits of the entry identifier.
+ * @param lsb The least significant bits of the entry identifier.
+ * @return {@code true} if the entry exists in this TAR file, {@code false}
+ * otherwise.
+ */
boolean containsEntry(long msb, long lsb) {
return findEntry(msb, lsb) != -1;
}
@@ -583,7 +601,7 @@ class TarReader implements Closeable {
*
* @param msb the most significant bits of the segment id
* @param lsb the least significant bits of the segment id
- * @return the byte buffer, or null if not in this file
+ * @return the byte buffer, or null if not in this file.
*/
ByteBuffer readEntry(long msb, long lsb) throws IOException {
int position = findEntry(msb, lsb);
@@ -597,12 +615,12 @@ class TarReader implements Closeable {
}
/**
- * Find the position of the given segment in the tar file.
- * It uses the tar index if available.
- *
- * @param msb the most significant bits of the segment id
- * @param lsb the least significant bits of the segment id
- * @return the position in the file, or -1 if not found
+ * Find the position of the given entry in this TAR file.
+ *
+ * @param msb The most significant bits of the entry identifier.
+ * @param lsb The least significant bits of the entry identifier.
+ * @return The position of the entry in the TAR file, or {@code -1} if the
+ * entry is not found.
*/
private int findEntry(long msb, long lsb) {
// The segment identifiers are randomly generated with uniform
@@ -648,6 +666,11 @@ class TarReader implements Closeable {
return -1;
}
+ /**
+ * Read the entries in this TAR file.
+ *
+ * @return An array of {@link TarEntry}.
+ */
@Nonnull
private TarEntry[] getEntries() {
TarEntry[] entries = new TarEntry[index.remaining() / TarEntry.SIZE];
@@ -665,7 +688,16 @@ class TarReader implements Closeable {
return entries;
}
+ /**
+ * Read the references of an entry in this TAR file.
+ *
+ * @param entry An entry in this TAR file.
+ * @param id The identifier of the entry.
+ * @param graph The content of the graph of this TAR file.
+ * @return The references of the provided TAR entry.
+ */
@Nonnull
+ // TODO frm remove the unused parameter 'entry'
private static List<UUID> getReferences(TarEntry entry, UUID id, Map<UUID, List<UUID>> graph) {
List<UUID> references = graph.get(id);
@@ -682,6 +714,7 @@ class TarReader implements Closeable {
* @param visitor visitor receiving call back while following the segment graph
* @throws IOException
*/
+ // TODO frm remove this method, see OAK-6021
public void traverseSegmentGraph(
@Nonnull Set<UUID> roots,
@Nonnull SegmentGraphVisitor visitor) throws IOException {
@@ -715,6 +748,7 @@ class TarReader implements Closeable {
*
* @throws IOException
*/
+ // TODO frm remove this method, see OAK-6021
void calculateForwardReferences(Set<UUID> referencedIds) throws IOException {
Map<UUID, List<UUID>> graph = getGraph(false);
TarEntry[] entries = getEntries();
@@ -730,10 +764,24 @@ class TarReader implements Closeable {
}
/**
- * Collect the references of those blobs that are reachable from any segment and
- * are not reclaimable according to the {@code reclaim} predicate.
+ * Collect the references of those BLOBs that are reachable from the entries
+ * in this TAR file.
+ * <p>
+ * The user-provided {@link Predicate} determines if entries belonging to a
+ * specific generation should be inspected for binary references or not.
+ * Given a generation number as input, if the predicate returns {@code
+ * true}, entries from that generation will be skipped. If the predicate
+ * returns {@code false}, entries from that generation will be inspected for
+ * references.
+ * <p>
+ * The provided {@link ReferenceCollector} is a callback object that will be
+ * invoked for every reference found in the inspected entries.
+ *
+ * @param collector An instance of {@link ReferenceCollector}.
+ * @param skipGeneration An instance of {@link Predicate}.
*/
- void collectBlobReferences(@Nonnull ReferenceCollector collector, Predicate<Integer> reclaim) {
+ // TODO frm this package depends on org.apache.jackrabbit.oak.plugins.blob only because of ReferenceCollector
+ void collectBlobReferences(@Nonnull ReferenceCollector collector, Predicate<Integer> skipGeneration) {
Map<Integer, Map<UUID, Set<String>>> generations = getBinaryReferences();
if (generations == null) {
@@ -741,7 +789,7 @@ class TarReader implements Closeable {
}
for (Entry<Integer, Map<UUID, Set<String>>> entry : generations.entrySet()) {
- if (reclaim.apply(entry.getKey())) {
+ if (skipGeneration.apply(entry.getKey())) {
continue;
}
@@ -754,19 +802,39 @@ class TarReader implements Closeable {
}
/**
- * Collect reclaimable segments.
- * A data segment is reclaimable iff its generation is in the {@code reclaimGeneration}
- * predicate.
- * A bulk segment is reclaimable if it is not in {@code bulkRefs} or if it is transitively
- * reachable through a non reclaimable data segment.
- *
- * @param bulkRefs bulk segment gc roots
- * @param reclaim reclaimable segments
- * @param reclaimGeneration reclaim generation predicate for data segments
- * @throws IOException
+ * Mark entries that can be reclaimed.
+ * <p>
+ * A data segment is reclaimable iff its generation is in the {@code
+ * reclaimGeneration} predicate. A bulk segment is reclaimable if it is not
+ * in {@code bulkRefs} or if it is transitively reachable through a non
+ * reclaimable data segment.
+ * <p>
+ * The algorithm implemented by this method uses a couple of supporting data
+ * structures.
+ * <p>
+ * The first of the supporting data structures is the set of bulk segments
+ * to keep. When this method is invoked, this set initially contains the set
+ * of bulk segments that are currently in use. The algorithm removes a
+ * reference from this set if the corresponding bulk segment is not
+ * referenced (either directly or transitively) from a marked data segment.
+ * The algorithm adds a reference to this set if a marked data segment
+ * references the corresponding bulk segment. When this method returns, the
+ * references in this set represent bulk segments that are currently in use
+ * and should not be removed.
+ * <p>
+ * The second of the supporting data structures is the set of segments to
+ * reclaim. This set contains references to bulk and data segments. A
+ * reference to a bulk segment is added if the bulk segment is not
+ * referenced (either directly or transitively) by a marked data segment. A
+ * reference to a data segment is added if the user-provided predicate
+ * returns {@code true} for that segment. When this method returns, this set
+ * contains segments that are not marked and can be removed.
+ *
+ * @param bulkRefs The set of bulk segments to keep.
+ * @param reclaim The set of segments to remove.
+ * @param reclaimGeneration An instance of {@link Predicate}.
*/
- void mark(Set<UUID> bulkRefs, Set<UUID> reclaim, Predicate<Integer> reclaimGeneration)
- throws IOException {
+ void mark(Set<UUID> bulkRefs, Set<UUID> reclaim, Predicate<Integer> reclaimGeneration) throws IOException {
Map<UUID, List<UUID>> graph = getGraph(true);
TarEntry[] entries = getEntries();
for (int i = entries.length - 1; i >= 0; i--) {
@@ -797,11 +865,39 @@ class TarReader implements Closeable {
}
/**
- * Remove reclaimable segments and collect actually reclaimed segments.
- * @param reclaim segments to reclaim
- * @param reclaimed actually reclaimed segments
- * @return reader resulting from the reclamation process
- * @throws IOException
+ * Try to remove every segment contained in a user-provided set.
+ * <p>
+ * This method might refuse to remove the segments under the following
+ * circumstances.
+ * <p>
+ * First, if this TAR file does not contain any of the segments that are
+ * supposed to be removed. In this case, the method returns {@code null}.
+ * <p>
+ * Second, if this TAR file contains some of the segments that are supposed to
+ * be removed, but the reclaimable space is less than 1/4 of the current
+ * size of the TAR file. In this case, this method returns this {@link
+ * TarReader}.
+ * <p>
+ * Third, if this TAR file is in the highest generation possible ('z') and
+ * thus a new generation for this TAR file can't be created. In this case,
+ * the method returns this {@link TarReader}.
+ * <p>
+ * Fourth, if a new TAR file has been created but it is unreadable for
+ * unknown reasons. In this case, this method returns this {@link
+ * TarReader}.
+ * <p>
+ * If none of the above conditions apply, this method returns a new {@link
+ * TarReader} instance that points to a TAR file that doesn't contain the
+ * removed segments. The returned {@link TarReader} will belong to the next
+ * generation of this {@link TarReader}. In this case, the {@code reclaimed}
+ * set will be updated to contain the identifiers of the segments that were
+ * removed from this TAR file.
+ *
+ * @param reclaim Set of segments to reclaim.
+ * @param reclaimed Set of reclaimed segments. It will be updated if this TAR
+ * file is rewritten.
+ * @return Either this {@link TarReader}, or a new instance of {@link
+ * TarReader}, or {@code null}.
*/
TarReader sweep(@Nonnull Set<UUID> reclaim, @Nonnull Set<UUID> reclaimed) throws IOException {
String name = file.getName();
@@ -922,8 +1018,8 @@ class TarReader implements Closeable {
}
/**
- * @return {@code true} iff this reader has been closed
- * @see #close()
+ * Check if this {@link TarReader} is closed.
+ * @return {@code true} if this instance is closed, {@code false} otherwise.
*/
boolean isClosed() {
return closed;
@@ -935,14 +1031,13 @@ class TarReader implements Closeable {
access.close();
}
- //-----------------------------------------------------------< private >--
-
/**
* Loads and parses the optional pre-compiled graph entry from the given tar
* file.
*
- * @return the parsed graph, or {@code null} if one was not found
- * @throws IOException if the tar file could not be read
+ * @param bulkOnly If {@code true}, only vertices pointing to bulk segments
+ * are included in the graph.
+ * @return The parsed graph, or {@code null} if one was not found.
*/
Map<UUID, List<UUID>> getGraph(boolean bulkOnly) throws IOException {
ByteBuffer graph = loadGraph();
@@ -983,6 +1078,17 @@ class TarReader implements Closeable {
return getEntrySize(buffer.getInt(buffer.limit() - 8));
}
+ /**
+ * Read the index of binary references from this TAR file.
+ * <p>
+ * The index of binary references is a two-level map. The key to the first
+ * level of the map is the generation. The key to the second level of the
+ * map is the identifier of a data segment in this TAR file. The value of
+ * the second-level map is the set of binary references contained in the
+ * segment.
+ *
+ * @return An instance of {@link Map}.
+ */
Map<Integer, Map<UUID, Set<String>>> getBinaryReferences() {
ByteBuffer buffer;
@@ -1189,6 +1295,11 @@ class TarReader implements Closeable {
return number;
}
+ /**
+ * Return the path of this TAR file.
+ *
+ * @return An instance of {@link File}.
+ */
File getFile() {
return file;
}