You are viewing a plain text version of this content. The canonical link for it is here.
Posted to oak-commits@jackrabbit.apache.org by fr...@apache.org on 2017/08/10 15:00:43 UTC

svn commit: r1804675 - in /jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment: Segment.java SegmentBufferWriter.java data/ data/RecordIdData.java data/SegmentData.java data/StringData.java file/AbstractFileStore.java

Author: frm
Date: Thu Aug 10 15:00:43 2017
New Revision: 1804675

URL: http://svn.apache.org/viewvc?rev=1804675&view=rev
Log:
OAK-6457 - Encapsulate access to the buffers containing segment data

Added:
    jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/data/
    jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/data/RecordIdData.java   (with props)
    jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/data/SegmentData.java   (with props)
    jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/data/StringData.java   (with props)
Modified:
    jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/Segment.java
    jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentBufferWriter.java
    jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/AbstractFileStore.java

Modified: jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/Segment.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/Segment.java?rev=1804675&r1=1804674&r2=1804675&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/Segment.java (original)
+++ jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/Segment.java Thu Aug 10 15:00:43 2017
@@ -20,7 +20,6 @@ package org.apache.jackrabbit.oak.segmen
 
 import static com.google.common.base.Preconditions.checkArgument;
 import static com.google.common.base.Preconditions.checkNotNull;
-import static com.google.common.base.Preconditions.checkPositionIndexes;
 import static com.google.common.base.Preconditions.checkState;
 import static java.util.Arrays.fill;
 import static org.apache.jackrabbit.oak.commons.IOUtils.closeQuietly;
@@ -30,6 +29,7 @@ import static org.apache.jackrabbit.oak.
 import static org.apache.jackrabbit.oak.segment.SegmentStream.BLOCK_SIZE;
 import static org.apache.jackrabbit.oak.segment.SegmentVersion.LATEST_VERSION;
 import static org.apache.jackrabbit.oak.segment.SegmentVersion.isValid;
+import static org.apache.jackrabbit.oak.segment.data.SegmentData.newSegmentData;
 import static org.apache.jackrabbit.oak.segment.file.tar.GCGeneration.newGCGeneration;
 
 import java.io.IOException;
@@ -37,8 +37,6 @@ import java.io.OutputStream;
 import java.io.PrintWriter;
 import java.io.StringWriter;
 import java.nio.ByteBuffer;
-import java.nio.channels.Channels;
-import java.nio.channels.WritableByteChannel;
 import java.util.Arrays;
 import java.util.Iterator;
 import java.util.UUID;
@@ -50,11 +48,15 @@ import com.google.common.base.Charsets;
 import com.google.common.collect.AbstractIterator;
 import org.apache.commons.io.HexDump;
 import org.apache.commons.io.output.ByteArrayOutputStream;
+import org.apache.commons.io.output.WriterOutputStream;
 import org.apache.jackrabbit.oak.api.PropertyState;
 import org.apache.jackrabbit.oak.api.Type;
 import org.apache.jackrabbit.oak.commons.StringUtils;
 import org.apache.jackrabbit.oak.plugins.memory.PropertyStates;
 import org.apache.jackrabbit.oak.segment.RecordNumbers.Entry;
+import org.apache.jackrabbit.oak.segment.data.RecordIdData;
+import org.apache.jackrabbit.oak.segment.data.SegmentData;
+import org.apache.jackrabbit.oak.segment.data.StringData;
 import org.apache.jackrabbit.oak.segment.file.tar.GCGeneration;
 
 /**
@@ -137,8 +139,7 @@ public class Segment {
     @Nonnull
     private final SegmentId id;
 
-    @Nonnull
-    private final ByteBuffer data;
+    private final SegmentData data;
 
     /**
      * Version of the segment storage format.
@@ -174,19 +175,16 @@ public class Segment {
                    @Nonnull final ByteBuffer data) {
         this.reader = checkNotNull(reader);
         this.id = checkNotNull(id);
-        this.data = checkNotNull(data);
+        this.data = newSegmentData(checkNotNull(data).slice());
         if (id.isDataSegmentId()) {
-            byte segmentVersion = data.get(3);
-            checkState(data.get(0) == '0'
-                    && data.get(1) == 'a'
-                    && data.get(2) == 'K'
-                    && isValid(segmentVersion),
-                new Object() {  // Defer evaluation of error message
+            byte segmentVersion = this.data.getVersion();
+            checkState(this.data.getSignature().equals("0aK") && isValid(segmentVersion), new Object() {
+
                     @Override
                     public String toString() {
-                        return "Invalid segment format. Dumping segment " + id + "\n"
-                            + toHex(data.array());
+                        return String.format("Invalid segment format. Dumping segment %s\n%s", id, toHex(data.array()));
                     }
+
             });
             this.version = SegmentVersion.fromByte(segmentVersion);
             this.recordNumbers = readRecordNumberOffsets();
@@ -216,38 +214,31 @@ public class Segment {
      * @return An instance of {@link RecordNumbers}, never {@code null}.
      */
     private RecordNumbers readRecordNumberOffsets() {
-        int recordNumberCount = getRecordNumberCount();
+        int recordNumberCount = data.getRecordReferencesCount();
+
         if (recordNumberCount == 0) {
             return EMPTY_RECORD_NUMBERS;
         }
 
-        int position = HEADER_SIZE + data.position()
-                + getReferencedSegmentIdCount() * SEGMENT_REFERENCE_SIZE;
-        int maxIndex = data.getInt(position + (recordNumberCount - 1) * 9);
+        int maxIndex = data.getRecordReferenceNumber(recordNumberCount - 1);
 
         byte[] types = new byte[maxIndex + 1];
         int[] offsets = new int[maxIndex + 1];
         fill(offsets, -1);
 
         for (int i = 0; i < recordNumberCount; i++) {
-            int recordNumber = data.getInt(position);
-            position += 4;
-            types[recordNumber] = data.get(position);
-            position += 1;
-            offsets[recordNumber] = data.getInt(position);
-            position += 4;
+            int recordNumber = data.getRecordReferenceNumber(i);
+            types[recordNumber] = data.getRecordReferenceType(i);
+            offsets[recordNumber] = data.getRecordReferenceOffset(i);
         }
 
         return new ImmutableRecordNumbers(offsets, types);
     }
 
-    private SegmentReferences readReferencedSegments(
-            final SegmentIdProvider idProvider) {
-        checkState(getReferencedSegmentIdCount() + 1 < 0xffff,
-                "Segment cannot have more than 0xffff references");
+    private SegmentReferences readReferencedSegments(final SegmentIdProvider idProvider) {
+        checkState(getReferencedSegmentIdCount() + 1 < 0xffff, "Segment cannot have more than 0xffff references");
 
         final int referencedSegmentIdCount = getReferencedSegmentIdCount();
-        final int refOffset = data.position() + HEADER_SIZE;
 
         // We need to keep SegmentId references (as opposed to e.g. UUIDs)
         // here as frequently resolving the segment ids via the segment id
@@ -257,19 +248,21 @@ public class Segment {
         // is managed via the SegmentCache. It is the size of that cache that
         // keeps overall heap usage by Segment instances bounded.
         // See OAK-6106.
+
         final SegmentId[] refIds = new SegmentId[referencedSegmentIdCount];
+
         return new SegmentReferences() {
+
             @Override
             public SegmentId getSegmentId(int reference) {
                 checkArgument(reference <= referencedSegmentIdCount, "Segment reference out of bounds");
                 SegmentId id = refIds[reference - 1];
                 if (id == null) {
-                    synchronized(refIds) {
+                    synchronized (refIds) {
                         id = refIds[reference - 1];
                         if (id == null) {
-                            int position = refOffset + (reference - 1) * SEGMENT_REFERENCE_SIZE;
-                            long msb = data.getLong(position);
-                            long lsb = data.getLong(position + 8);
+                            long msb = data.getSegmentReferenceMsb(reference - 1);
+                            long lsb = data.getSegmentReferenceLsb(reference - 1);
                             id = idProvider.newSegmentId(msb, lsb);
                             refIds[reference - 1] = id;
                         }
@@ -282,7 +275,9 @@ public class Segment {
             @Override
             public Iterator<SegmentId> iterator() {
                 return new AbstractIterator<SegmentId>() {
+
                     private int reference = 1;
+
                     @Override
                     protected SegmentId computeNext() {
                         if (reference <= referencedSegmentIdCount) {
@@ -291,22 +286,25 @@ public class Segment {
                             return endOfData();
                         }
                     }
+
                 };
             }
+
         };
     }
 
-    Segment(@Nonnull SegmentId id,
-            @Nonnull SegmentReader reader,
-            @Nonnull byte[] buffer,
-            @Nonnull RecordNumbers recordNumbers,
-            @Nonnull SegmentReferences segmentReferences,
-            @Nonnull String info
+    Segment(
+        @Nonnull SegmentId id,
+        @Nonnull SegmentReader reader,
+        @Nonnull byte[] buffer,
+        @Nonnull RecordNumbers recordNumbers,
+        @Nonnull SegmentReferences segmentReferences,
+        @Nonnull String info
     ) {
         this.id = checkNotNull(id);
         this.reader = checkNotNull(reader);
         this.info = checkNotNull(info);
-        this.data = ByteBuffer.wrap(checkNotNull(buffer));
+        this.data = newSegmentData(buffer);
         this.version = SegmentVersion.fromByte(buffer[3]);
         this.recordNumbers = recordNumbers;
         this.segmentReferences = segmentReferences;
@@ -317,50 +315,16 @@ public class Segment {
         return version;
     }
 
-    private int pos(int recordNumber, int length) {
-        return pos(recordNumber, 0, 0, length);
-    }
-
-    private int pos(int recordNumber, int rawOffset, int length) {
-        return pos(recordNumber, rawOffset, 0, length);
-    }
-
-    /**
-     * Maps the given record number to the respective position within the
-     * internal {@link #data} array. The validity of a record with the given
-     * length at the given record number is also verified.
-     *
-     * @param recordNumber   record number
-     * @param rawOffset      offset to add to the base position of the record
-     * @param recordIdOffset offset to add to to the base position of the
-     *                       record, multiplied by the length of a record ID
-     * @param length         record length
-     * @return position within the data array
-     */
-    private int pos(int recordNumber, int rawOffset, int recordIdOffset, int length) {
-        int offset = recordNumbers.getOffset(recordNumber);
-
-        if (offset == -1) {
-            throw new IllegalStateException("invalid record number");
-        }
-
-        int base = offset + rawOffset + recordIdOffset * RECORD_ID_BYTES;
-        checkPositionIndexes(base, base + length, MAX_SEGMENT_SIZE);
-        int pos = data.limit() - MAX_SEGMENT_SIZE + base;
-        checkState(pos >= data.position());
-        return pos;
-    }
-
     public SegmentId getSegmentId() {
         return id;
     }
 
     public int getReferencedSegmentIdCount() {
-        return data.getInt(REFERENCED_SEGMENT_ID_COUNT_OFFSET);
+        return data.getSegmentReferencesCount();
     }
 
     private int getRecordNumberCount() {
-        return data.getInt(RECORD_NUMBER_COUNT_OFFSET);
+        return data.getRecordReferencesCount();
     }
 
     public UUID getReferencedSegmentId(int index) {
@@ -368,22 +332,19 @@ public class Segment {
     }
 
     /**
-     * Determine the gc generation a segment from its data. Note that bulk segments don't have
-     * generations (i.e. stay at 0).
+     * Determine the gc generation a segment from its data. Note that bulk
+     * segments don't have generations (i.e. stay at 0).
      *
-     * @param data         the date of the segment
-     * @param segmentId    the id of the segment
-     * @return  the gc generation of this segment or {@link GCGeneration#NULL} if this is bulk segment.
+     * @param data      the data of the segment
+     * @param segmentId the id of the segment
+     * @return the gc generation of this segment or {@link GCGeneration#NULL} if
+     * this is bulk segment.
      */
-    @Nonnull
-    public static GCGeneration getGcGeneration(ByteBuffer data, UUID segmentId) {
+    public static GCGeneration getGcGeneration(SegmentData data, UUID segmentId) {
         if (isDataSegmentId(segmentId.getLeastSignificantBits())) {
-            int generation = data.getInt(GC_GENERATION_OFFSET);
-            int fullGeneration = data.getInt(GC_FULL_GENERATION_OFFSET);
-            return newGCGeneration(generation, fullGeneration & 0x7fffffff, fullGeneration < 0);
-        } else {
-            return GCGeneration.NULL;
+            return newGCGeneration(data.getGeneration(), data.getFullGeneration(), data.isCompacted());
         }
+        return GCGeneration.NULL;
     }
 
     /**
@@ -422,48 +383,46 @@ public class Segment {
     }
 
     public int size() {
-        return data.remaining();
+        return data.size();
     }
 
     byte readByte(int recordNumber) {
-        return readByte(recordNumber, 0);
+        return data.readByte(recordNumbers.getOffset(recordNumber));
     }
 
     byte readByte(int recordNumber, int offset) {
-        return data.get(pos(recordNumber, offset, 1));
+        return data.readByte(recordNumbers.getOffset(recordNumber) + offset);
     }
 
     short readShort(int recordNumber) {
-        return data.getShort(pos(recordNumber, 2));
+        return data.readShort(recordNumbers.getOffset(recordNumber));
     }
 
     int readInt(int recordNumber) {
-        return data.getInt(pos(recordNumber, 4));
+        return data.readInt(recordNumbers.getOffset(recordNumber));
     }
 
     int readInt(int recordNumber, int offset) {
-        return data.getInt(pos(recordNumber, offset, 4));
+        return data.readInt(recordNumbers.getOffset(recordNumber) + offset);
     }
 
     long readLong(int recordNumber) {
-        return data.getLong(pos(recordNumber, 8));
+        return data.readLong(recordNumbers.getOffset(recordNumber));
     }
 
     void readBytes(int recordNumber, int position, byte[] buffer, int offset, int length) {
-        checkNotNull(buffer);
-        checkPositionIndexes(offset, offset + length, buffer.length);
-        ByteBuffer d = readBytes(recordNumber, position, length);
-        d.get(buffer, offset, length);
+        readBytes(recordNumber, position, length).get(buffer, offset, length);
     }
 
     ByteBuffer readBytes(int recordNumber, int position, int length) {
-        int pos = pos(recordNumber, position, length);
-        return slice(pos, length);
+        return data.readBytes(recordNumbers.getOffset(recordNumber) + position, length);
     }
 
     @Nonnull
     RecordId readRecordId(int recordNumber, int rawOffset, int recordIdOffset) {
-        return internalReadRecordId(pos(recordNumber, rawOffset, recordIdOffset, RECORD_ID_BYTES));
+        int offset = recordNumbers.getOffset(recordNumber) + rawOffset + recordIdOffset * RecordIdData.BYTES;
+        RecordIdData recordIdData = data.readRecordId(offset);
+        return new RecordId(dereferenceSegmentId(recordIdData.getSegmentReference()), recordIdData.getRecordNumber());
     }
 
     RecordId readRecordId(int recordNumber, int rawOffset) {
@@ -475,16 +434,6 @@ public class Segment {
     }
 
     @Nonnull
-    private RecordId internalReadRecordId(int pos) {
-        SegmentId segmentId = dereferenceSegmentId(asUnsigned(data.getShort(pos)));
-        return new RecordId(segmentId, data.getInt(pos + 2));
-    }
-
-    private static int asUnsigned(short value) {
-        return value & 0xffff;
-    }
-
-    @Nonnull
     private SegmentId dereferenceSegmentId(int reference) {
         if (reference == 0) {
             return id;
@@ -500,29 +449,23 @@ public class Segment {
     }
 
     @Nonnull
-    String readString(int offset) {
-        int pos = pos(offset, 1);
-        long length = internalReadLength(pos);
-        if (length < SMALL_LIMIT) {
-            return Charsets.UTF_8.decode(slice(pos + 1, (int) length)).toString();
-        } else if (length < MEDIUM_LIMIT) {
-            return Charsets.UTF_8.decode(slice(pos + 2, (int) length)).toString();
-        } else if (length < Integer.MAX_VALUE) {
-            int size = (int) ((length + BLOCK_SIZE - 1) / BLOCK_SIZE);
-            ListRecord list = new ListRecord(internalReadRecordId(pos + 8), size);
-            try (SegmentStream stream = new SegmentStream(new RecordId(id, offset), list, length)) {
+    String readString(int recordNumber) {
+        StringData data = this.data.readString(recordNumbers.getOffset(recordNumber));
+
+        if (data.isString()) {
+            return data.getString();
+        }
+
+        if (data.isRecordId()) {
+            SegmentId segmentId = dereferenceSegmentId(data.getRecordId().getSegmentReference());
+            RecordId recordId = new RecordId(segmentId, data.getRecordId().getRecordNumber());
+            ListRecord list = new ListRecord(recordId, (data.getLength() + BLOCK_SIZE - 1) / BLOCK_SIZE);
+            try (SegmentStream stream = new SegmentStream(new RecordId(id, recordNumber), list, data.getLength())) {
                 return stream.getString();
             }
-        } else {
-            throw new IllegalStateException("String is too long: " + length);
         }
-    }
 
-    private ByteBuffer slice(int pos, int length) {
-        ByteBuffer buffer = data.duplicate();
-        buffer.position(pos);
-        buffer.limit(pos + length);
-        return buffer.slice();
+        throw new IllegalStateException("Invalid return value");
     }
 
     @Nonnull
@@ -592,28 +535,7 @@ public class Segment {
     }
 
     long readLength(int recordNumber) {
-        return internalReadLength(pos(recordNumber, 1));
-    }
-
-    private long internalReadLength(int pos) {
-        int length = data.get(pos++) & 0xff;
-        if ((length & 0x80) == 0) {
-            return length;
-        } else if ((length & 0x40) == 0) {
-            return ((length & 0x3f) << 8
-                    | data.get(pos) & 0xff)
-                    + SMALL_LIMIT;
-        } else {
-            return (((long) length & 0x3f) << 56
-                    | ((long) (data.get(pos++) & 0xff)) << 48
-                    | ((long) (data.get(pos++) & 0xff)) << 40
-                    | ((long) (data.get(pos++) & 0xff)) << 32
-                    | ((long) (data.get(pos++) & 0xff)) << 24
-                    | ((long) (data.get(pos++) & 0xff)) << 16
-                    | ((long) (data.get(pos++) & 0xff)) << 8
-                    | ((long) (data.get(pos) & 0xff)))
-                    + MEDIUM_LIMIT;
-        }
+        return data.readLength(recordNumbers.getOffset(recordNumber));
     }
 
     //------------------------------------------------------------< Object >--
@@ -622,69 +544,34 @@ public class Segment {
     public String toString() {
         StringWriter string = new StringWriter();
         try (PrintWriter writer = new PrintWriter(string)) {
-            int length = data.remaining();
-
-            writer.format("Segment %s (%d bytes)%n", id, length);
+            writer.format("Segment %s (%d bytes)%n", id, data.size());
             String segmentInfo = getSegmentInfo();
             if (segmentInfo != null) {
                 writer.format("Info: %s, Generation: %s%n", segmentInfo, getGcGeneration());
             }
             if (id.isDataSegmentId()) {
                 writer.println("--------------------------------------------------------------------------");
-
                 int i = 1;
-
                 for (SegmentId segmentId : segmentReferences) {
                     writer.format("reference %02x: %s%n", i++, segmentId);
                 }
-
                 for (Entry entry : recordNumbers) {
-                    writer.format("%10s record %08x: %08x%n",
-                            entry.getType(), entry.getRecordNumber(), entry.getOffset());
+                    writer.format("%10s record %08x: %08x%n", entry.getType(), entry.getRecordNumber(), entry.getOffset());
                 }
             }
             writer.println("--------------------------------------------------------------------------");
-            int pos = data.limit() - ((length + 15) & ~15);
-            while (pos < data.limit()) {
-                writer.format("%04x: ", (MAX_SEGMENT_SIZE - data.limit() + pos) >> RECORD_ALIGN_BITS);
-                for (int i = 0; i < 16; i++) {
-                    if (i > 0 && i % 4 == 0) {
-                        writer.append(' ');
-                    }
-                    if (pos + i >= data.position()) {
-                        byte b = data.get(pos + i);
-                        writer.format("%02x ", b & 0xff);
-                    } else {
-                        writer.append("   ");
-                    }
-                }
-                writer.append(' ');
-                for (int i = 0; i < 16; i++) {
-                    if (pos + i >= data.position()) {
-                        byte b = data.get(pos + i);
-                        if (b >= ' ' && b < 127) {
-                            writer.append((char) b);
-                        } else {
-                            writer.append('.');
-                        }
-                    } else {
-                        writer.append(' ');
-                    }
-                }
-                writer.println();
-                pos += 16;
+            try {
+                data.hexDump(new WriterOutputStream(writer, Charsets.UTF_8));
+            } catch (IOException e) {
+                throw new IllegalStateException(e);
             }
             writer.println("--------------------------------------------------------------------------");
-            return string.toString();
         }
+        return string.toString();
     }
 
     public void writeTo(OutputStream stream) throws IOException {
-        ByteBuffer buffer = data.duplicate();
-        WritableByteChannel channel = Channels.newChannel(stream);
-        while (buffer.hasRemaining()) {
-            channel.write(buffer);
-        }
+        data.binDump(stream);
     }
 
     /**
@@ -731,11 +618,9 @@ public class Segment {
 
             size += StringUtils.estimateMemoryUsage(info);
         }
-        if (!data.isDirect()) {
-            // seems to over report by 100+ bytes
-            size += size();
-        }
+        size += data.estimateMemoryUsage();
         size += id.estimateMemoryUsage();
         return size;
     }
+
 }

Modified: jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentBufferWriter.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentBufferWriter.java?rev=1804675&r1=1804674&r2=1804675&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentBufferWriter.java (original)
+++ jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/SegmentBufferWriter.java Thu Aug 10 15:00:43 2017
@@ -45,6 +45,7 @@ import javax.annotation.CheckForNull;
 import javax.annotation.Nonnull;
 
 import org.apache.jackrabbit.oak.segment.RecordNumbers.Entry;
+import org.apache.jackrabbit.oak.segment.data.SegmentData;
 import org.apache.jackrabbit.oak.segment.file.tar.GCGeneration;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;

Added: jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/data/RecordIdData.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/data/RecordIdData.java?rev=1804675&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/data/RecordIdData.java (added)
+++ jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/data/RecordIdData.java Thu Aug 10 15:00:43 2017
@@ -0,0 +1,41 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jackrabbit.oak.segment.data;
+
+public class RecordIdData {
+
+    public static final int BYTES = Short.BYTES + Integer.BYTES;
+
+    private final int segmentReference;
+
+    private final int recordNumber;
+
+    RecordIdData(int segmentReference, int recordNumber) {
+        this.segmentReference = segmentReference;
+        this.recordNumber = recordNumber;
+    }
+
+    public int getSegmentReference() {
+        return segmentReference;
+    }
+
+    public int getRecordNumber() {
+        return recordNumber;
+    }
+
+}

Propchange: jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/data/RecordIdData.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/data/SegmentData.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/data/SegmentData.java?rev=1804675&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/data/SegmentData.java (added)
+++ jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/data/SegmentData.java Thu Aug 10 15:00:43 2017
@@ -0,0 +1,302 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jackrabbit.oak.segment.data;
+
+import java.io.IOException;
+import java.io.OutputStream;
+import java.nio.ByteBuffer;
+import java.nio.channels.Channels;
+import java.nio.channels.WritableByteChannel;
+
+import com.google.common.base.Charsets;
+import org.apache.commons.io.HexDump;
+
+/**
+ * Read-only access to the binary content of a segment.
+ * <p>
+ * A segment consists of a header followed by the actual data. The header is
+ * split into a fixed-length part and a variable-length part.
+ * <p>
+ * The fixed-length part of the header holds a {@link #getSignature()
+ * signature}, a string that identifies the rest of the content as a segment;
+ * a {@link #getVersion() version}, which specifies the version of the binary
+ * format used to serialize the content of the segment; a {@link
+ * #getFullGeneration() full generation}, which describes the generation of
+ * the segment with regard to full compaction; a {@link #getGeneration()
+ * generation}, which identifies the generation of the segment with regard to
+ * full or tail compaction; a {@link #isCompacted() compacted flag}, which
+ * tells whether the segment was written by a compaction operation; the
+ * {@link #getSegmentReferencesCount() number of segment references}, which is
+ * the number of identifiers of other segments used by this segment; and the
+ * {@link #getRecordReferencesCount() number of record references}, which is
+ * the number of record entries in the segment.
+ * <p>
+ * The variable-length part of the header holds the list of segment references
+ * followed by the list of record references. A segment reference is composed
+ * of the {@link #getSegmentReferenceMsb(int) most significant bits} and the
+ * {@link #getSegmentReferenceLsb(int) least significant bits} of a segment
+ * identifier. A record reference is composed of a {@link
+ * #getRecordReferenceNumber(int) record number}, a {@link
+ * #getRecordReferenceType(int) record type} and a {@link
+ * #getRecordReferenceOffset(int) record offset}.
+ * <p>
+ * The most prominent use of a segment is to hold record data, and many methods
+ * of this class allow access to it. These methods accept an integer
+ * representing an absolute position pointing to the record data. The position
+ * is virtual, though: it is computed as if the segment were 256K long. It is
+ * usually obtained from the {@link #getRecordReferenceOffset(int) record
+ * offset} of a record reference entry. This class normalizes the position for
+ * the actual size of the segment, which can be smaller than 256K. It is
+ * acceptable to displace the offset of a record reference entry by a positive
+ * amount, e.g. to access a field saved at a specific offset inside a composite
+ * record.
+ */
+public class SegmentData {
+
+    // Fixed-length part of the header. Offsets are absolute.
+
+    private static final int SIGNATURE_OFFSET = 0;
+    private static final int SIGNATURE_LENGTH = 3;
+    private static final int VERSION_OFFSET = 3;
+    private static final int FULL_GENERATION_OFFSET = 4;
+    private static final int GENERATION_OFFSET = 10;
+    private static final int SEGMENT_REFERENCES_COUNT_OFFSET = 14;
+    private static final int RECORD_REFERENCES_COUNT_OFFSET = 18;
+    private static final int HEADER_SIZE = 32;
+
+    // Segment reference entries. Field offsets are relative to the entry.
+
+    private static final int SEGMENT_REFERENCE_LENGTH = 16;
+    private static final int SEGMENT_REFERENCE_MSB_OFFSET = 0;
+    private static final int SEGMENT_REFERENCE_LSB_OFFSET = 8;
+
+    // Record reference entries. Field offsets are relative to the entry.
+
+    private static final int RECORD_REFERENCE_LENGTH = 9;
+    private static final int RECORD_REFERENCE_NUMBER_OFFSET = 0;
+    private static final int RECORD_REFERENCE_TYPE_OFFSET = 4;
+    private static final int RECORD_REFERENCE_OFFSET_OFFSET = 5;
+
+    // Lengths below the first value are stored in one byte, lengths below the
+    // second value in two bytes, every other length in eight bytes.
+
+    private static final int MAX_SMALL_LENGTH_VALUE = 1 << 7;
+    private static final int MAX_MEDIUM_LENGTH_VALUE = (1 << 14) + MAX_SMALL_LENGTH_VALUE;
+
+    // Size of the virtual segment used to express record offsets (256K).
+
+    private static final int MAX_SEGMENT_SIZE = 1 << 18;
+
+    private final ByteBuffer buffer;
+
+    private SegmentData(ByteBuffer buffer) {
+        this.buffer = buffer;
+    }
+
+    /**
+     * Create a new instance reading from a byte array. The array is wrapped,
+     * not copied.
+     *
+     * @param buffer the content of the segment.
+     * @return a new instance of this class.
+     */
+    public static SegmentData newSegmentData(byte[] buffer) {
+        return newSegmentData(ByteBuffer.wrap(buffer));
+    }
+
+    /**
+     * Create a new instance reading from a buffer. The buffer is used as is,
+     * not copied.
+     *
+     * @param buffer the content of the segment.
+     * @return a new instance of this class.
+     */
+    public static SegmentData newSegmentData(ByteBuffer buffer) {
+        return new SegmentData(buffer);
+    }
+
+    /**
+     * @return the version byte stored in the header.
+     */
+    public byte getVersion() {
+        return buffer.get(VERSION_OFFSET);
+    }
+
+    /**
+     * @return the signature bytes at the beginning of the header, decoded as
+     * a UTF-8 string.
+     */
+    public String getSignature() {
+        byte[] bytes = new byte[SIGNATURE_LENGTH];
+
+        for (int k = 0; k < bytes.length; k++) {
+            bytes[k] = buffer.get(SIGNATURE_OFFSET + k);
+        }
+
+        return new String(bytes, Charsets.UTF_8);
+    }
+
+    /**
+     * @return the full generation, i.e. the integer stored in the full
+     * generation field with its sign bit cleared.
+     */
+    public int getFullGeneration() {
+        int field = buffer.getInt(FULL_GENERATION_OFFSET);
+        return field & Integer.MAX_VALUE;
+    }
+
+    /**
+     * @return {@code true} if the sign bit of the integer stored in the full
+     * generation field is set, {@code false} otherwise.
+     */
+    public boolean isCompacted() {
+        int field = buffer.getInt(FULL_GENERATION_OFFSET);
+        return (field & Integer.MIN_VALUE) != 0;
+    }
+
+    /**
+     * @return the integer stored in the generation field of the header.
+     */
+    public int getGeneration() {
+        return buffer.getInt(GENERATION_OFFSET);
+    }
+
+    /**
+     * @return the number of segment reference entries, as stored in the
+     * header.
+     */
+    public int getSegmentReferencesCount() {
+        return buffer.getInt(SEGMENT_REFERENCES_COUNT_OFFSET);
+    }
+
+    /**
+     * @return the number of record reference entries, as stored in the
+     * header.
+     */
+    public int getRecordReferencesCount() {
+        return buffer.getInt(RECORD_REFERENCES_COUNT_OFFSET);
+    }
+
+    // Absolute position of the i-th record reference entry. These entries are
+    // stored right after the last segment reference entry.
+    private int getRecordReferenceBase(int i) {
+        int segmentReferencesSize = getSegmentReferencesCount() * SEGMENT_REFERENCE_LENGTH;
+        int precedingEntriesSize = i * RECORD_REFERENCE_LENGTH;
+        return HEADER_SIZE + segmentReferencesSize + precedingEntriesSize;
+    }
+
+    /**
+     * @param i zero-based index of a record reference entry.
+     * @return the record number stored in that entry.
+     */
+    public int getRecordReferenceNumber(int i) {
+        int entry = getRecordReferenceBase(i);
+        return buffer.getInt(entry + RECORD_REFERENCE_NUMBER_OFFSET);
+    }
+
+    /**
+     * @param i zero-based index of a record reference entry.
+     * @return the record type stored in that entry.
+     */
+    public byte getRecordReferenceType(int i) {
+        int entry = getRecordReferenceBase(i);
+        return buffer.get(entry + RECORD_REFERENCE_TYPE_OFFSET);
+    }
+
+    /**
+     * @param i zero-based index of a record reference entry.
+     * @return the record offset stored in that entry.
+     */
+    public int getRecordReferenceOffset(int i) {
+        int entry = getRecordReferenceBase(i);
+        return buffer.getInt(entry + RECORD_REFERENCE_OFFSET_OFFSET);
+    }
+
+    // Absolute position of the i-th segment reference entry. These entries
+    // are stored right after the fixed-length part of the header.
+    private int getSegmentReferenceBase(int i) {
+        return HEADER_SIZE + i * SEGMENT_REFERENCE_LENGTH;
+    }
+
+    /**
+     * @param i zero-based index of a segment reference entry.
+     * @return the most significant bits stored in that entry.
+     */
+    public long getSegmentReferenceMsb(int i) {
+        int entry = getSegmentReferenceBase(i);
+        return buffer.getLong(entry + SEGMENT_REFERENCE_MSB_OFFSET);
+    }
+
+    /**
+     * @param i zero-based index of a segment reference entry.
+     * @return the least significant bits stored in that entry.
+     */
+    public long getSegmentReferenceLsb(int i) {
+        int entry = getSegmentReferenceBase(i);
+        return buffer.getLong(entry + SEGMENT_REFERENCE_LSB_OFFSET);
+    }
+
+    // Translate a position expressed on the virtual 256K segment into a
+    // position in the buffer. The end of the virtual segment is aligned with
+    // the limit of the buffer.
+    private int index(int recordReferenceOffset) {
+        int distanceFromEnd = MAX_SEGMENT_SIZE - recordReferenceOffset;
+        return buffer.limit() - distanceFromEnd;
+    }
+
+    /**
+     * Read a variable-length encoded length.
+     *
+     * @param recordReferenceOffset virtual position of the encoded length.
+     * @return the decoded length.
+     */
+    public long readLength(int recordReferenceOffset) {
+        return internalReadLength(index(recordReferenceOffset));
+    }
+
+    // The two highest bits of the first byte select the encoding: 0x is a one
+    // byte value, 10 is a two bytes value, 11 is an eight bytes value. The
+    // selector bits are masked out and the result is shifted by the first
+    // value that doesn't fit in the shorter encoding.
+    private long internalReadLength(int position) {
+        int first = buffer.get(position) & 0xff;
+
+        boolean oneByte = (first & 0x80) == 0;
+        if (oneByte) {
+            return first;
+        }
+
+        boolean twoBytes = (first & 0x40) == 0;
+        if (twoBytes) {
+            int payload = buffer.getShort(position) & 0x3fff;
+            return MAX_SMALL_LENGTH_VALUE + payload;
+        }
+
+        long payload = buffer.getLong(position) & 0x3fffffffffffffffL;
+        return MAX_MEDIUM_LENGTH_VALUE + payload;
+    }
+
+    /**
+     * Read a string record. Strings whose length is encoded in one or two
+     * bytes are decoded and returned inline. For longer strings only the
+     * record identifier following the length and the length itself are
+     * returned.
+     *
+     * @param recordReferenceOffset virtual position of the string record.
+     * @return the string, or the record identifier found after its length.
+     * @throws IllegalStateException if the length of the string is not
+     *                               smaller than {@link Integer#MAX_VALUE}.
+     */
+    public StringData readString(int recordReferenceOffset) {
+        return internalReadString(index(recordReferenceOffset));
+    }
+
+    private StringData internalReadString(int position) {
+        long length = internalReadLength(position);
+
+        if (length >= Integer.MAX_VALUE) {
+            throw new IllegalStateException("String is too long: " + length);
+        }
+
+        int size = (int) length;
+
+        if (length >= MAX_MEDIUM_LENGTH_VALUE) {
+            // The length took eight bytes, a record identifier follows.
+            return new StringData(internalReadRecordId(position + Long.BYTES), size);
+        }
+
+        // The length took one or two bytes, the string itself follows.
+        int lengthSize = length < MAX_SMALL_LENGTH_VALUE ? Byte.BYTES : Short.BYTES;
+        return internalReadString(position + lengthSize, size);
+    }
+
+    // Decode 'length' bytes starting at 'position' as a UTF-8 string.
+    private StringData internalReadString(int position, int length) {
+        ByteBuffer encoded = internalReadBytes(position, length);
+        String decoded = Charsets.UTF_8.decode(encoded).toString();
+        return new StringData(decoded, length);
+    }
+
+    /**
+     * Read a record identifier.
+     *
+     * @param recordReferenceOffset virtual position of the record identifier.
+     * @return the segment reference and the record number read at that
+     * position.
+     */
+    public RecordIdData readRecordId(int recordReferenceOffset) {
+        return internalReadRecordId(index(recordReferenceOffset));
+    }
+
+    // A record identifier is an unsigned short (the segment reference)
+    // followed by an int (the record number).
+    private RecordIdData internalReadRecordId(int position) {
+        int reference = buffer.getShort(position) & 0xffff;
+        int number = buffer.getInt(position + Short.BYTES);
+        return new RecordIdData(reference, number);
+    }
+
+    /**
+     * @param recordReferenceOffset virtual position of the value.
+     * @return the byte at that position.
+     */
+    public byte readByte(int recordReferenceOffset) {
+        int position = index(recordReferenceOffset);
+        return buffer.get(position);
+    }
+
+    /**
+     * @param recordReferenceOffset virtual position of the value.
+     * @return the int starting at that position.
+     */
+    public int readInt(int recordReferenceOffset) {
+        int position = index(recordReferenceOffset);
+        return buffer.getInt(position);
+    }
+
+    /**
+     * @param recordReferenceOffset virtual position of the value.
+     * @return the short starting at that position.
+     */
+    public short readShort(int recordReferenceOffset) {
+        int position = index(recordReferenceOffset);
+        return buffer.getShort(position);
+    }
+
+    /**
+     * @param recordReferenceOffset virtual position of the value.
+     * @return the long starting at that position.
+     */
+    public long readLong(int recordReferenceOffset) {
+        int position = index(recordReferenceOffset);
+        return buffer.getLong(position);
+    }
+
+    /**
+     * Expose a range of bytes. The returned buffer is a slice, so it shares
+     * its content with the buffer backing this instance.
+     *
+     * @param recordReferenceOffset virtual position of the first byte.
+     * @param size                  number of bytes to expose.
+     * @return a buffer of {@code size} bytes starting at the given position.
+     */
+    public ByteBuffer readBytes(int recordReferenceOffset, int size) {
+        return internalReadBytes(index(recordReferenceOffset), size);
+    }
+
+    // Work on a duplicate to leave position and limit of the backing buffer
+    // untouched.
+    private ByteBuffer internalReadBytes(int position, int size) {
+        ByteBuffer window = buffer.duplicate();
+        window.position(position);
+        window.limit(position + size);
+        return window.slice();
+    }
+
+    /**
+     * @return the number of bytes remaining in the backing buffer.
+     */
+    public int size() {
+        return buffer.remaining();
+    }
+
+    /**
+     * Write an hexadecimal dump of the remaining bytes of the backing buffer.
+     *
+     * @param stream the stream receiving the dump.
+     * @throws IOException if writing to the stream fails.
+     */
+    public void hexDump(OutputStream stream) throws IOException {
+        ByteBuffer source = buffer.duplicate();
+        byte[] bytes = new byte[source.remaining()];
+        source.get(bytes);
+        HexDump.dump(bytes, 0, stream, 0);
+    }
+
+    /**
+     * Write the remaining bytes of the backing buffer as they are. The stream
+     * is wrapped in a channel that is closed before returning, and closing
+     * that channel closes the stream as well.
+     *
+     * @param stream the stream receiving the bytes.
+     * @throws IOException if writing to the stream fails.
+     */
+    public void binDump(OutputStream stream) throws IOException {
+        ByteBuffer pending = buffer.duplicate();
+        try (WritableByteChannel sink = Channels.newChannel(stream)) {
+            while (pending.hasRemaining()) {
+                sink.write(pending);
+            }
+        }
+    }
+
+    /**
+     * @return zero if the backing buffer is direct, the number of its
+     * remaining bytes otherwise.
+     */
+    public int estimateMemoryUsage() {
+        if (buffer.isDirect()) {
+            return 0;
+        }
+        return buffer.remaining();
+    }
+
+}

Propchange: jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/data/SegmentData.java
------------------------------------------------------------------------------
    svn:eol-style = native

Added: jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/data/StringData.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/data/StringData.java?rev=1804675&view=auto
==============================================================================
--- jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/data/StringData.java (added)
+++ jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/data/StringData.java Thu Aug 10 15:00:43 2017
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.jackrabbit.oak.segment.data;
+
+/**
+ * Result of reading a string from the data of a segment. An instance carries
+ * either the string itself or a record identifier, depending on the
+ * constructor used to create it, and always carries a length.
+ */
+public class StringData {
+
+    private final String string;
+    private final RecordIdData recordId;
+    private final int length;
+
+    /**
+     * Create an instance carrying the string itself.
+     *
+     * @param string the string.
+     * @param length the length associated to the string.
+     */
+    StringData(String string, int length) {
+        this(string, null, length);
+    }
+
+    /**
+     * Create an instance carrying a record identifier instead of the string.
+     *
+     * @param recordId the record identifier.
+     * @param length   the length associated to the string.
+     */
+    StringData(RecordIdData recordId, int length) {
+        this(null, recordId, length);
+    }
+
+    // Canonical constructor, the other ones leave one of the two references
+    // unset.
+    private StringData(String string, RecordIdData recordId, int length) {
+        this.string = string;
+        this.recordId = recordId;
+        this.length = length;
+    }
+
+    /**
+     * @return {@code true} if {@link #getString()} returns a non-null value.
+     */
+    public boolean isString() {
+        return this.string != null;
+    }
+
+    /**
+     * @return {@code true} if {@link #getRecordId()} returns a non-null
+     * value.
+     */
+    public boolean isRecordId() {
+        return this.recordId != null;
+    }
+
+    /**
+     * @return the string, or {@code null} if this instance was created from a
+     * record identifier.
+     */
+    public String getString() {
+        return this.string;
+    }
+
+    /**
+     * @return the record identifier, or {@code null} if this instance was
+     * created from a string.
+     */
+    public RecordIdData getRecordId() {
+        return this.recordId;
+    }
+
+    /**
+     * @return the length passed at construction time.
+     */
+    public int getLength() {
+        return this.length;
+    }
+
+}

Propchange: jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/data/StringData.java
------------------------------------------------------------------------------
    svn:eol-style = native

Modified: jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/AbstractFileStore.java
URL: http://svn.apache.org/viewvc/jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/AbstractFileStore.java?rev=1804675&r1=1804674&r2=1804675&view=diff
==============================================================================
--- jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/AbstractFileStore.java (original)
+++ jackrabbit/oak/trunk/oak-segment-tar/src/main/java/org/apache/jackrabbit/oak/segment/file/AbstractFileStore.java Thu Aug 10 15:00:43 2017
@@ -18,6 +18,8 @@
  */
 package org.apache.jackrabbit.oak.segment.file;
 
+import static org.apache.jackrabbit.oak.segment.data.SegmentData.newSegmentData;
+
 import java.io.Closeable;
 import java.io.File;
 import java.io.IOException;
@@ -207,7 +209,7 @@ public abstract class AbstractFileStore
         long msb = id.getMostSignificantBits();
         long lsb = id.getLeastSignificantBits();
         ByteBuffer buffer = ByteBuffer.wrap(data);
-        GCGeneration generation = Segment.getGcGeneration(buffer, id);
+        GCGeneration generation = Segment.getGcGeneration(newSegmentData(buffer), id);
         w.recoverEntry(msb, lsb, data, 0, data.length, generation);
         if (SegmentId.isDataSegmentId(lsb)) {
             Segment segment = new Segment(tracker, segmentReader, tracker.newSegmentId(msb, lsb), buffer);