You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@pig.apache.org by ga...@apache.org on 2009/11/21 16:36:14 UTC
svn commit: r882929 [4/7] - in
/hadoop/pig/branches/branch-0.6/contrib/zebra: ./
src/java/org/apache/hadoop/zebra/io/ src/java/org/apache/hadoop/zebra/mapred/
src/java/org/apache/hadoop/zebra/schema/
src/java/org/apache/hadoop/zebra/tfile/ src/java/org...
Added: hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/tfile/TFileDumper.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/tfile/TFileDumper.java?rev=882929&view=auto
==============================================================================
--- hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/tfile/TFileDumper.java (added)
+++ hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/tfile/TFileDumper.java Sat Nov 21 15:36:12 2009
@@ -0,0 +1,295 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.hadoop.zebra.tfile;
+
+import java.io.IOException;
+import java.io.PrintStream;
+import java.util.Collection;
+import java.util.Iterator;
+import java.util.LinkedHashMap;
+import java.util.Map;
+import java.util.Set;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.FSDataInputStream;
+import org.apache.hadoop.fs.FileSystem;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.io.IOUtils;
+import org.apache.hadoop.zebra.tfile.BCFile.BlockRegion;
+import org.apache.hadoop.zebra.tfile.BCFile.MetaIndexEntry;
+import org.apache.hadoop.zebra.tfile.TFile.TFileIndexEntry;
+import org.apache.hadoop.zebra.tfile.Utils.Version;
+
+/**
+ * Dumping the information of a TFile.
+ */
+class TFileDumper {
+ static final Log LOG = LogFactory.getLog(TFileDumper.class);
+
+ private TFileDumper() {
+ // Static-only holder for dumpInfo(); the private constructor keeps it from being instantiated.
+ }
+
+ /**
+  * Alignment modes used when laying out the columns of the dump tables,
+  * together with the padding helpers that implement them.
+  */
+ private enum Align {
+   LEFT, CENTER, RIGHT, ZERO_PADDED;
+
+   /**
+    * Build a run of blanks.
+    *
+    * @param count
+    *          number of blanks wanted; zero or less yields the empty string.
+    * @return a string made of <code>count</code> space characters.
+    */
+   private static String blanks(int count) {
+     StringBuilder sb = new StringBuilder();
+     for (int i = 0; i < count; ++i) {
+       sb.append(' ');
+     }
+     return sb.toString();
+   }
+
+   /**
+    * Pad a string with blanks up to the requested width.
+    *
+    * @param s
+    *          the text to pad.
+    * @param width
+    *          the target width; text already that wide is returned as is.
+    * @param align
+    *          LEFT, RIGHT or CENTER. With CENTER an odd blank goes to the
+    *          right-hand side.
+    * @return the padded text.
+    * @throws IllegalArgumentException
+    *           if padding is needed and the alignment is not LEFT, RIGHT or
+    *           CENTER (ZERO_PADDED only makes sense for numbers).
+    */
+   static String format(String s, int width, Align align) {
+     if (s.length() >= width) return s;
+     int room = width - s.length();
+     // Padding is built by blanks(), which copes with a zero-length run, so
+     // no special case is needed when only one blank has to be distributed
+     // (a "%0s" format string would be rejected by String.format).
+     if (align == LEFT) {
+       return s + blanks(room);
+     }
+     if (align == RIGHT) {
+       return blanks(room) + s;
+     }
+     if (align == CENTER) {
+       int half = room / 2;
+       return blanks(half) + s + blanks(room - half);
+     }
+     throw new IllegalArgumentException("Unsupported alignment");
+   }
+
+   /**
+    * Format a number to the requested width.
+    *
+    * @param l
+    *          the number to format.
+    * @param width
+    *          the target width.
+    * @param align
+    *          ZERO_PADDED left-fills with zeros; any other mode pads the
+    *          decimal representation with blanks.
+    * @return the formatted number.
+    */
+   static String format(long l, int width, Align align) {
+     if (align == ZERO_PADDED) {
+       return String.format("%0" + width + "d", l);
+     }
+     return format(Long.toString(l), width, align);
+   }
+
+   /**
+    * Compute the width of a column from its caption and the largest number
+    * expected in it.
+    *
+    * @param caption
+    *          the column caption.
+    * @param max
+    *          the largest value expected in the column.
+    * @return the longer of the caption and the decimal form of max.
+    */
+   static int calculateWidth(String caption, long max) {
+     return Math.max(caption.length(), Long.toString(max).length());
+   }
+ }
+
+ /**
+ * Dump information about TFile.
+ *
+ * @param file
+ * Path string of the TFile
+ * @param out
+ * PrintStream to output the information.
+ * @param conf
+ * The configuration object.
+ * @throws IOException
+ */
+ static public void dumpInfo(String file, PrintStream out, Configuration conf)
+ throws IOException {
+ final int maxKeySampleLen = 16;
+ Path path = new Path(file);
+ FileSystem fs = path.getFileSystem(conf);
+ long length = fs.getFileStatus(path).getLen();
+ FSDataInputStream fsdis = fs.open(path);
+ TFile.Reader reader = new TFile.Reader(fsdis, length, conf);
+ try {
+ LinkedHashMap<String, String> properties =
+ new LinkedHashMap<String, String>();
+ int blockCnt = reader.readerBCF.getBlockCount();
+ int metaBlkCnt = reader.readerBCF.metaIndex.index.size();
+ properties.put("BCFile Version", reader.readerBCF.version.toString());
+ properties.put("TFile Version", reader.tfileMeta.version.toString());
+ properties.put("File Length", Long.toString(length));
+ properties.put("Data Compression", reader.readerBCF
+ .getDefaultCompressionName());
+ properties.put("Record Count", Long.toString(reader.getEntryCount()));
+ properties.put("Sorted", Boolean.toString(reader.isSorted()));
+ if (reader.isSorted()) {
+ properties.put("Comparator", reader.getComparatorName());
+ }
+ properties.put("Data Block Count", Integer.toString(blockCnt));
+ long dataSize = 0, dataSizeUncompressed = 0;
+ if (blockCnt > 0) {
+ for (int i = 0; i < blockCnt; ++i) {
+ BlockRegion region =
+ reader.readerBCF.dataIndex.getBlockRegionList().get(i);
+ dataSize += region.getCompressedSize();
+ dataSizeUncompressed += region.getRawSize();
+ }
+ properties.put("Data Block Bytes", Long.toString(dataSize));
+ if (reader.readerBCF.getDefaultCompressionName() != "none") {
+ properties.put("Data Block Uncompressed Bytes", Long
+ .toString(dataSizeUncompressed));
+ properties.put("Data Block Compression Ratio", String.format(
+ "1:%.1f", (double) dataSizeUncompressed / dataSize));
+ }
+ }
+
+ properties.put("Meta Block Count", Integer.toString(metaBlkCnt));
+ long metaSize = 0, metaSizeUncompressed = 0;
+ if (metaBlkCnt > 0) {
+ Collection<MetaIndexEntry> metaBlks =
+ reader.readerBCF.metaIndex.index.values();
+ boolean calculateCompression = false;
+ for (Iterator<MetaIndexEntry> it = metaBlks.iterator(); it.hasNext();) {
+ MetaIndexEntry e = it.next();
+ metaSize += e.getRegion().getCompressedSize();
+ metaSizeUncompressed += e.getRegion().getRawSize();
+ if (e.getCompressionAlgorithm() != Compression.Algorithm.NONE) {
+ calculateCompression = true;
+ }
+ }
+ properties.put("Meta Block Bytes", Long.toString(metaSize));
+ if (calculateCompression) {
+ properties.put("Meta Block Uncompressed Bytes", Long
+ .toString(metaSizeUncompressed));
+ properties.put("Meta Block Compression Ratio", String.format(
+ "1:%.1f", (double) metaSizeUncompressed / metaSize));
+ }
+ }
+ properties.put("Meta-Data Size Ratio", String.format("1:%.1f",
+ (double) dataSize / metaSize));
+ long leftOverBytes = length - dataSize - metaSize;
+ long miscSize =
+ BCFile.Magic.size() * 2 + Long.SIZE / Byte.SIZE + Version.size();
+ long metaIndexSize = leftOverBytes - miscSize;
+ properties.put("Meta Block Index Bytes", Long.toString(metaIndexSize));
+ properties.put("Headers Etc Bytes", Long.toString(miscSize));
+ // Now output the properties table.
+ int maxKeyLength = 0;
+ Set<Map.Entry<String, String>> entrySet = properties.entrySet();
+ for (Iterator<Map.Entry<String, String>> it = entrySet.iterator(); it
+ .hasNext();) {
+ Map.Entry<String, String> e = it.next();
+ if (e.getKey().length() > maxKeyLength) {
+ maxKeyLength = e.getKey().length();
+ }
+ }
+ for (Iterator<Map.Entry<String, String>> it = entrySet.iterator(); it
+ .hasNext();) {
+ Map.Entry<String, String> e = it.next();
+ out.printf("%s : %s\n", Align.format(e.getKey(), maxKeyLength,
+ Align.LEFT), e.getValue());
+ }
+ out.println();
+ reader.checkTFileDataIndex();
+ if (blockCnt > 0) {
+ String blkID = "Data-Block";
+ int blkIDWidth = Align.calculateWidth(blkID, blockCnt);
+ int blkIDWidth2 = Align.calculateWidth("", blockCnt);
+ String offset = "Offset";
+ int offsetWidth = Align.calculateWidth(offset, length);
+ String blkLen = "Length";
+ int blkLenWidth =
+ Align.calculateWidth(blkLen, dataSize / blockCnt * 10);
+ String rawSize = "Raw-Size";
+ int rawSizeWidth =
+ Align.calculateWidth(rawSize, dataSizeUncompressed / blockCnt * 10);
+ String records = "Records";
+ int recordsWidth =
+ Align.calculateWidth(records, reader.getEntryCount() / blockCnt
+ * 10);
+ String endKey = "End-Key";
+ int endKeyWidth = Math.max(endKey.length(), maxKeySampleLen * 2 + 5);
+
+ out.printf("%s %s %s %s %s %s\n", Align.format(blkID, blkIDWidth,
+ Align.CENTER), Align.format(offset, offsetWidth, Align.CENTER),
+ Align.format(blkLen, blkLenWidth, Align.CENTER), Align.format(
+ rawSize, rawSizeWidth, Align.CENTER), Align.format(records,
+ recordsWidth, Align.CENTER), Align.format(endKey, endKeyWidth,
+ Align.LEFT));
+
+ for (int i = 0; i < blockCnt; ++i) {
+ BlockRegion region =
+ reader.readerBCF.dataIndex.getBlockRegionList().get(i);
+ TFileIndexEntry indexEntry = reader.tfileIndex.getEntry(i);
+ out.printf("%s %s %s %s %s ", Align.format(Align.format(i,
+ blkIDWidth2, Align.ZERO_PADDED), blkIDWidth, Align.LEFT), Align
+ .format(region.getOffset(), offsetWidth, Align.LEFT), Align
+ .format(region.getCompressedSize(), blkLenWidth, Align.LEFT),
+ Align.format(region.getRawSize(), rawSizeWidth, Align.LEFT),
+ Align.format(indexEntry.kvEntries, recordsWidth, Align.LEFT));
+ byte[] key = indexEntry.key;
+ boolean asAscii = true;
+ int sampleLen = Math.min(maxKeySampleLen, key.length);
+ for (int j = 0; j < sampleLen; ++j) {
+ byte b = key[j];
+ if ((b < 32 && b != 9) || (b == 127)) {
+ asAscii = false;
+ }
+ }
+ if (!asAscii) {
+ out.print("0X");
+ for (int j = 0; j < sampleLen; ++j) {
+ byte b = key[i];
+ out.printf("%X", b);
+ }
+ } else {
+ out.print(new String(key, 0, sampleLen));
+ }
+ if (sampleLen < key.length) {
+ out.print("...");
+ }
+ out.println();
+ }
+ }
+
+ out.println();
+ if (metaBlkCnt > 0) {
+ String name = "Meta-Block";
+ int maxNameLen = 0;
+ Set<Map.Entry<String, MetaIndexEntry>> metaBlkEntrySet =
+ reader.readerBCF.metaIndex.index.entrySet();
+ for (Iterator<Map.Entry<String, MetaIndexEntry>> it =
+ metaBlkEntrySet.iterator(); it.hasNext();) {
+ Map.Entry<String, MetaIndexEntry> e = it.next();
+ if (e.getKey().length() > maxNameLen) {
+ maxNameLen = e.getKey().length();
+ }
+ }
+ int nameWidth = Math.max(name.length(), maxNameLen);
+ String offset = "Offset";
+ int offsetWidth = Align.calculateWidth(offset, length);
+ String blkLen = "Length";
+ int blkLenWidth =
+ Align.calculateWidth(blkLen, metaSize / metaBlkCnt * 10);
+ String rawSize = "Raw-Size";
+ int rawSizeWidth =
+ Align.calculateWidth(rawSize, metaSizeUncompressed / metaBlkCnt
+ * 10);
+ String compression = "Compression";
+ int compressionWidth = compression.length();
+ out.printf("%s %s %s %s %s\n", Align.format(name, nameWidth,
+ Align.CENTER), Align.format(offset, offsetWidth, Align.CENTER),
+ Align.format(blkLen, blkLenWidth, Align.CENTER), Align.format(
+ rawSize, rawSizeWidth, Align.CENTER), Align.format(compression,
+ compressionWidth, Align.LEFT));
+
+ for (Iterator<Map.Entry<String, MetaIndexEntry>> it =
+ metaBlkEntrySet.iterator(); it.hasNext();) {
+ Map.Entry<String, MetaIndexEntry> e = it.next();
+ String blkName = e.getValue().getMetaName();
+ BlockRegion region = e.getValue().getRegion();
+ String blkCompression =
+ e.getValue().getCompressionAlgorithm().getName();
+ out.printf("%s %s %s %s %s\n", Align.format(blkName, nameWidth,
+ Align.LEFT), Align.format(region.getOffset(), offsetWidth,
+ Align.LEFT), Align.format(region.getCompressedSize(),
+ blkLenWidth, Align.LEFT), Align.format(region.getRawSize(),
+ rawSizeWidth, Align.LEFT), Align.format(blkCompression,
+ compressionWidth, Align.LEFT));
+ }
+ }
+ } finally {
+ IOUtils.cleanup(LOG, reader, fsdis);
+ }
+ }
+}
Added: hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/tfile/Utils.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/tfile/Utils.java?rev=882929&view=auto
==============================================================================
--- hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/tfile/Utils.java (added)
+++ hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/tfile/Utils.java Sat Nov 21 15:36:12 2009
@@ -0,0 +1,517 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+
+package org.apache.hadoop.zebra.tfile;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.Comparator;
+import java.util.List;
+
+import org.apache.hadoop.io.Text;
+
+/**
+ * Supporting Utility classes used by TFile, and shared by users of TFile.
+ */
+public final class Utils {
+
+ /**
+ * Prevent the instantiation of Utils; the class only groups static helpers
+ * and nested types.
+ */
+ private Utils() {
+ // intentionally empty
+ }
+
+ /**
+  * Write a 32-bit integer in the variable-length format. The value is
+  * widened to a long and handed to
+  * <code>Utils#writeVLong(out, n)</code>, so both methods produce the same
+  * bytes for the same number.
+  *
+  * @param out
+  *          output stream
+  * @param n
+  *          The integer to be encoded
+  * @throws IOException
+  * @see Utils#writeVLong(DataOutput, long)
+  */
+ public static void writeVInt(DataOutput out, int n) throws IOException {
+   final long widened = n;
+   writeVLong(out, widened);
+ }
+
+ /**
+ * Encoding a Long integer into a variable-length encoding format. Small
+ * magnitudes are packed into the first byte itself or into the first byte
+ * plus one to three payload bytes; larger values use a marker byte
+ * (len - 129) followed by len big-endian payload bytes.
+ * <ul>
+ * <li>if n in [-32, 128): encode in one byte with the actual value.
+ * Otherwise,
+ * <li>if n in [-20*2^8, 20*2^8): encode in two bytes: byte[0] = n/256 - 52;
+ * byte[1]=n&0xff. Otherwise,
+ * <li>if n in [-16*2^16, 16*2^16): encode in three bytes: byte[0]=n/2^16 -
+ * 88; byte[1]=(n>>8)&0xff; byte[2]=n&0xff. Otherwise,
+ * <li>if n in [-8*2^24, 8*2^24): encode in four bytes: byte[0]=n/2^24 - 112;
+ * byte[1] = (n>>16)&0xff; byte[2] = (n>>8)&0xff; byte[3]=n&0xff. Otherwise:
+ * <li>if n in [-2^31, 2^31): encode in five bytes: byte[0]=-125; byte[1] =
+ * (n>>24)&0xff; byte[2]=(n>>16)&0xff; byte[3]=(n>>8)&0xff; byte[4]=n&0xff;
+ * <li>if n in [-2^39, 2^39): encode in six bytes: byte[0]=-124; byte[1] =
+ * (n>>32)&0xff; byte[2]=(n>>24)&0xff; byte[3]=(n>>16)&0xff;
+ * byte[4]=(n>>8)&0xff; byte[5]=n&0xff
+ * <li>if n in [-2^47, 2^47): encode in seven bytes: byte[0]=-123; byte[1] =
+ * (n>>40)&0xff; byte[2]=(n>>32)&0xff; byte[3]=(n>>24)&0xff;
+ * byte[4]=(n>>16)&0xff; byte[5]=(n>>8)&0xff; byte[6]=n&0xff;
+ * <li>if n in [-2^55, 2^55): encode in eight bytes: byte[0]=-122; byte[1] =
+ * (n>>48)&0xff; byte[2] = (n>>40)&0xff; byte[3]=(n>>32)&0xff;
+ * byte[4]=(n>>24)&0xff; byte[5]=(n>>16)&0xff; byte[6]=(n>>8)&0xff;
+ * byte[7]=n&0xff;
+ * <li>if n in [-2^63, 2^63): encode in nine bytes: byte[0]=-121; byte[1] =
+ * (n>>56)&0xff; byte[2] = (n>>48)&0xff; byte[3] = (n>>40)&0xff;
+ * byte[4]=(n>>32)&0xff; byte[5]=(n>>24)&0xff; byte[6]=(n>>16)&0xff;
+ * byte[7]=(n>>8)&0xff; byte[8]=n&0xff;
+ * </ul>
+ *
+ * @param out
+ * output stream
+ * @param n
+ * the integer number
+ * @throws IOException
+ */
+ @SuppressWarnings("fallthrough")
+ public static void writeVLong(DataOutput out, long n) throws IOException {
+ if ((n < 128) && (n >= -32)) {
+ out.writeByte((int) n);
+ return;
+ }
+
+ long un = (n < 0) ? ~n : n;
+ // how many bytes do we need to represent the number with sign bit?
+ int len = (Long.SIZE - Long.numberOfLeadingZeros(un)) / 8 + 1;
+ int firstByte = (int) (n >> ((len - 1) * 8));
+ switch (len) {
+ case 1:
+ // deliberate fall-through: continue as firstByte==-1, len=2.
+ firstByte >>= 8;
+ case 2:
+ if ((firstByte < 20) && (firstByte >= -20)) {
+ out.writeByte(firstByte - 52);
+ out.writeByte((int) n);
+ return;
+ }
+ // deliberate fall-through: continue as firstByte==0/-1, len=3.
+ firstByte >>= 8;
+ case 3:
+ if ((firstByte < 16) && (firstByte >= -16)) {
+ out.writeByte(firstByte - 88);
+ out.writeShort((int) n);
+ return;
+ }
+ // deliberate fall-through: continue as firstByte==0/-1, len=4.
+ firstByte >>= 8;
+ case 4:
+ if ((firstByte < 8) && (firstByte >= -8)) {
+ out.writeByte(firstByte - 112);
+ out.writeShort(((int) n) >>> 8);
+ out.writeByte((int) n);
+ return;
+ }
+ out.writeByte(len - 129);
+ out.writeInt((int) n);
+ return;
+ case 5:
+ out.writeByte(len - 129);
+ out.writeInt((int) (n >>> 8));
+ out.writeByte((int) n);
+ return;
+ case 6:
+ out.writeByte(len - 129);
+ out.writeInt((int) (n >>> 16));
+ out.writeShort((int) n);
+ return;
+ case 7:
+ out.writeByte(len - 129);
+ out.writeInt((int) (n >>> 24));
+ out.writeShort((int) (n >>> 8));
+ out.writeByte((int) n);
+ return;
+ case 8:
+ out.writeByte(len - 129);
+ out.writeLong(n);
+ return;
+ default:
+ throw new RuntimeException("Internel error");
+ }
+ }
+
+ /**
+  * Decode a variable-length integer and narrow it to 32 bits. The value is
+  * read with <code>Utils#readVLong(in)</code>; a result that does not
+  * survive the narrowing is rejected rather than silently truncated.
+  *
+  * @param in
+  *          input stream
+  * @return the decoded integer
+  * @throws IOException
+  *
+  * @see Utils#readVLong(DataInput)
+  */
+ public static int readVInt(DataInput in) throws IOException {
+   final long decoded = readVLong(in);
+   final int narrowed = (int) decoded;
+   // The round trip through int changes the value exactly when it lies
+   // outside [Integer.MIN_VALUE, Integer.MAX_VALUE].
+   if (narrowed != decoded) {
+     throw new RuntimeException(
+         "Number too large to be represented as Integer");
+   }
+   return narrowed;
+ }
+
+ /**
+ * Decoding the variable-length integer. Suppose the value of the first byte
+ * is FB, and the following bytes are NB[*].
+ * <ul>
+ * <li>if (FB >= -32), return (long)FB;
+ * <li>if (FB in [-72, -33]), return (FB+52)<<8 + NB[0]&0xff;
+ * <li>if (FB in [-104, -73]), return (FB+88)<<16 + (NB[0]&0xff)<<8 +
+ * NB[1]&0xff;
+ * <li>if (FB in [-120, -105]), return (FB+112)<<24 + (NB[0]&0xff)<<16 +
+ * (NB[1]&0xff)<<8 + NB[2]&0xff;
+ * <li>if (FB in [-128, -121]), read the next FB+129 bytes and interpret them
+ * as a signed big-endian integer. Only lengths of 4 to 8 bytes are accepted;
+ * a shorter length is reported as a corrupted encoding.
+ * </ul>
+ *
+ * @param in
+ * input stream
+ * @return the decoded long integer.
+ * @throws IOException
+ * if reading fails or the first byte announces an unsupported length.
+ */
+
+ public static long readVLong(DataInput in) throws IOException {
+ int firstByte = in.readByte();
+ if (firstByte >= -32) {
+ return firstByte;
+ }
+
+ switch ((firstByte + 128) / 8) {
+ case 11:
+ case 10:
+ case 9:
+ case 8:
+ case 7: // FB in [-72, -33]: one payload byte
+ return ((firstByte + 52) << 8) | in.readUnsignedByte();
+ case 6:
+ case 5:
+ case 4:
+ case 3: // FB in [-104, -73]: two payload bytes
+ return ((firstByte + 88) << 16) | in.readUnsignedShort();
+ case 2:
+ case 1: // FB in [-120, -105]: three payload bytes
+ return ((firstByte + 112) << 24) | (in.readUnsignedShort() << 8)
+ | in.readUnsignedByte();
+ case 0: // FB in [-128, -121]: FB + 129 payload bytes
+ int len = firstByte + 129;
+ switch (len) {
+ case 4:
+ return in.readInt();
+ case 5:
+ return ((long) in.readInt()) << 8 | in.readUnsignedByte();
+ case 6:
+ return ((long) in.readInt()) << 16 | in.readUnsignedShort();
+ case 7:
+ return ((long) in.readInt()) << 24 | (in.readUnsignedShort() << 8)
+ | in.readUnsignedByte();
+ case 8:
+ return in.readLong();
+ default:
+ throw new IOException("Corrupted VLong encoding");
+ }
+ default:
+ throw new RuntimeException("Internal error");
+ }
+ }
+
+ /**
+  * Write a String as a VInt n, followed by n Bytes as in Text format. A
+  * null string is written as the single VInt -1 with no payload.
+  *
+  * @param out
+  *          The output stream.
+  * @param s
+  *          The string to write; may be null.
+  * @throws IOException
+  */
+ public static void writeString(DataOutput out, String s) throws IOException {
+   if (s == null) {
+     writeVInt(out, -1);
+     return;
+   }
+   Text encoded = new Text(s);
+   byte[] payload = encoded.getBytes();
+   // Only the first getLength() bytes of the backing array are written.
+   int byteCount = encoded.getLength();
+   writeVInt(out, byteCount);
+   out.write(payload, 0, byteCount);
+ }
+
+ /**
+  * Read a String as a VInt n, followed by n Bytes in Text format.
+  *
+  * @param in
+  *          The input stream.
+  * @return The string, or null if the stored length is -1.
+  * @throws IOException
+  *           if reading fails or the stored length is negative but not -1,
+  *           which can only come from corrupted input.
+  */
+ public static String readString(DataInput in) throws IOException {
+   int length = readVInt(in);
+   if (length == -1) return null;
+   if (length < 0) {
+     // Report corrupted input as an I/O problem instead of letting the
+     // array allocation fail with NegativeArraySizeException.
+     throw new IOException("Corrupted string length: " + length);
+   }
+   byte[] buffer = new byte[length];
+   in.readFully(buffer);
+   return Text.decode(buffer);
+ }
+
+ /**
+  * A generic Version class. We suggest applications built on top of TFile use
+  * this class to maintain version information in their meta blocks.
+  *
+  * A version number consists of a major version and a minor version. The
+  * suggested usage of major and minor version number is to increment major
+  * version number when the new storage format is not backward compatible, and
+  * increment the minor version otherwise.
+  */
+ public static final class Version implements Comparable<Version> {
+   private final short major;
+   private final short minor;
+
+   /**
+    * Construct the Version object by reading from the input stream.
+    *
+    * @param in
+    *          input stream
+    * @throws IOException
+    */
+   public Version(DataInput in) throws IOException {
+     major = in.readShort();
+     minor = in.readShort();
+   }
+
+   /**
+    * Constructor.
+    *
+    * @param major
+    *          major version.
+    * @param minor
+    *          minor version.
+    */
+   public Version(short major, short minor) {
+     this.major = major;
+     this.minor = minor;
+   }
+
+   /**
+    * Write the object to a DataOutput. The serialized format of the Version
+    * is major version followed by minor version, both as big-endian short
+    * integers.
+    *
+    * @param out
+    *          The DataOutput object.
+    * @throws IOException
+    */
+   public void write(DataOutput out) throws IOException {
+     out.writeShort(major);
+     out.writeShort(minor);
+   }
+
+   /**
+    * Get the major version.
+    *
+    * @return Major version.
+    */
+   public int getMajor() {
+     return major;
+   }
+
+   /**
+    * Get the minor version.
+    *
+    * @return The minor version.
+    */
+   public int getMinor() {
+     return minor;
+   }
+
+   /**
+    * Get the size of the serialized Version object.
+    *
+    * @return serialized size of the version object.
+    */
+   public static int size() {
+     return (Short.SIZE + Short.SIZE) / Byte.SIZE;
+   }
+
+   /**
+    * Return a string representation of the version.
+    */
+   @Override
+   public String toString() {
+     return new StringBuilder("v").append(major).append(".").append(minor)
+         .toString();
+   }
+
+   /**
+    * Test compatibility.
+    *
+    * @param other
+    *          The Version object to test compatibility with.
+    * @return true if both versions have the same major version number; false
+    *         otherwise.
+    */
+   public boolean compatibleWith(Version other) {
+     return major == other.major;
+   }
+
+   /**
+    * Compare this version with another version: by major number first, then
+    * by minor number.
+    */
+   @Override
+   public int compareTo(Version that) {
+     // Both fields are shorts promoted to int, so the differences below
+     // cannot overflow.
+     if (major != that.major) {
+       return major - that.major;
+     }
+     return minor - that.minor;
+   }
+
+   /**
+    * Two versions are equal when both their major and minor numbers match.
+    */
+   @Override
+   public boolean equals(Object other) {
+     if (this == other) return true;
+     if (!(other instanceof Version)) return false;
+     return compareTo((Version) other) == 0;
+   }
+
+   /**
+    * Combine the major number (upper 16 bits) and the minor number into a
+    * hash code consistent with {@link #equals(Object)}.
+    */
+   @Override
+   public int hashCode() {
+     // The parentheses matter: "+" binds tighter than "<<", so without
+     // them the shift distance would become 16 + minor.
+     return (major << 16) + minor;
+   }
+ }
+
+ /**
+  * Lower bound binary search using an explicit comparator. Locates the
+  * first position in the list whose element is not less than the key.
+  *
+  * @param <T>
+  *          Type of the input key.
+  * @param list
+  *          The list, ordered consistently with cmp.
+  * @param key
+  *          The input key.
+  * @param cmp
+  *          Comparator for the key.
+  * @return The index of the first element that compares greater than or
+  *         equal to key; or list.size() if there is no such element.
+  */
+ public static <T> int lowerBound(List<? extends T> list, T key,
+     Comparator<? super T> cmp) {
+   int lo = 0;
+   for (int hi = list.size(); lo < hi;) {
+     final int probe = (lo + hi) >>> 1;
+     if (cmp.compare(list.get(probe), key) < 0) {
+       // probe is still below the key: the answer lies to its right.
+       lo = probe + 1;
+     } else {
+       hi = probe;
+     }
+   }
+   return lo;
+ }
+
+ /**
+  * Upper bound binary search using an explicit comparator. Locates the
+  * first position in the list whose element is strictly greater than the
+  * key.
+  *
+  * @param <T>
+  *          Type of the input key.
+  * @param list
+  *          The list, ordered consistently with cmp.
+  * @param key
+  *          The input key.
+  * @param cmp
+  *          Comparator for the key.
+  * @return The index of the first element that compares greater than key;
+  *         or list.size() if there is no such element.
+  */
+ public static <T> int upperBound(List<? extends T> list, T key,
+     Comparator<? super T> cmp) {
+   int lo = 0;
+   for (int hi = list.size(); lo < hi;) {
+     final int probe = (lo + hi) >>> 1;
+     if (cmp.compare(list.get(probe), key) > 0) {
+       // probe is already past the key: it stays a candidate answer.
+       hi = probe;
+     } else {
+       lo = probe + 1;
+     }
+   }
+   return lo;
+ }
+
+ /**
+ * Lower bound binary search. Find the index to the first element in the list
+ * that compares greater than or equal to key.
+ *
+ * @param <T>
+ * Type of the input key.
+ * @param list
+ * The list
+ * @param key
+ * The input key.
+ * @return The index to the desired element if it exists; or list.size()
+ * otherwise.
+ */
+ public static <T> int lowerBound(List<? extends Comparable<? super T>> list,
+ T key) {
+ int low = 0;
+ int high = list.size();
+
+ while (low < high) {
+ int mid = (low + high) >>> 1;
+ Comparable<? super T> midVal = list.get(mid);
+ int ret = midVal.compareTo(key);
+ if (ret < 0)
+ low = mid + 1;
+ else high = mid;
+ }
+ return low;
+ }
+
+ /**
+ * Upper bound binary search. Find the index to the first element in the list
+ * that compares greater than the input key.
+ *
+ * @param <T>
+ * Type of the input key.
+ * @param list
+ * The list
+ * @param key
+ * The input key.
+ * @return The index to the desired element if it exists; or list.size()
+ * otherwise.
+ */
+ public static <T> int upperBound(List<? extends Comparable<? super T>> list,
+ T key) {
+ int low = 0;
+ int high = list.size();
+
+ while (low < high) {
+ int mid = (low + high) >>> 1;
+ Comparable<? super T> midVal = list.get(mid);
+ int ret = midVal.compareTo(key);
+ if (ret <= 0)
+ low = mid + 1;
+ else high = mid;
+ }
+ return low;
+ }
+}
Modified: hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/types/CGSchema.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/types/CGSchema.java?rev=882929&r1=882928&r2=882929&view=diff
==============================================================================
--- hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/types/CGSchema.java (original)
+++ hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/types/CGSchema.java Sat Nov 21 15:36:12 2009
@@ -27,7 +27,7 @@
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.WritableUtils;
-import org.apache.hadoop.io.file.tfile.Utils.Version;
+import org.apache.hadoop.zebra.tfile.Utils.Version;
import org.apache.hadoop.zebra.schema.Schema;
import org.apache.hadoop.zebra.parser.ParseException;
Modified: hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/types/SortInfo.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/types/SortInfo.java?rev=882929&r1=882928&r2=882929&view=diff
==============================================================================
--- hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/types/SortInfo.java (original)
+++ hadoop/pig/branches/branch-0.6/contrib/zebra/src/java/org/apache/hadoop/zebra/types/SortInfo.java Sat Nov 21 15:36:12 2009
@@ -21,7 +21,7 @@
import java.io.IOException;
import org.apache.hadoop.zebra.schema.Schema;
import org.apache.hadoop.zebra.schema.ColumnType;
-import org.apache.hadoop.io.file.tfile.TFile;
+import org.apache.hadoop.zebra.tfile.TFile;
/**
* Sortness related Information
Modified: hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestBasicTable.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestBasicTable.java?rev=882929&r1=882928&r2=882929&view=diff
==============================================================================
--- hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestBasicTable.java (original)
+++ hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestBasicTable.java Sat Nov 21 15:36:12 2009
@@ -31,7 +31,7 @@
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.file.tfile.RawComparable;
+import org.apache.hadoop.zebra.tfile.RawComparable;
import org.apache.hadoop.zebra.io.BasicTable;
import org.apache.hadoop.zebra.io.BasicTableStatus;
import org.apache.hadoop.zebra.io.KeyDistribution;
@@ -81,7 +81,7 @@
return String.format("%s%09d", prefix, random.nextInt(max));
}
- static int createBasicTable(int parts, int rows, String strSchema, String storage, String sortColumns,
+ public static int createBasicTable(int parts, int rows, String strSchema, String storage, String sortColumns,
Path path, boolean properClose) throws IOException {
if (fs.exists(path)) {
BasicTable.drop(path, conf);
@@ -126,8 +126,11 @@
if (properClose) {
writer = new BasicTable.Writer(path, conf);
writer.close();
- BasicTableStatus status = getStatus(path);
- Assert.assertEquals(total, status.getRows());
+ /* We can only test number of rows on sorted tables.*/
+ if (sorted) {
+ BasicTableStatus status = getStatus(path);
+ Assert.assertEquals(total, status.getRows());
+ }
}
return total;
Modified: hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestCollection.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestCollection.java?rev=882929&r1=882928&r2=882929&view=diff
==============================================================================
--- hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestCollection.java (original)
+++ hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestCollection.java Sat Nov 21 15:36:12 2009
@@ -1018,4 +1018,4 @@
System.out.println(e);
}
}
-}
\ No newline at end of file
+}
Modified: hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroup.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroup.java?rev=882929&r1=882928&r2=882929&view=diff
==============================================================================
--- hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroup.java (original)
+++ hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroup.java Sat Nov 21 15:36:12 2009
@@ -30,7 +30,7 @@
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.BytesWritable;
-import org.apache.hadoop.io.file.tfile.RawComparable;
+import org.apache.hadoop.zebra.tfile.RawComparable;
import org.apache.hadoop.zebra.io.BasicTableStatus;
import org.apache.hadoop.zebra.io.ColumnGroup;
import org.apache.hadoop.zebra.io.KeyDistribution;
@@ -149,8 +149,11 @@
if (properClose) {
writer = new ColumnGroup.Writer(path, conf);
writer.close();
- BasicTableStatus status = getStatus(path);
- Assert.assertEquals(total, status.getRows());
+ /* We can only test number of rows on sorted tables.*/
+ if (sorted) {
+ BasicTableStatus status = getStatus(path);
+ Assert.assertEquals(total, status.getRows());
+ }
}
return total;
Modified: hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroupName1.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroupName1.java?rev=882929&r1=882928&r2=882929&view=diff
==============================================================================
--- hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroupName1.java (original)
+++ hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroupName1.java Sat Nov 21 15:36:12 2009
@@ -170,4 +170,4 @@
reader.close();
}
-}
\ No newline at end of file
+}
Modified: hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroupName2.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroupName2.java?rev=882929&r1=882928&r2=882929&view=diff
==============================================================================
--- hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroupName2.java (original)
+++ hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroupName2.java Sat Nov 21 15:36:12 2009
@@ -110,4 +110,4 @@
public void test1() throws IOException, Exception {
BasicTable.dumpInfo(path.toString(), System.out, conf);
}
-}
\ No newline at end of file
+}
Modified: hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroupName3.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroupName3.java?rev=882929&r1=882928&r2=882929&view=diff
==============================================================================
--- hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroupName3.java (original)
+++ hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroupName3.java Sat Nov 21 15:36:12 2009
@@ -110,4 +110,4 @@
public void test1() throws IOException, Exception {
BasicTable.dumpInfo(path.toString(), System.out, conf);
}
-}
\ No newline at end of file
+}
Modified: hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroupName4.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroupName4.java?rev=882929&r1=882928&r2=882929&view=diff
==============================================================================
--- hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroupName4.java (original)
+++ hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestColumnGroupName4.java Sat Nov 21 15:36:12 2009
@@ -110,4 +110,4 @@
public void test1() throws IOException, Exception {
BasicTable.dumpInfo(path.toString(), System.out, conf);
}
-}
\ No newline at end of file
+}
Modified: hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMap.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMap.java?rev=882929&r1=882928&r2=882929&view=diff
==============================================================================
--- hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMap.java (original)
+++ hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMap.java Sat Nov 21 15:36:12 2009
@@ -280,4 +280,4 @@
reader.close();
}
-}
\ No newline at end of file
+}
Modified: hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMapOfRecord.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMapOfRecord.java?rev=882929&r1=882928&r2=882929&view=diff
==============================================================================
--- hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMapOfRecord.java (original)
+++ hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestMapOfRecord.java Sat Nov 21 15:36:12 2009
@@ -388,4 +388,4 @@
reader.close();
}
-}
\ No newline at end of file
+}
Modified: hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestNegative.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestNegative.java?rev=882929&r1=882928&r2=882929&view=diff
==============================================================================
--- hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestNegative.java (original)
+++ hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestNegative.java Sat Nov 21 15:36:12 2009
@@ -616,4 +616,4 @@
}
}
-}
\ No newline at end of file
+}
Modified: hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecord.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecord.java?rev=882929&r1=882928&r2=882929&view=diff
==============================================================================
--- hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecord.java (original)
+++ hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestRecord.java Sat Nov 21 15:36:12 2009
@@ -412,4 +412,4 @@
reader.close();
}
-}
\ No newline at end of file
+}
Modified: hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestSimple.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestSimple.java?rev=882929&r1=882928&r2=882929&view=diff
==============================================================================
--- hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestSimple.java (original)
+++ hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/io/TestSimple.java Sat Nov 21 15:36:12 2009
@@ -184,4 +184,4 @@
reader.close();
}
-}
\ No newline at end of file
+}
Modified: hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/ArticleGenerator.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/ArticleGenerator.java?rev=882929&r1=882928&r2=882929&view=diff
==============================================================================
--- hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/ArticleGenerator.java (original)
+++ hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/ArticleGenerator.java Sat Nov 21 15:36:12 2009
@@ -26,8 +26,8 @@
import org.apache.hadoop.fs.FSDataOutputStream;
import org.apache.hadoop.fs.FileSystem;
import org.apache.hadoop.fs.Path;
-import org.apache.hadoop.io.file.tfile.RandomDistribution.DiscreteRNG;
-import org.apache.hadoop.io.file.tfile.RandomDistribution.Flat;
+import org.apache.hadoop.zebra.tfile.RandomDistribution.DiscreteRNG;
+import org.apache.hadoop.zebra.tfile.RandomDistribution.Flat;
/**
* Generate some input text files.
Modified: hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/Dictionary.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/Dictionary.java?rev=882929&r1=882928&r2=882929&view=diff
==============================================================================
--- hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/Dictionary.java (original)
+++ hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/Dictionary.java Sat Nov 21 15:36:12 2009
@@ -24,9 +24,9 @@
import java.util.Random;
import java.util.Set;
-import org.apache.hadoop.io.file.tfile.RandomDistribution.Binomial;
-import org.apache.hadoop.io.file.tfile.RandomDistribution.DiscreteRNG;
-import org.apache.hadoop.io.file.tfile.RandomDistribution.Zipf;
+import org.apache.hadoop.zebra.tfile.RandomDistribution.Binomial;
+import org.apache.hadoop.zebra.tfile.RandomDistribution.DiscreteRNG;
+import org.apache.hadoop.zebra.tfile.RandomDistribution.Zipf;
/**
* A dictionary that generates English words, whose frequency follows Zipf
Modified: hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TableMRSample2.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TableMRSample2.java?rev=882929&r1=882928&r2=882929&view=diff
==============================================================================
--- hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TableMRSample2.java (original)
+++ hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TableMRSample2.java Sat Nov 21 15:36:12 2009
@@ -113,4 +113,4 @@
JobClient.runJob(jobConf);
}
}
-}
\ No newline at end of file
+}
Modified: hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TableMapReduceExample.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TableMapReduceExample.java?rev=882929&r1=882928&r2=882929&view=diff
==============================================================================
--- hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TableMapReduceExample.java (original)
+++ hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TableMapReduceExample.java Sat Nov 21 15:36:12 2009
@@ -225,4 +225,4 @@
args);
System.exit(res);
}
-}
\ No newline at end of file
+}
Modified: hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TestBasicTableIOFormatLocalFS.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TestBasicTableIOFormatLocalFS.java?rev=882929&r1=882928&r2=882929&view=diff
==============================================================================
--- hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TestBasicTableIOFormatLocalFS.java (original)
+++ hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TestBasicTableIOFormatLocalFS.java Sat Nov 21 15:36:12 2009
@@ -46,7 +46,7 @@
import org.apache.hadoop.io.RawComparator;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
-import org.apache.hadoop.io.file.tfile.Utils;
+import org.apache.hadoop.zebra.tfile.Utils;
import org.apache.hadoop.mapred.FileInputFormat;
import org.apache.hadoop.mapred.FileSplit;
import org.apache.hadoop.mapred.JobClient;
@@ -163,6 +163,9 @@
JobConf getJobConf(String name) {
JobConf jobConf = mr.createJobConf();
jobConf.setJobName(name);
+ jobConf.setInt("table.input.split.minSize", 1);
+ options.minTableSplitSize = 1; // force more splits
+ jobConf.setInt("dfs.block.size", 1024); // force multiple blocks
jobConf.set("table.output.tfile.compression", options.compression);
jobConf.setInt("mapred.app.freqWords.count", options.numFreqWords);
return jobConf;
Modified: hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TestCheckin.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TestCheckin.java?rev=882929&r1=882928&r2=882929&view=diff
==============================================================================
--- hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TestCheckin.java (original)
+++ hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TestCheckin.java Sat Nov 21 15:36:12 2009
@@ -24,7 +24,8 @@
@RunWith(Suite.class)
@Suite.SuiteClasses({
TestBasicTableIOFormatLocalFS.class,
- TestBasicTableIOFormatDFS.class
+ TestBasicTableIOFormatDFS.class,
+ TestTfileSplit.class
})
public class TestCheckin {
Added: hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TestTfileSplit.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TestTfileSplit.java?rev=882929&view=auto
==============================================================================
--- hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TestTfileSplit.java (added)
+++ hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/mapred/TestTfileSplit.java Sat Nov 21 15:36:12 2009
@@ -0,0 +1,128 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.hadoop.zebra.mapred;
+
+import java.io.IOException;
+import java.util.StringTokenizer;
+
+import org.apache.hadoop.conf.Configuration;
+import org.apache.hadoop.fs.Path;
+import org.apache.hadoop.mapred.JobConf;
+import org.apache.hadoop.mapred.InputSplit;
+import org.apache.hadoop.zebra.mapred.TableInputFormat;
+import org.apache.hadoop.zebra.io.BasicTable;
+import org.apache.hadoop.zebra.io.TestBasicTable;
+import org.apache.hadoop.zebra.parser.ParseException;
+import org.junit.AfterClass;
+import org.junit.Assert;
+import org.junit.BeforeClass;
+import org.junit.Test;
+
+/**
+ * Checks which column group the first split from TableInputFormat.getSplits refers to when
+ * the projection names no existing column or when column groups have been dropped.
+ */
+public class TestTfileSplit {
+  private static Configuration conf;
+  private static Path path;
+
+  @BeforeClass
+  public static void setUpOnce() throws IOException {
+    TestBasicTable.setUpOnce();
+    conf = TestBasicTable.conf;
+    path = new Path(TestBasicTable.rootPath, "TfileSplitTest");
+  }
+
+  @AfterClass
+  public static void tearDown() throws IOException {
+    BasicTable.drop(path, conf);
+  }
+
+  /** Drops whatever table is at path and creates the test table afresh. */
+  private static void recreateTable() throws IOException, ParseException {
+    BasicTable.drop(path, conf);
+    TestBasicTable.createBasicTable(1, 100, "a, b, c, d, e, f", "[a, b]; [c, d]", null, path, true);
+  }
+
+  /**
+   * Runs getSplits on the test table with a minimum split size of 100 and 40 requested splits.
+   *
+   * @param projection projection to set on the job, or null to leave the projection unset
+   * @return the splits produced by TableInputFormat
+   */
+  private static InputSplit[] computeSplits(String projection)
+      throws IOException, ParseException {
+    TableInputFormat inputFormat = new TableInputFormat();
+    JobConf jobConf = new JobConf(conf);
+    inputFormat.setInputPaths(jobConf, path);
+    inputFormat.setMinSplitSize(jobConf, 100);
+    if (projection != null) {
+      inputFormat.setProjection(jobConf, projection);
+    }
+    return inputFormat.getSplits(jobConf, 40);
+  }
+
+  /**
+   * Parses the column group index out of the first split's string form: the third
+   * space-separated token on its first line, with that token's last character removed.
+   *
+   * @param splits splits as returned by getSplits; the test fails if the array is empty
+   * @return the parsed column group index
+   */
+  private static int firstSplitColumnGroupIndex(InputSplit[] splits)
+      throws IOException, ParseException {
+    Assert.assertTrue("expected at least one split", splits.length > 0);
+    RowTableSplit split = (RowTableSplit) splits[0];
+    String firstLine = new StringTokenizer(split.getSplit().toString(), "\n").nextToken();
+    StringTokenizer tokens = new StringTokenizer(firstLine, " ");
+    tokens.nextToken();
+    tokens.nextToken();
+    String token = tokens.nextToken();
+    return Integer.parseInt(token.substring(0, token.length() - 1));
+  }
+
+  /* In this test, we test creating input splits for the projection on non-existing column case.
+   * The first non-deleted column group should be used for split. */
+  @Test
+  public void testTfileSplit1() throws IOException, ParseException {
+    recreateTable();
+    // JUnit takes the expected value first, so failure messages report the right way round.
+    Assert.assertEquals(0, firstSplitColumnGroupIndex(computeSplits("aa")));
+  }
+
+  /* In this test, we test creating input splits when dropped column groups are around.
+   * Here the projection involves all columns and only one valid column group is present.
+   * As such, that column group should be used for split.*/
+  @Test
+  public void testTfileSplit2() throws IOException, ParseException {
+    recreateTable();
+    BasicTable.dropColumnGroup(path, conf, "CG0");
+    BasicTable.dropColumnGroup(path, conf, "CG2");
+    Assert.assertEquals(1, firstSplitColumnGroupIndex(computeSplits(null)));
+  }
+
+  /* In this test, we test creating input splits when there is no valid column group present.
+   * Should return 0 splits. */
+  @Test
+  public void testTfileSplit3() throws IOException, ParseException {
+    recreateTable();
+    BasicTable.dropColumnGroup(path, conf, "CG0");
+    BasicTable.dropColumnGroup(path, conf, "CG1");
+    BasicTable.dropColumnGroup(path, conf, "CG2");
+    Assert.assertEquals(0, computeSplits(null).length);
+  }
+}
Modified: hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestSimpleType.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestSimpleType.java?rev=882929&r1=882928&r2=882929&view=diff
==============================================================================
--- hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestSimpleType.java (original)
+++ hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestSimpleType.java Sat Nov 21 15:36:12 2009
@@ -165,7 +165,7 @@
t = tok.nextToken(); // '...<caller to getCurrentRoutine>'
return t;
}
-
+
// @Test
public void testReadSimpleStitch() throws IOException, ParseException {
String query = "records = LOAD '" + path.toString()
@@ -242,9 +242,9 @@
@Test
// Store same table
public void testStorer() throws ExecException, IOException {
- /*
- * Use pig LOAD to load testing data for store
- */
+ //
+ // Use pig LOAD to load testing data for store
+ //
String query = "records = LOAD '"
+ path.toString()
+ "' USING org.apache.hadoop.zebra.pig.TableLoader() as (s1,s2,s3,s4,s5,s6);";
@@ -256,9 +256,9 @@
System.out.println(RowValue);
}
- /*
- * Use pig STORE to store testing data
- */
+ //
+ // Use pig STORE to store testing data
+ //
Path newPath = new Path(getCurrentMethodName());
pigServer
.store(
@@ -528,19 +528,19 @@
@Test
// Store negative, store to same path. Store should fail
- public void testStorer5() throws ExecException, IOException {
- /*
- * Use pig LOAD to load testing data for store
- */
+ public void testStorer5() throws ExecException, IOException {
+ //
+ // Use pig LOAD to load testing data for store
+ //
String query = "records = LOAD '"
+ path.toString()
+ "' USING org.apache.hadoop.zebra.pig.TableLoader() as (s1,s2,s3,s4,s5,s6);";
pigServer.registerQuery(query);
- /*
- * Use pig STORE to store testing data
- */
-
+ //
+ // Use pig STORE to store testing data
+ //
+ System.out.println("path = " + path);
ExecJob pigJob = pigServer
.store(
"records",
@@ -548,7 +548,6 @@
TableStorer.class.getCanonicalName()
+ "('[s1, s2]; [s3, s4]')");
Assert.assertNotNull(pigJob.getException());
- System.out.println(pigJob.getException());
+ System.out.println("pig job exception : " + pigJob.getException());
}
-
}
Modified: hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestUnionMixedTypes.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestUnionMixedTypes.java?rev=882929&r1=882928&r2=882929&view=diff
==============================================================================
--- hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestUnionMixedTypes.java (original)
+++ hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/pig/TestUnionMixedTypes.java Sat Nov 21 15:36:12 2009
@@ -309,16 +309,15 @@
if (j == 1) {
System.out.println("j is : " + j);
- Assert.assertEquals(j + "." + j, cur2.get(0));
- Assert.assertEquals(j + "." + j + j, cur2.get(1));
+ /* The order of t1 and t2 in the union result can vary across runs. */
+ Assert.assertTrue("1.1".equals(cur2.get(0)) || "3.3".equals(cur2.get(0)));
+ Assert.assertTrue("1.11".equals(cur2.get(1)) || "3.33".equals(cur2.get(1)));
}
if (j == 2) {
System.out.println("j is : " + j);
- Assert.assertEquals((j - 1) + "." + (j - 1) + (j - 1) + (j - 1), cur2
- .get(0));
- Assert.assertEquals((j - 1) + "." + (j - 1) + (j - 1) + (j - 1)
- + (j - 1), cur2.get(1));
+ Assert.assertTrue("1.111".equals(cur2.get(0)) || "3.333".equals(cur2.get(0)));
+ Assert.assertTrue("1.1111".equals(cur2.get(1)) || "3.3333".equals(cur2.get(1)));
}
TypesUtils.resetTuple(cur2);
@@ -326,37 +325,37 @@
}// inner while
if (i == 1) {
System.out.println("i is : " + i);
-
- Assert.assertEquals("k11", ((Map) cur.get(1)).get("k1"));
+ Assert.assertTrue("k11".equals(((Map) cur.get(1)).get("k1")) || "k13".equals(((Map) cur.get(1)).get("k1")));
Assert.assertEquals(null, ((Map) cur.get(1)).get("k2"));
- Assert.assertEquals("1", cur.get(2));
+ Assert.assertTrue("1".equals(cur.get(2)) || "3".equals(cur.get(2)));
}
if (i == 2) {
System.out.println("i should see this line. ");
- Assert.assertEquals("k12", ((Map) cur.get(1)).get("k1"));
- Assert.assertEquals("k22", ((Map) cur.get(1)).get("k2"));
- Assert.assertEquals("2", cur.get(2));
+ Assert.assertTrue("k12".equals(((Map) cur.get(1)).get("k1")) || "k14".equals(((Map) cur.get(1)).get("k1")));
+ Assert.assertTrue("k22".equals(((Map) cur.get(1)).get("k2")) || "k24".equals(((Map) cur.get(1)).get("k2")));
+ Assert.assertTrue("2".equals(cur.get(2)) || "4".equals(cur.get(2)));
}
if (i == 3) {
System.out.println("i is : " + i);
- Assert.assertEquals("k13", ((Map) cur.get(1)).get("k1"));
+ Assert.assertTrue("k11".equals(((Map) cur.get(1)).get("k1")) || "k13".equals(((Map) cur.get(1)).get("k1")));
Assert.assertEquals(null, ((Map) cur.get(1)).get("k2"));
- Assert.assertEquals("3", cur.get(2));
+ Assert.assertTrue("1".equals(cur.get(2)) || "3".equals(cur.get(2)));
}
if (i == 4) {
System.out.println("i should see this line. ");
- Assert.assertEquals("k14", ((Map) cur.get(1)).get("k1"));
- Assert.assertEquals("k24", ((Map) cur.get(1)).get("k2"));
- Assert.assertEquals("4", cur.get(2));
+ Assert.assertTrue("k12".equals(((Map) cur.get(1)).get("k1")) || "k14".equals(((Map) cur.get(1)).get("k1")));
+ Assert.assertTrue("k22".equals(((Map) cur.get(1)).get("k2")) || "k24".equals(((Map) cur.get(1)).get("k2")));
+ Assert.assertTrue("2".equals(cur.get(2)) || "4".equals(cur.get(2)));
}
}// outer while
Assert.assertEquals(4, i);
}
+
@Test
// one common field only
public void testReader2() throws ExecException, IOException {
@@ -399,7 +398,7 @@
Assert.assertEquals(null, ((Map) cur.get(0)).get("k2"));
try {
cur.get(1);
- Assert.fail("should throw index out of bound excepiotn");
+ Assert.fail("should throw index out of bound exception");
} catch (Exception e) {
e.printStackTrace();
}
@@ -411,7 +410,7 @@
Assert.assertEquals(null, ((Map) cur.get(0)).get("k2"));
try {
cur.get(1);
- Assert.fail("should throw index out of bound excepiotn");
+ Assert.fail("should throw index out of bound exception");
} catch (Exception e) {
e.printStackTrace();
}
@@ -423,7 +422,7 @@
Assert.assertEquals(null, ((Map) cur.get(0)).get("k2"));
try {
cur.get(1);
- Assert.fail("should throw index out of bound excepiotn");
+ Assert.fail("should throw index out of bound exception");
} catch (Exception e) {
e.printStackTrace();
}
@@ -455,34 +454,35 @@
i++;
System.out.println(" line : " + i + " : " + cur.toString());
+
+
if (i == 1) {
System.out.println("i is : " + i);
- Assert.assertEquals("world1", cur.get(0));
+ Assert.assertTrue("world1".equals(cur.get(0)) || cur.get(0) == null);
try {
cur.get(1);
- Assert.fail("should throw index out of bound excepiotn");
+ Assert.fail("should throw index out of bound exception");
} catch (Exception e) {
e.printStackTrace();
}
}
if (i == 2) {
-
- Assert.assertEquals("world2", cur.get(0));
+ Assert.assertTrue("world2".equals(cur.get(0)) || cur.get(0) == null);
try {
cur.get(1);
- Assert.fail("should throw index out of bound excepiotn");
+ Assert.fail("should throw index out of bound exception");
} catch (Exception e) {
e.printStackTrace();
}
}
if (i == 3) {
- Assert.assertEquals(null, cur.get(0));
+ Assert.assertTrue("world1".equals(cur.get(0)) || cur.get(0) == null);
try {
cur.get(1);
- Assert.fail("should throw index out of bound excepiotn");
+ Assert.fail("should throw index out of bound exception");
} catch (Exception e) {
e.printStackTrace();
}
@@ -490,10 +490,10 @@
if (i == 4) {
- Assert.assertEquals(null, cur.get(0));
+ Assert.assertTrue("world2".equals(cur.get(0)) || cur.get(0) == null);
try {
cur.get(1);
- Assert.fail("should throw index out of bound excepiotn");
+ Assert.fail("should throw index out of bound exception");
} catch (Exception e) {
e.printStackTrace();
}
Added: hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/tfile/KVGenerator.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/tfile/KVGenerator.java?rev=882929&view=auto
==============================================================================
--- hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/tfile/KVGenerator.java (added)
+++ hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/tfile/KVGenerator.java Sat Nov 21 15:36:12 2009
@@ -0,0 +1,105 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.hadoop.zebra.tfile;
+
+import java.util.Random;
+
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.io.WritableComparator;
+import org.apache.hadoop.zebra.tfile.RandomDistribution.DiscreteRNG;
+
+/**
+ * Generate random key/value pairs whose bytes come from a dictionary of random words.
+ */
+class KVGenerator {
+  private static final int MIN_KEY_LEN = 4;
+  private final Random random;
+  private final byte[][] dict;
+  private final boolean sorted;
+  private final DiscreteRNG keyLenRNG, valLenRNG;
+  private final byte prefix[] = new byte[MIN_KEY_LEN];
+  private BytesWritable lastKey;
+
+  /**
+   * Builds a dictionary of dictSize random words (lengths from wordLenRNG) and an initial key.
+   * Key and value lengths come from keyLenRNG and valLenRNG; keys are at least 4 bytes long.
+   */
+  public KVGenerator(Random random, boolean sorted, DiscreteRNG keyLenRNG,
+      DiscreteRNG valLenRNG, DiscreteRNG wordLenRNG, int dictSize) {
+    this.random = random;
+    this.sorted = sorted;
+    this.keyLenRNG = keyLenRNG;
+    this.valLenRNG = valLenRNG;
+    dict = new byte[dictSize][];
+    for (int w = 0; w < dict.length; ++w) {
+      dict[w] = new byte[wordLenRNG.nextInt()];
+      random.nextBytes(dict[w]);
+    }
+    lastKey = new BytesWritable();
+    fillKey(lastKey);
+  }
+
+  /** Copies randomly picked dictionary words into buf[from, to), cutting the last one short. */
+  private void copyWords(byte[] buf, int from, int to) {
+    int pos = from;
+    while (pos < to) {
+      byte[] word = dict[random.nextInt(dict.length)];
+      int count = Math.min(word.length, to - pos);
+      System.arraycopy(word, 0, buf, pos, count);
+      pos += count;
+    }
+  }
+
+  /** Writes a new key into o (4-byte prefix, then words) and saves a copy of it as lastKey. */
+  private void fillKey(BytesWritable o) {
+    int len = Math.max(MIN_KEY_LEN, keyLenRNG.nextInt());
+    o.setSize(len);
+    copyWords(o.get(), MIN_KEY_LEN, len);
+    // Only the bytes after the prefix are compared; when sorted, a lower suffix bumps the prefix.
+    boolean wentDown = sorted && WritableComparator.compareBytes(lastKey.get(), MIN_KEY_LEN,
+        lastKey.getSize() - MIN_KEY_LEN, o.get(), MIN_KEY_LEN, o.getSize() - MIN_KEY_LEN) > 0;
+    if (wentDown) {
+      incrementPrefix();
+    }
+    System.arraycopy(prefix, 0, o.get(), 0, MIN_KEY_LEN);
+    lastKey.set(o);
+  }
+
+  /** Sizes o with a length from valLenRNG and fills all of it with dictionary words. */
+  private void fillValue(BytesWritable o) {
+    o.setSize(valLenRNG.nextInt());
+    copyWords(o.get(), 0, o.getSize());
+  }
+
+  /** Adds one to the prefix as a big-endian counter; throws once every byte wraps to zero. */
+  private void incrementPrefix() {
+    for (int i = MIN_KEY_LEN - 1; i >= 0; --i) {
+      if (++prefix[i] != 0) return;
+    }
+    throw new RuntimeException("Prefix overflown");
+  }
+
+  /** Produces the next pair; key is a copy of the last generated key if dupKey, else new. */
+  public void next(BytesWritable key, BytesWritable value, boolean dupKey) {
+    if (!dupKey) {
+      fillKey(key);
+    } else {
+      key.set(lastKey);
+    }
+    fillValue(value);
+  }
+}
Added: hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/tfile/KeySampler.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/tfile/KeySampler.java?rev=882929&view=auto
==============================================================================
--- hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/tfile/KeySampler.java (added)
+++ hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/tfile/KeySampler.java Sat Nov 21 15:36:12 2009
@@ -0,0 +1,56 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.hadoop.zebra.tfile;
+
+import java.io.IOException;
+import java.util.Random;
+
+import org.apache.hadoop.io.BytesWritable;
+import org.apache.hadoop.zebra.tfile.RandomDistribution.DiscreteRNG;
+
+/** Draws random keys whose 4-byte big-endian prefix lies between those of two given keys. */
+class KeySampler {
+  private static final int MIN_KEY_LEN = 4;
+  Random random;
+  int min, max;
+  DiscreteRNG keyLenRNG;
+  /** Takes the prefixes of first and last as the bounds; keyLenRNG supplies key lengths. */
+  public KeySampler(Random random, RawComparable first, RawComparable last,
+      DiscreteRNG keyLenRNG) throws IOException {
+    this.random = random;
+    min = keyPrefixToInt(first);
+    max = keyPrefixToInt(last);
+    this.keyLenRNG = keyLenRNG;
+  }
+  /** Reads the four bytes at the key's offset as one big-endian int. */
+  private int keyPrefixToInt(RawComparable key) throws IOException {
+    byte[] b = key.buffer();
+    int o = key.offset();
+    return (b[o] & 0xff) << 24 | (b[o + 1] & 0xff) << 16
+        | (b[o + 2] & 0xff) << 8 | (b[o + 3] & 0xff);
+  }
+  /** Randomizes key, then sets its prefix from [min, max), or to min when max - min <= 0. */
+  public void next(BytesWritable key) {
+    key.setSize(Math.max(MIN_KEY_LEN, keyLenRNG.nextInt()));
+    byte[] b = key.get();
+    random.nextBytes(b);
+    // Random.nextInt(bound) throws unless bound > 0 (e.g. first and last share a prefix).
+    int n = max - min > 0 ? min + random.nextInt(max - min) : min;
+    for (int i = 0; i < MIN_KEY_LEN; ++i) {
+      b[i] = (byte) (n >> (8 * (MIN_KEY_LEN - 1 - i)));
+    }
+  }
+}
Added: hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/tfile/NanoTimer.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/tfile/NanoTimer.java?rev=882929&view=auto
==============================================================================
--- hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/tfile/NanoTimer.java (added)
+++ hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/tfile/NanoTimer.java Sat Nov 21 15:36:12 2009
@@ -0,0 +1,193 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.hadoop.zebra.tfile;
+
+/**
+ * A nano-second timer.
+ */
+public class NanoTimer {
+  /** Value of System.nanoTime() captured by the most recent start(). */
+  private long last = -1;
+  private boolean started = false;
+  /** Total nano-seconds accumulated over all completed start/stop runs. */
+  private long cumulate = 0;
+  /**
+   * Whether start() has taken effect since construction or the last reset().
+   * Kept as a separate flag because System.nanoTime() may return any value,
+   * including -1, so {@code last} cannot double as a sentinel.
+   */
+  private boolean everStarted = false;
+
+  /**
+   * Constructor
+   *
+   * @param start
+   *          Start the timer upon construction.
+   */
+  public NanoTimer(boolean start) {
+    if (start) {
+      this.start();
+    }
+  }
+
+  /**
+   * Start the timer.
+   *
+   * Note: No effect if timer is already started.
+   */
+  public void start() {
+    if (!this.started) {
+      this.last = System.nanoTime();
+      this.started = true;
+      this.everStarted = true;
+    }
+  }
+
+  /**
+   * Stop the timer and add the time since the matching start() to the
+   * accumulated total.
+   *
+   * Note: No effect if timer is already stopped.
+   */
+  public void stop() {
+    if (this.started) {
+      this.started = false;
+      this.cumulate += System.nanoTime() - this.last;
+    }
+  }
+
+  /**
+   * Read the timer.
+   *
+   * @return the time accumulated over completed start/stop runs, in
+   *         nano-seconds. A run that is still in progress is not included.
+   *         Note: If the timer is never started before, -1 is returned.
+   */
+  public long read() {
+    if (!readable()) {
+      return -1;
+    }
+
+    return this.cumulate;
+  }
+
+  /**
+   * Reset the timer to its never-started state.
+   */
+  public void reset() {
+    this.last = -1;
+    this.started = false;
+    this.cumulate = 0;
+    this.everStarted = false;
+  }
+
+  /**
+   * Checking whether the timer is started
+   *
+   * @return true if timer is started.
+   */
+  public boolean isStarted() {
+    return this.started;
+  }
+
+  /**
+   * Format the elapsed time to a human understandable string.
+   *
+   * Note: If timer is never started, "ERR" will be returned.
+   */
+  @Override
+  public String toString() {
+    if (!readable()) {
+      return "ERR";
+    }
+
+    return NanoTimer.nanoTimeToString(this.cumulate);
+  }
+
+  /**
+   * A utility method to format a time duration in nano seconds into a human
+   * understandable string.
+   *
+   * The unit is chosen by magnitude: ns, us, ms and s each cover values below
+   * 1000 of that unit; longer durations are shown as the two most significant
+   * of days, hours, minutes and seconds.
+   *
+   * @param t
+   *          Time duration in nano seconds.
+   * @return String representation; "ERR" for a negative duration and "0" for
+   *         zero.
+   */
+  public static String nanoTimeToString(long t) {
+    if (t < 0) {
+      return "ERR";
+    }
+
+    if (t == 0) {
+      return "0";
+    }
+
+    if (t < 1000) {
+      return t + "ns";
+    }
+
+    double us = (double) t / 1000;
+    if (us < 1000) {
+      return String.format("%.2fus", us);
+    }
+
+    double ms = us / 1000;
+    if (ms < 1000) {
+      return String.format("%.2fms", ms);
+    }
+
+    double ss = ms / 1000;
+    if (ss < 1000) {
+      return String.format("%.2fs", ss);
+    }
+
+    // Break the seconds down into days / hours / minutes / leftover seconds.
+    long mm = (long) ss / 60;
+    ss -= mm * 60;
+    long hh = mm / 60;
+    mm -= hh * 60;
+    long dd = hh / 24;
+    hh -= dd * 24;
+
+    if (dd > 0) {
+      return String.format("%dd%dh", dd, hh);
+    }
+
+    if (hh > 0) {
+      return String.format("%dh%dm", hh, mm);
+    }
+
+    if (mm > 0) {
+      return String.format("%dm%.1fs", mm, ss);
+    }
+
+    // Only reachable for durations under a minute, which the checks above
+    // already handle; kept so every path returns a value.
+    return String.format("%.2fs", ss);
+  }
+
+  /**
+   * @return true once the timer has been started at least once since
+   *         construction or the last reset.
+   */
+  private boolean readable() {
+    return this.everStarted;
+  }
+
+  /**
+   * Simple tester: prints the formatted form of the first 20 powers of 7.
+   *
+   * @param args
+   *          ignored.
+   */
+  public static void main(String[] args) {
+    long i = 7;
+
+    for (int x = 0; x < 20; ++x, i *= 7) {
+      System.out.println(NanoTimer.nanoTimeToString(i));
+    }
+  }
+}
+
Added: hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/tfile/RandomDistribution.java
URL: http://svn.apache.org/viewvc/hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/tfile/RandomDistribution.java?rev=882929&view=auto
==============================================================================
--- hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/tfile/RandomDistribution.java (added)
+++ hadoop/pig/branches/branch-0.6/contrib/zebra/src/test/org/apache/hadoop/zebra/tfile/RandomDistribution.java Sat Nov 21 15:36:12 2009
@@ -0,0 +1,266 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with this
+ * work for additional information regarding copyright ownership. The ASF
+ * licenses this file to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
+ * WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
+ * License for the specific language governing permissions and limitations under
+ * the License.
+ */
+package org.apache.hadoop.zebra.tfile;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.Random;
+
+/**
+ * A class that generates random numbers that follow some distribution.
+ */
+public class RandomDistribution {
+  /**
+   * Interface for discrete (integer) random distributions.
+   */
+  public static interface DiscreteRNG {
+    /**
+     * Get the next random number
+     *
+     * @return the next random number.
+     */
+    public int nextInt();
+  }
+
+  /**
+   * Maps the result of a binary search over an ascending table of cumulative
+   * probabilities to the index of the bucket that contains the searched
+   * value. Bucket i covers the half-open interval [table[i-1], table[i]), so
+   * an exact hit on table[i] (any non-negative search result, including 0)
+   * belongs to bucket i + 1. The result is clamped to the last bucket.
+   *
+   * @param searchResult
+   *          value returned by a binarySearch call.
+   * @param bucketCount
+   *          number of entries in the searched table.
+   * @return bucket index in [0, bucketCount).
+   */
+  private static int toBucket(int searchResult, int bucketCount) {
+    int idx = (searchResult >= 0) ? searchResult + 1 : -(searchResult + 1);
+    return Math.min(idx, bucketCount - 1);
+  }
+
+  /**
+   * P(i)=1/(max-min)
+   */
+  public static final class Flat implements DiscreteRNG {
+    private final Random random;
+    private final int min;
+    private final int max;
+
+    /**
+     * Generate random integers from min (inclusive) to max (exclusive)
+     * following even distribution.
+     *
+     * @param random
+     *          The basic random number generator.
+     * @param min
+     *          Minimum integer
+     * @param max
+     *          maximum integer (exclusive).
+     * @throws IllegalArgumentException
+     *           if min is not smaller than max.
+     */
+    public Flat(Random random, int min, int max) {
+      if (min >= max) {
+        throw new IllegalArgumentException("Invalid range");
+      }
+      this.random = random;
+      this.min = min;
+      this.max = max;
+    }
+
+    /**
+     * @see DiscreteRNG#nextInt()
+     */
+    @Override
+    public int nextInt() {
+      return random.nextInt(max - min) + min;
+    }
+  }
+
+  /**
+   * Zipf distribution. The ratio of the probabilities of integer i and j is
+   * defined as follows:
+   *
+   * P(i)/P(j)=((j-min+1)/(i-min+1))^sigma.
+   *
+   * The cumulative distribution is stored only at sampled checkpoints;
+   * integers between two adjacent checkpoints are chosen uniformly.
+   */
+  public static final class Zipf implements DiscreteRNG {
+    private static final double DEFAULT_EPSILON = 0.001;
+    private final Random random;
+    /** Checkpoint integers, ascending. */
+    private final ArrayList<Integer> k;
+    /** Cumulative probability up to and including the matching checkpoint. */
+    private final ArrayList<Double> v;
+
+    /**
+     * Constructor
+     *
+     * @param r
+     *          The random number generator.
+     * @param min
+     *          minimum integer (inclusive)
+     * @param max
+     *          maximum integer (exclusive)
+     * @param sigma
+     *          parameter sigma. (sigma > 1.0)
+     */
+    public Zipf(Random r, int min, int max, double sigma) {
+      this(r, min, max, sigma, DEFAULT_EPSILON);
+    }
+
+    /**
+     * Constructor.
+     *
+     * @param r
+     *          The random number generator.
+     * @param min
+     *          minimum integer (inclusive)
+     * @param max
+     *          maximum integer (exclusive)
+     * @param sigma
+     *          parameter sigma. (sigma > 1.0)
+     * @param epsilon
+     *          Allowable error percentage (0 < epsilon < 0.5).
+     * @throws IllegalArgumentException
+     *           if any argument is outside the ranges given above.
+     */
+    public Zipf(Random r, int min, int max, double sigma, double epsilon) {
+      if ((max <= min) || (sigma <= 1) || (epsilon <= 0)
+          || (epsilon >= 0.5)) {
+        throw new IllegalArgumentException("Invalid arguments");
+      }
+      random = r;
+      k = new ArrayList<Integer>();
+      v = new ArrayList<Double>();
+
+      double sum = 0;
+      int last = -1;
+      for (int i = min; i < max; ++i) {
+        // Unnormalized weight of i is (i - min + 1)^(-sigma).
+        sum += Math.exp(-sigma * Math.log(i - min + 1));
+        // Record a checkpoint for the first value, and afterwards whenever
+        // i * (1 - epsilon) exceeds the previous checkpoint.
+        if ((last == -1) || i * (1 - epsilon) > last) {
+          k.add(i);
+          v.add(sum);
+          last = i;
+        }
+      }
+
+      // Make sure the largest value is always a checkpoint.
+      if (last != max - 1) {
+        k.add(max - 1);
+        v.add(sum);
+      }
+
+      // Normalize to a cumulative distribution; the final entry is set to
+      // exactly 1.0 rather than divided.
+      v.set(v.size() - 1, 1.0);
+
+      for (int i = v.size() - 2; i >= 0; --i) {
+        v.set(i, v.get(i) / sum);
+      }
+    }
+
+    /**
+     * @see DiscreteRNG#nextInt()
+     */
+    @Override
+    public int nextInt() {
+      double d = random.nextDouble();
+      int idx = toBucket(Collections.binarySearch(v, d), v.size());
+
+      if (idx == 0) {
+        return k.get(0);
+      }
+
+      int ceiling = k.get(idx);
+      int lower = k.get(idx - 1);
+
+      // Uniform pick from (lower, ceiling].
+      return ceiling - random.nextInt(ceiling - lower);
+    }
+  }
+
+  /**
+   * Binomial distribution.
+   *
+   * P(k)=select(n, k)*p^k*(1-p)^(n-k) (k = 0, 1, ..., n)
+   *
+   * For integers k in [min, max), with n = max-min-1:
+   *
+   * P(k)=select(max-min-1, k-min)*p^(k-min)*(1-p)^(max-k-1)
+   */
+  public static final class Binomial implements DiscreteRNG {
+    private final Random random;
+    private final int min;
+    private final int n;
+    /** Cumulative probabilities for 0..n successes; null when n is 0. */
+    private final double[] v;
+
+    /** Binomial coefficient "n choose k", computed in floating point. */
+    private static double select(int n, int k) {
+      double ret = 1.0;
+      for (int i = k + 1; i <= n; ++i) {
+        ret *= (double) i / (i - k);
+      }
+      return ret;
+    }
+
+    /** p raised to the k-th power; anything to the power 0 is 1. */
+    private static double power(double p, int k) {
+      if (k == 0) {
+        // exp(0 * log(0)) would be NaN, which breaks p == 0 and p == 1.
+        return 1.0;
+      }
+      return Math.exp(k * Math.log(p));
+    }
+
+    /**
+     * Generate random integers from min (inclusive) to max (exclusive)
+     * following Binomial distribution.
+     *
+     * @param random
+     *          The basic random number generator.
+     * @param min
+     *          Minimum integer
+     * @param max
+     *          maximum integer (exclusive).
+     * @param p
+     *          probability parameter of the distribution; not validated
+     *          here.
+     * @throws IllegalArgumentException
+     *           if min is not smaller than max.
+     */
+    public Binomial(Random random, int min, int max, double p) {
+      if (min >= max) {
+        throw new IllegalArgumentException("Invalid range");
+      }
+      this.random = random;
+      this.min = min;
+      this.n = max - min - 1;
+      if (n > 0) {
+        v = new double[n + 1];
+        double sum = 0.0;
+        for (int i = 0; i <= n; ++i) {
+          sum += select(n, i) * power(p, i) * power(1 - p, n - i);
+          v[i] = sum;
+        }
+        // Divide by the computed total to normalize the table.
+        for (int i = 0; i <= n; ++i) {
+          v[i] /= sum;
+        }
+      }
+      else {
+        // The range holds a single value; no table is needed.
+        v = null;
+      }
+    }
+
+    /**
+     * @see DiscreteRNG#nextInt()
+     */
+    @Override
+    public int nextInt() {
+      if (v == null) {
+        return min;
+      }
+      double d = random.nextDouble();
+      return toBucket(Arrays.binarySearch(v, d), v.length) + min;
+    }
+  }
+}