Posted to common-commits@hadoop.apache.org by wh...@apache.org on 2016/01/05 20:52:44 UTC

[44/50] [abbrv] hadoop git commit: [partial-ns] Import snappy in hdfsdb.

http://git-wip-us.apache.org/repos/asf/hadoop/blob/cb5ba73b/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/Makefile.am
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/Makefile.am b/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/Makefile.am
new file mode 100644
index 0000000..735bc12
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/Makefile.am
@@ -0,0 +1,23 @@
+ACLOCAL_AMFLAGS = -I m4
+
+# Library.
+lib_LTLIBRARIES = libsnappy.la
+libsnappy_la_SOURCES = snappy.cc snappy-sinksource.cc snappy-stubs-internal.cc snappy-c.cc
+libsnappy_la_LDFLAGS = -version-info $(SNAPPY_LTVERSION)
+
+include_HEADERS = snappy.h snappy-sinksource.h snappy-stubs-public.h snappy-c.h
+noinst_HEADERS = snappy-internal.h snappy-stubs-internal.h snappy-test.h
+
+# Unit tests and benchmarks.
+snappy_unittest_CPPFLAGS = $(gflags_CFLAGS) $(GTEST_CPPFLAGS)
+snappy_unittest_SOURCES = snappy_unittest.cc snappy-test.cc
+snappy_unittest_LDFLAGS = $(GTEST_LDFLAGS)
+snappy_unittest_LDADD = libsnappy.la $(UNITTEST_LIBS) $(gflags_LIBS) $(GTEST_LIBS)
+TESTS = snappy_unittest
+noinst_PROGRAMS = $(TESTS)
+
+EXTRA_DIST = autogen.sh testdata/alice29.txt testdata/asyoulik.txt testdata/baddata1.snappy testdata/baddata2.snappy testdata/baddata3.snappy testdata/geo.protodata testdata/fireworks.jpeg testdata/html testdata/html_x_4 testdata/kppkn.gtb testdata/lcet10.txt testdata/paper-100k.pdf testdata/plrabn12.txt testdata/urls.10K
+dist_doc_DATA = ChangeLog COPYING INSTALL NEWS README format_description.txt framing_format.txt
+
+libtool: $(LIBTOOL_DEPS)
+	$(SHELL) ./config.status --recheck

http://git-wip-us.apache.org/repos/asf/hadoop/blob/cb5ba73b/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/NEWS
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/NEWS b/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/NEWS
new file mode 100644
index 0000000..27a5b17
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/NEWS
@@ -0,0 +1,128 @@
+Snappy v1.1.2, February 28th 2014:
+
+This is a maintenance release with no changes to the actual library
+source code.
+
+  * Stop distributing benchmark data files that have unclear
+    or unsuitable licensing.
+
+  * Add support for padding chunks in the framing format.
+
+
+Snappy v1.1.1, October 15th 2013:
+
+  * Add support for uncompressing to iovecs (scatter I/O).
+    The bulk of this patch was contributed by Mohit Aron.
+
+  * Speed up decompression by ~2%; much more so (~13-20%) on
+    a few benchmarks on given compilers and CPUs.
+
+  * Fix a few issues with MSVC compilation.
+
+  * Support truncated test data in the benchmark.
+
+
+Snappy v1.1.0, January 18th 2013:
+
+  * Snappy now uses 64 kB block size instead of 32 kB. On average,
+    this means it compresses about 3% denser (more so for some
+    inputs), at the same or better speeds.
+
+  * libsnappy no longer depends on iostream.
+
+  * Some small performance improvements in compression on x86
+    (0.5–1%).
+
+  * Various portability fixes for ARM-based platforms, for MSVC,
+    and for GNU/Hurd.
+
+
+Snappy v1.0.5, February 24th 2012:
+
+  * More speed improvements. Exactly how big will depend on
+    the architecture:
+
+    - 3–10% faster decompression for the base case (x86-64).
+
+    - ARMv7 and higher can now use unaligned accesses,
+      and will see about 30% faster decompression and
+      20–40% faster compression.
+
+    - 32-bit platforms (ARM and 32-bit x86) will see 2–5%
+      faster compression.
+
+    These are all cumulative (e.g., ARM gets all three speedups).
+
+  * Fixed an issue where the unit test would crash on system
+    with less than 256 MB address space available,
+    e.g. some embedded platforms.
+
+  * Added a framing format description, for use over e.g. HTTP,
+    or for a command-line compressor. We do not have any
+    implementations of this at the current point, but there seems
+    to be enough of a general interest in the topic.
+    Also make the format description slightly clearer.
+
+  * Remove some compile-time warnings in -Wall
+    (mostly signed/unsigned comparisons), for easier embedding
+    into projects that use -Wall -Werror.
+
+
+Snappy v1.0.4, September 15th 2011:
+
+  * Speeded up the decompressor somewhat; typically about 2–8%
+    for Core i7, in 64-bit mode (comparable for Opteron).
+    Somewhat more for some tests, almost no gain for others.
+  
+  * Make Snappy compile on certain platforms it didn't before
+    (Solaris with SunPro C++, HP-UX, AIX).
+
+  * Correct some minor errors in the format description.
+
+
+Snappy v1.0.3, June 2nd 2011:
+
+  * Speeded up the decompressor somewhat; about 3-6% for Core 2,
+    6-13% for Core i7, and 5-12% for Opteron (all in 64-bit mode).
+
+  * Added compressed format documentation. This text is new,
+    but an earlier version from Zeev Tarantov was used as reference.
+
+  * Only link snappy_unittest against -lz and other autodetected
+    libraries, not libsnappy.so (which doesn't need any such dependency).
+
+  * Fixed some display issues in the microbenchmarks, one of which would
+    frequently make the test crash on GNU/Hurd.
+
+
+Snappy v1.0.2, April 29th 2011:
+
+  * Relicense to a BSD-type license.
+
+  * Added C bindings, contributed by Martin Gieseking.
+
+  * More Win32 fixes, in particular for MSVC.
+
+  * Replace geo.protodata with a newer version.
+
+  * Fix timing inaccuracies in the unit test when comparing Snappy
+    to other algorithms.
+
+
+Snappy v1.0.1, March 25th 2011:
+
+This is a maintenance release, mostly containing minor fixes.
+There is no new functionality. The most important fixes include:
+
+  * The COPYING file and all licensing headers now correctly state that
+    Snappy is licensed under the Apache 2.0 license.
+
+  * snappy_unittest should now compile natively under Windows,
+    as well as on embedded systems with no mmap().
+
+  * Various autotools nits have been fixed.
+
+
+Snappy v1.0, March 17th 2011:
+
+  * Initial version.

http://git-wip-us.apache.org/repos/asf/hadoop/blob/cb5ba73b/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/README
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/README b/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/README
new file mode 100644
index 0000000..3bc8888
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/README
@@ -0,0 +1,135 @@
+Snappy, a fast compressor/decompressor.
+
+
+Introduction
+============
+
+Snappy is a compression/decompression library. It does not aim for maximum
+compression, or compatibility with any other compression library; instead,
+it aims for very high speeds and reasonable compression. For instance,
+compared to the fastest mode of zlib, Snappy is an order of magnitude faster
+for most inputs, but the resulting compressed files are anywhere from 20% to
+100% bigger. (For more information, see "Performance", below.)
+
+Snappy has the following properties:
+
+ * Fast: Compression speeds at 250 MB/sec and beyond, with no assembler code.
+   See "Performance" below.
+ * Stable: Over the last few years, Snappy has compressed and decompressed
+   petabytes of data in Google's production environment. The Snappy bitstream
+   format is stable and will not change between versions.
+ * Robust: The Snappy decompressor is designed not to crash in the face of
+   corrupted or malicious input.
+ * Free and open source software: Snappy is licensed under a BSD-type license.
+   For more information, see the included COPYING file.
+
+Snappy has previously been called "Zippy" in some Google presentations
+and the like.
+
+
+Performance
+===========
+ 
+Snappy is intended to be fast. On a single core of a Core i7 processor
+in 64-bit mode, it compresses at about 250 MB/sec or more and decompresses at
+about 500 MB/sec or more. (These numbers are for the slowest inputs in our
+benchmark suite; others are much faster.) In our tests, Snappy usually
+is faster than algorithms in the same class (e.g. LZO, LZF, FastLZ, QuickLZ,
+etc.) while achieving comparable compression ratios.
+
+Typical compression ratios (based on the benchmark suite) are about 1.5-1.7x
+for plain text, about 2-4x for HTML, and of course 1.0x for JPEGs, PNGs and
+other already-compressed data. Similar numbers for zlib in its fastest mode
+are 2.6-2.8x, 3-7x and 1.0x, respectively. More sophisticated algorithms are
+capable of achieving yet higher compression rates, although usually at the
+expense of speed. Of course, compression ratio will vary significantly with
+the input.
+
+Although Snappy should be fairly portable, it is primarily optimized
+for 64-bit x86-compatible processors, and may run slower in other environments.
+In particular:
+
+ - Snappy uses 64-bit operations in several places to process more data at
+   once than would otherwise be possible.
+ - Snappy assumes unaligned 32- and 64-bit loads and stores are cheap.
+   On some platforms, these must be emulated with single-byte loads 
+   and stores, which is much slower.
+ - Snappy assumes little-endian throughout, and needs to byte-swap data in
+   several places if running on a big-endian platform.
+
+Experience has shown that even heavily tuned code can be improved.
+Performance optimizations, whether for 64-bit x86 or other platforms,
+are of course most welcome; see "Contact", below.
+
+
+Usage
+=====
+
+Note that Snappy, both the implementation and the main interface,
+is written in C++. However, several third-party bindings to other languages
+are available; see the Google Code page at http://code.google.com/p/snappy/
+for more information. Also, if you want to use Snappy from C code, you can
+use the included C bindings in snappy-c.h.
+
+To use Snappy from your own C++ program, include the file "snappy.h" from
+your calling file, and link against the compiled library.
+
+There are many ways to call Snappy, but the simplest possible is
+
+  snappy::Compress(input.data(), input.size(), &output);
+
+and similarly
+
+  snappy::Uncompress(input.data(), input.size(), &output);
+
+where "input" and "output" are both instances of std::string.
+
+There are other interfaces that are more flexible in various ways, including
+support for custom (non-array) input sources. See the header file for more
+information.
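+
+As a small, self-contained round-trip example (an illustrative sketch, not
+part of the Snappy distribution; error handling is kept minimal):
+
+  #include <cassert>
+  #include <iostream>
+  #include <string>
+
+  #include "snappy.h"
+
+  int main() {
+    const std::string input(10000, 'a');   // highly compressible sample data
+
+    std::string compressed;
+    snappy::Compress(input.data(), input.size(), &compressed);
+
+    std::string uncompressed;
+    const bool ok = snappy::Uncompress(compressed.data(), compressed.size(),
+                                       &uncompressed);
+    assert(ok && uncompressed == input);
+
+    std::cout << input.size() << " -> " << compressed.size() << " bytes\n";
+    return 0;
+  }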
+
+
+Tests and benchmarks
+====================
+
+When you compile Snappy, snappy_unittest is compiled in addition to the
+library itself. You do not need it to use the compressor from your own library,
+but it contains several useful components for Snappy development.
+
+First of all, it contains unit tests, verifying correctness on your machine in
+various scenarios. If you want to change or optimize Snappy, please run the
+tests to verify you have not broken anything. Note that if you have the
+Google Test library installed, unit test behavior (especially failures) will be
+significantly more user-friendly. You can find Google Test at
+
+  http://code.google.com/p/googletest/
+
+You probably also want the gflags library for handling of command-line flags;
+you can find it at
+
+  http://code.google.com/p/google-gflags/
+
+In addition to the unit tests, snappy contains microbenchmarks used to
+tune compression and decompression performance. These are automatically run
+before the unit tests, but you can disable them using the flag
+--run_microbenchmarks=false if you have gflags installed (otherwise you will
+need to edit the source).
+
+Finally, snappy can benchmark Snappy against a few other compression libraries
+(zlib, LZO, LZF, FastLZ and QuickLZ), if they were detected at configure time.
+To benchmark using a given file, give the compression algorithm you want to test
+Snappy against (e.g. --zlib) and then a list of one or more file names on the
+command line. The testdata/ directory contains the files used by the
+microbenchmark, which should provide a reasonably balanced starting point for
+benchmarking. (Note that baddata[1-3].snappy are not intended as benchmarks; they
+are used to verify correctness in the presence of corrupted data in the unit
+test.)
+
+
+Contact
+=======
+
+Snappy is distributed through Google Code. For the latest version, a bug tracker,
+and other information, see
+
+  http://code.google.com/p/snappy/

http://git-wip-us.apache.org/repos/asf/hadoop/blob/cb5ba73b/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/autogen.sh
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/autogen.sh b/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/autogen.sh
new file mode 100755
index 0000000..9d0ebe9
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/autogen.sh
@@ -0,0 +1,7 @@
+#! /bin/sh -e
+rm -rf autom4te.cache
+aclocal -I m4
+autoheader
+libtoolize --copy
+automake --add-missing --copy
+autoconf

http://git-wip-us.apache.org/repos/asf/hadoop/blob/cb5ba73b/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/configure.ac
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/configure.ac b/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/configure.ac
new file mode 100644
index 0000000..3164b09
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/configure.ac
@@ -0,0 +1,133 @@
+m4_define([snappy_major], [1])
+m4_define([snappy_minor], [1])
+m4_define([snappy_patchlevel], [2])
+
+# Libtool shared library interface versions (current:revision:age)
+# Update this value for every release!  (A:B:C will map to foo.so.(A-C).C.B)
+# http://www.gnu.org/software/libtool/manual/html_node/Updating-version-info.html
+m4_define([snappy_ltversion], [3:1:2])
+
+AC_INIT([snappy], [snappy_major.snappy_minor.snappy_patchlevel])
+AC_CONFIG_MACRO_DIR([m4])
+
+# These are flags passed to automake (though they look like gcc flags!)
+AM_INIT_AUTOMAKE([-Wall])
+
+LT_INIT
+AC_SUBST([LIBTOOL_DEPS])
+AC_PROG_CXX
+AC_LANG([C++])
+AC_C_BIGENDIAN
+AC_TYPE_SIZE_T
+AC_TYPE_SSIZE_T
+AC_CHECK_HEADERS([stdint.h stddef.h sys/mman.h sys/resource.h windows.h byteswap.h sys/byteswap.h sys/endian.h sys/time.h])
+
+# Don't use AC_FUNC_MMAP, as it checks for mappings of already-mapped memory,
+# which we don't need (and does not exist on Windows).
+AC_CHECK_FUNC([mmap])
+
+GTEST_LIB_CHECK([], [true], [true # Ignore; we can live without it.])
+
+AC_ARG_WITH([gflags],
+  [AS_HELP_STRING(
+    [--with-gflags],
+    [use Google Flags package to enhance the unit test @<:@default=check@:>@])],
+    [],
+    [with_gflags=check])
+
+if test "x$with_gflags" != "xno"; then
+  PKG_CHECK_MODULES(
+    [gflags],
+    [libgflags],
+    [AC_DEFINE([HAVE_GFLAGS], [1], [Use the gflags package for command-line parsing.])],
+    [if test "x$with_gflags" != "xcheck"; then
+      AC_MSG_FAILURE([--with-gflags was given, but test for gflags failed])
+    fi])
+fi
+
+# See if we have __builtin_expect.
+# TODO: Use AC_CACHE.
+AC_MSG_CHECKING([if the compiler supports __builtin_expect])
+ 
+AC_TRY_COMPILE(, [
+    return __builtin_expect(1, 1) ? 1 : 0
+], [
+    snappy_have_builtin_expect=yes
+    AC_MSG_RESULT([yes])
+], [
+    snappy_have_builtin_expect=no
+    AC_MSG_RESULT([no])
+])
+if test x$snappy_have_builtin_expect = xyes ; then
+    AC_DEFINE([HAVE_BUILTIN_EXPECT], [1], [Define to 1 if the compiler supports __builtin_expect.])
+fi
+
+# See if we have working count-trailing-zeros intrinsics.
+# TODO: Use AC_CACHE.
+AC_MSG_CHECKING([if the compiler supports __builtin_ctzll])
+
+AC_TRY_COMPILE(, [
+    return (__builtin_ctzll(0x100000000LL) == 32) ? 1 : 0
+], [
+    snappy_have_builtin_ctz=yes
+    AC_MSG_RESULT([yes])
+], [
+    snappy_have_builtin_ctz=no
+    AC_MSG_RESULT([no])
+])
+if test x$snappy_have_builtin_ctz = xyes ; then
+    AC_DEFINE([HAVE_BUILTIN_CTZ], [1], [Define to 1 if the compiler supports __builtin_ctz and friends.])
+fi
+
+# Other compression libraries; the unit test can use these for comparison
+# if they are available. If they are not found, just ignore.
+UNITTEST_LIBS=""
+AC_DEFUN([CHECK_EXT_COMPRESSION_LIB], [
+  AH_CHECK_LIB([$1])
+  AC_CHECK_LIB(
+    [$1],
+    [$2],
+    [
+      AC_DEFINE_UNQUOTED(AS_TR_CPP(HAVE_LIB$1))
+      UNITTEST_LIBS="-l$1 $UNITTEST_LIBS"
+    ],
+    [true]
+  )
+])
+CHECK_EXT_COMPRESSION_LIB([z], [zlibVersion])
+CHECK_EXT_COMPRESSION_LIB([lzo2], [lzo1x_1_15_compress])
+CHECK_EXT_COMPRESSION_LIB([lzf], [lzf_compress])
+CHECK_EXT_COMPRESSION_LIB([fastlz], [fastlz_compress])
+CHECK_EXT_COMPRESSION_LIB([quicklz], [qlz_compress])
+AC_SUBST([UNITTEST_LIBS])
+
+# These are used by snappy-stubs-public.h.in.
+if test "$ac_cv_header_stdint_h" = "yes"; then
+    AC_SUBST([ac_cv_have_stdint_h], [1])
+else
+    AC_SUBST([ac_cv_have_stdint_h], [0])
+fi
+if test "$ac_cv_header_stddef_h" = "yes"; then
+    AC_SUBST([ac_cv_have_stddef_h], [1])
+else
+    AC_SUBST([ac_cv_have_stddef_h], [0])
+fi
+if test "$ac_cv_header_sys_uio_h" = "yes"; then
+    AC_SUBST([ac_cv_have_sys_uio_h], [1])
+else
+    AC_SUBST([ac_cv_have_sys_uio_h], [0])
+fi
+
+# Export the version to snappy-stubs-public.h.
+SNAPPY_MAJOR="snappy_major"
+SNAPPY_MINOR="snappy_minor"
+SNAPPY_PATCHLEVEL="snappy_patchlevel"
+
+AC_SUBST([SNAPPY_MAJOR])
+AC_SUBST([SNAPPY_MINOR])
+AC_SUBST([SNAPPY_PATCHLEVEL])
+AC_SUBST([SNAPPY_LTVERSION], snappy_ltversion)
+
+AC_CONFIG_HEADERS([config.h])
+AC_CONFIG_FILES([Makefile snappy-stubs-public.h])
+AC_OUTPUT

http://git-wip-us.apache.org/repos/asf/hadoop/blob/cb5ba73b/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/format_description.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/format_description.txt b/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/format_description.txt
new file mode 100644
index 0000000..20db66c
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/format_description.txt
@@ -0,0 +1,110 @@
+Snappy compressed format description
+Last revised: 2011-10-05
+
+
+This is not a formal specification, but should suffice to explain most
+relevant parts of how the Snappy format works. It is originally based on
+text by Zeev Tarantov.
+
+Snappy is a LZ77-type compressor with a fixed, byte-oriented encoding.
+There is no entropy encoder backend nor framing layer -- the latter is
+assumed to be handled by other parts of the system.
+
+This document only describes the format, not how the Snappy compressor nor
+decompressor actually works. The correctness of the decompressor should not
+depend on implementation details of the compressor, and vice versa.
+
+
+1. Preamble
+
+The stream starts with the uncompressed length (up to a maximum of 2^32 - 1),
+stored as a little-endian varint. Varints consist of a series of bytes,
+where the lower 7 bits are data and the upper bit is set iff there are
+more bytes to be read. In other words, an uncompressed length of 64 would
+be stored as 0x40, and an uncompressed length of 2097150 (0x1FFFFE)
+would be stored as 0xFE 0xFF 0x7F.
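+
+For illustration (this helper is not part of Snappy itself, which has its own
+varint routines), a direct transcription of that rule into C++ could read:
+
+  // Decodes the little-endian varint in [p, end). Returns the position just
+  // past the varint, or NULL if the input is truncated or over-long.
+  const char* DecodeUncompressedLength(const char* p, const char* end,
+                                       uint32_t* result) {
+    uint32_t value = 0;
+    for (int shift = 0; shift <= 28 && p < end; shift += 7) {
+      const uint8_t byte = static_cast<uint8_t>(*p++);
+      value |= static_cast<uint32_t>(byte & 0x7f) << shift;
+      if ((byte & 0x80) == 0) {   // high bit clear: this was the last byte
+        *result = value;
+        return p;
+      }
+    }
+    return NULL;
+  }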
+
+
+2. The compressed stream itself
+
+There are two types of elements in a Snappy stream: Literals and
+copies (backreferences). There is no restriction on the order of elements,
+except that the stream naturally cannot start with a copy. (Having
+two literals in a row is never optimal from a compression point of
+view, but nevertheless fully permitted.) Each element starts with a tag byte,
+and the lower two bits of this tag byte signal what type of element will
+follow:
+
+  00: Literal
+  01: Copy with 1-byte offset
+  10: Copy with 2-byte offset
+  11: Copy with 4-byte offset
+
+The interpretation of the upper six bits is element-dependent.
+
+
+2.1. Literals (00)
+
+Literals are uncompressed data stored directly in the byte stream.
+The literal length is stored differently depending on the length
+of the literal:
+
+ - For literals up to and including 60 bytes in length, the upper
+   six bits of the tag byte contain (len-1). The literal follows
+   immediately thereafter in the bytestream.
+ - For longer literals, the (len-1) value is stored after the tag byte,
+   little-endian. The upper six bits of the tag byte describe how
+   many bytes are used for the length; 60, 61, 62 or 63 for
+   1-4 bytes, respectively. The literal itself follows after the
+   length.
+
+
+2.2. Copies
+
+Copies are references back into previous decompressed data, telling
+the decompressor to reuse data it has previously decoded.
+They encode two values: The _offset_, saying how many bytes back
+from the current position to read, and the _length_, how many bytes
+to copy. Offsets of zero can be encoded, but are not legal;
+similarly, it is possible to encode backreferences that would
+go past the end of the block (offset > current decompressed position),
+which is also nonsensical and thus not allowed.
+
+As in most LZ77-based compressors, the length can be larger than the offset,
+yielding a form of run-length encoding (RLE). For instance,
+"xababab" could be encoded as
+
+  <literal: "xab"> <copy: offset=2 length=4>
+
+Note that since the current Snappy compressor works in 32 kB
+blocks and does not do matching across blocks, it will never produce
+a bitstream with offsets larger than about 32768. However, the
+decompressor should not rely on this, as it may change in the future.
+
+There are several different kinds of copy elements, depending on
+the number of bytes to be copied (length), and how far back the
+data to be copied is (offset).
+
+
+2.2.1. Copy with 1-byte offset (01)
+
+These elements can encode lengths between [4..11] bytes and offsets
+between [0..2047] bytes. (len-4) occupies three bits and is stored
+in bits [2..4] of the tag byte. The offset occupies 11 bits, of which the
+upper three are stored in the upper three bits ([5..7]) of the tag byte,
+and the lower eight are stored in a byte following the tag byte.
+
+
+2.2.2. Copy with 2-byte offset (10)
+
+These elements can encode lengths between [1..64] and offsets from
+[0..65535]. (len-1) occupies six bits and is stored in the upper
+six bits ([2..7]) of the tag byte. The offset is stored as a
+little-endian 16-bit integer in the two bytes following the tag byte.
+
+
+2.2.3. Copy with 4-byte offset (11)
+
+These are like the copies with 2-byte offsets (see previous subsection),
+except that the offset is stored as a 32-bit integer instead of a
+16-bit integer (and thus will occupy four bytes).
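+
+To make the element headers above concrete, the following sketch (written for
+exposition; it is not taken from the Snappy implementation) decodes one tag
+byte plus its extra length/offset bytes:
+
+  struct Element {
+    int type;          // 0 = literal, 1/2/3 = copy with 1/2/4-byte offset
+    uint32_t length;   // number of literal or copied bytes
+    uint32_t offset;   // only meaningful for copies
+  };
+
+  // "p" points at a tag byte; "*extra" receives how many bytes follow the
+  // tag before the literal data (for literals) or the next element (copies).
+  Element DecodeTag(const uint8_t* p, size_t* extra) {
+    Element e;
+    const uint8_t tag = p[0];
+    e.type = tag & 0x03;
+    e.offset = 0;
+    switch (e.type) {
+      case 0:                               // literal (section 2.1)
+        if ((tag >> 2) < 60) {
+          e.length = (tag >> 2) + 1;        // (len-1) stored in the tag byte
+          *extra = 0;
+        } else {
+          *extra = (tag >> 2) - 59;         // 60..63 => 1..4 length bytes
+          e.length = 0;
+          for (size_t i = 0; i < *extra; i++)
+            e.length |= static_cast<uint32_t>(p[1 + i]) << (8 * i);
+          e.length += 1;
+        }
+        break;
+      case 1:                               // copy, 1-byte offset (2.2.1)
+        e.length = ((tag >> 2) & 0x07) + 4;
+        e.offset = (static_cast<uint32_t>(tag >> 5) << 8) | p[1];
+        *extra = 1;
+        break;
+      case 2:                               // copy, 2-byte offset (2.2.2)
+        e.length = (tag >> 2) + 1;
+        e.offset = p[1] | (static_cast<uint32_t>(p[2]) << 8);
+        *extra = 2;
+        break;
+      default:                              // copy, 4-byte offset (2.2.3)
+        e.length = (tag >> 2) + 1;
+        e.offset = p[1] | (static_cast<uint32_t>(p[2]) << 8) |
+                   (static_cast<uint32_t>(p[3]) << 16) |
+                   (static_cast<uint32_t>(p[4]) << 24);
+        *extra = 4;
+        break;
+    }
+    return e;
+  }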

http://git-wip-us.apache.org/repos/asf/hadoop/blob/cb5ba73b/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/framing_format.txt
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/framing_format.txt b/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/framing_format.txt
new file mode 100644
index 0000000..9764e83
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/framing_format.txt
@@ -0,0 +1,135 @@
+Snappy framing format description
+Last revised: 2013-10-25
+
+This document describes a framing format for Snappy, allowing compression to
+files or streams that can then more easily be decompressed without having
+to hold the entire stream in memory. It also provides data checksums to
+help verify integrity. It does not provide metadata checksums, so it does
+not protect against e.g. all forms of truncations.
+
+Implementation of the framing format is optional for Snappy compressors and
+decompressors; it is not part of the Snappy core specification.
+
+
+1. General structure
+
+The file consists solely of chunks, lying back-to-back with no padding
+in between. Each chunk consists first of a single byte of chunk identifier,
+then a three-byte little-endian length of the chunk in bytes (from 0 to
+16777215, inclusive), and then the data, if any. The four bytes of chunk
+header are not counted in the data length.
+
+The different chunk types are listed below. The first chunk must always
+be the stream identifier chunk (see section 4.1, below). The stream
+ends when the file ends -- there is no explicit end-of-file marker.
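+
+In code form, reading a chunk header reduces to one byte of type plus a
+24-bit little-endian length (an illustrative sketch, not part of the
+specification):
+
+  struct ChunkHeader {
+    uint8_t type;     // chunk identifier, e.g. 0xff for the stream identifier
+    uint32_t length;  // length of the data that follows, header excluded
+  };
+
+  ChunkHeader ParseChunkHeader(const uint8_t header[4]) {
+    ChunkHeader h;
+    h.type = header[0];
+    h.length = static_cast<uint32_t>(header[1]) |
+               (static_cast<uint32_t>(header[2]) << 8) |
+               (static_cast<uint32_t>(header[3]) << 16);
+    return h;
+  }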
+
+
+2. File type identification
+
+The following identifiers for this format are recommended where appropriate.
+However, note that none have been registered officially, so this is only to
+be taken as a guideline. We use "Snappy framed" to distinguish between this
+format and raw Snappy data.
+
+  File extension:         .sz
+  MIME type:              application/x-snappy-framed
+  HTTP Content-Encoding:  x-snappy-framed
+
+
+3. Checksum format
+
+Some chunks have data protected by a checksum (the ones that do will say so
+explicitly). The checksums are always masked CRC-32Cs.
+
+A description of CRC-32C can be found in RFC 3720, section 12.1, with
+examples in section B.4.
+
+Checksums are not stored directly, but masked, as checksumming data and
+then its own checksum can be problematic. The masking is the same as used
+in Apache Hadoop: Rotate the checksum by 15 bits, then add the constant
+0xa282ead8 (using wraparound as normal for unsigned integers). This is
+equivalent to the following C code:
+
+  uint32_t mask_checksum(uint32_t x) {
+    return ((x >> 15) | (x << 17)) + 0xa282ead8;
+  }
+
+Note that the masking is reversible.
+
+The checksum is always stored as a four-byte integer, in little-endian order.
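+
+Because the masking is reversible, a reader recovers the original CRC-32C by
+undoing the two steps in reverse order; a sketch of the inverse (illustrative
+only, not part of the specification) is:
+
+  uint32_t unmask_checksum(uint32_t masked) {
+    uint32_t rot = masked - 0xa282ead8;   /* undo the constant addition */
+    return (rot << 15) | (rot >> 17);     /* rotate back by 15 bits */
+  }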
+
+
+4. Chunk types
+
+The currently supported chunk types are described below. The list may
+be extended in the future.
+
+
+4.1. Stream identifier (chunk type 0xff)
+
+The stream identifier is always the first element in the stream.
+It is exactly six bytes long and contains "sNaPpY" in ASCII. This means that
+a valid Snappy framed stream always starts with the bytes
+
+  0xff 0x06 0x00 0x00 0x73 0x4e 0x61 0x50 0x70 0x59
+
+The stream identifier chunk can come multiple times in the stream besides
+the first; if such a chunk shows up, it should simply be ignored, assuming
+it has the right length and contents. This allows for easy concatenation of
+compressed files without the need for re-framing.
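+
+A reader can therefore recognize a stream identifier chunk with a simple
+comparison against those ten bytes; for example (illustrative only):
+
+  const uint8_t kStreamIdentifier[10] =
+      { 0xff, 0x06, 0x00, 0x00, 0x73, 0x4e, 0x61, 0x50, 0x70, 0x59 };
+
+  /* "p" points at the start of a chunk, including its four header bytes. */
+  int starts_with_stream_identifier(const uint8_t* p, size_t available) {
+    return available >= 10 && memcmp(p, kStreamIdentifier, 10) == 0;
+  }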
+
+
+4.2. Compressed data (chunk type 0x00)
+
+Compressed data chunks contain a normal Snappy compressed bitstream;
+see the compressed format specification. The compressed data is preceded by
+the CRC-32C (see section 3) of the _uncompressed_ data.
+
+Note that the data portion of the chunk, i.e., the compressed contents,
+can be at most 16777211 bytes (2^24 - 1, minus the checksum).
+However, we place an additional restriction that the uncompressed data
+in a chunk must be no longer than 65536 bytes. This allows consumers to
+easily use small fixed-size buffers.
+
+
+4.3. Uncompressed data (chunk type 0x01)
+
+Uncompressed data chunks allow a compressor to send uncompressed,
+raw data; this is useful if, for instance, uncompressible or
+near-incompressible data is detected, and faster decompression is desired.
+
+As in the compressed chunks, the data is preceded by its own masked
+CRC-32C (see section 3).
+
+An uncompressed data chunk, like compressed data chunks, should contain
+no more than 65536 data bytes, so the maximum legal chunk length with the
+checksum is 65540.
+
+
+4.4. Padding (chunk type 0xfe)
+
+Padding chunks allow a compressor to increase the size of the data stream
+so that it complies with external demands, e.g. that the total number of
+bytes is a multiple of some value.
+
+All bytes of the padding chunk, except the chunk byte itself and the length,
+should be zero, but decompressors must not try to interpret or verify the
+padding data in any way.
+
+
+4.5. Reserved unskippable chunks (chunk types 0x02-0x7f)
+
+These are reserved for future expansion. A decoder that sees such a chunk
+should immediately return an error, as it must assume it cannot decode the
+stream correctly.
+
+Future versions of this specification may define meanings for these chunks.
+
+
+4.6. Reserved skippable chunks (chunk types 0x80-0xfd)
+
+These are also reserved for future expansion, but unlike the chunks
+described in 4.5, a decoder seeing these must skip them and continue
+decoding.
+
+Future versions of this specification may define meanings for these chunks.

http://git-wip-us.apache.org/repos/asf/hadoop/blob/cb5ba73b/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/m4/gtest.m4
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/m4/gtest.m4 b/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/m4/gtest.m4
new file mode 100644
index 0000000..98e61f9
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/m4/gtest.m4
@@ -0,0 +1,74 @@
+dnl GTEST_LIB_CHECK([minimum version [,
+dnl                  action if found [,action if not found]]])
+dnl
+dnl Check for the presence of the Google Test library, optionally at a minimum
+dnl version, and indicate a viable version with the HAVE_GTEST flag. It defines
+dnl standard variables for substitution including GTEST_CPPFLAGS,
+dnl GTEST_CXXFLAGS, GTEST_LDFLAGS, and GTEST_LIBS. It also defines
+dnl GTEST_VERSION as the version of Google Test found. Finally, it provides
+dnl optional custom action slots in the event GTEST is found or not.
+AC_DEFUN([GTEST_LIB_CHECK],
+[
+dnl Provide a flag to enable or disable Google Test usage.
+AC_ARG_ENABLE([gtest],
+  [AS_HELP_STRING([--enable-gtest],
+                  [Enable tests using the Google C++ Testing Framework.
+                  (Default is enabled.)])],
+  [],
+  [enable_gtest=])
+AC_ARG_VAR([GTEST_CONFIG],
+           [The exact path of Google Test's 'gtest-config' script.])
+AC_ARG_VAR([GTEST_CPPFLAGS],
+           [C-like preprocessor flags for Google Test.])
+AC_ARG_VAR([GTEST_CXXFLAGS],
+           [C++ compile flags for Google Test.])
+AC_ARG_VAR([GTEST_LDFLAGS],
+           [Linker path and option flags for Google Test.])
+AC_ARG_VAR([GTEST_LIBS],
+           [Library linking flags for Google Test.])
+AC_ARG_VAR([GTEST_VERSION],
+           [The version of Google Test available.])
+HAVE_GTEST="no"
+AS_IF([test "x${enable_gtest}" != "xno"],
+  [AC_MSG_CHECKING([for 'gtest-config'])
+   AS_IF([test "x${enable_gtest}" = "xyes"],
+     [AS_IF([test -x "${enable_gtest}/scripts/gtest-config"],
+        [GTEST_CONFIG="${enable_gtest}/scripts/gtest-config"],
+        [GTEST_CONFIG="${enable_gtest}/bin/gtest-config"])
+      AS_IF([test -x "${GTEST_CONFIG}"], [],
+        [AC_MSG_RESULT([no])
+         AC_MSG_ERROR([dnl
+Unable to locate either a built or installed Google Test.
+The specific location '${enable_gtest}' was provided for a built or installed
+Google Test, but no 'gtest-config' script could be found at this location.])
+         ])],
+     [AC_PATH_PROG([GTEST_CONFIG], [gtest-config])])
+   AS_IF([test -x "${GTEST_CONFIG}"],
+     [AC_MSG_RESULT([${GTEST_CONFIG}])
+      m4_ifval([$1],
+        [_gtest_min_version="--min-version=$1"
+         AC_MSG_CHECKING([for Google Test at least version >= $1])],
+        [_gtest_min_version="--min-version=0"
+         AC_MSG_CHECKING([for Google Test])])
+      AS_IF([${GTEST_CONFIG} ${_gtest_min_version}],
+        [AC_MSG_RESULT([yes])
+         HAVE_GTEST='yes'],
+        [AC_MSG_RESULT([no])])],
+     [AC_MSG_RESULT([no])])
+   AS_IF([test "x${HAVE_GTEST}" = "xyes"],
+     [GTEST_CPPFLAGS=`${GTEST_CONFIG} --cppflags`
+      GTEST_CXXFLAGS=`${GTEST_CONFIG} --cxxflags`
+      GTEST_LDFLAGS=`${GTEST_CONFIG} --ldflags`
+      GTEST_LIBS=`${GTEST_CONFIG} --libs`
+      GTEST_VERSION=`${GTEST_CONFIG} --version`
+      AC_DEFINE([HAVE_GTEST],[1],[Defined when Google Test is available.])],
+     [AS_IF([test "x${enable_gtest}" = "xyes"],
+        [AC_MSG_ERROR([dnl
+Google Test was enabled, but no viable version could be found.])
+         ])])])
+AC_SUBST([HAVE_GTEST])
+AM_CONDITIONAL([HAVE_GTEST],[test "x$HAVE_GTEST" = "xyes"])
+AS_IF([test "x$HAVE_GTEST" = "xyes"],
+  [m4_ifval([$2], [$2])],
+  [m4_ifval([$3], [$3])])
+])

http://git-wip-us.apache.org/repos/asf/hadoop/blob/cb5ba73b/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/snappy-c.cc
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/snappy-c.cc b/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/snappy-c.cc
new file mode 100644
index 0000000..473a0b0
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/snappy-c.cc
@@ -0,0 +1,90 @@
+// Copyright 2011 Martin Gieseking <ma...@uos.de>.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "snappy.h"
+#include "snappy-c.h"
+
+extern "C" {
+
+snappy_status snappy_compress(const char* input,
+                              size_t input_length,
+                              char* compressed,
+                              size_t *compressed_length) {
+  if (*compressed_length < snappy_max_compressed_length(input_length)) {
+    return SNAPPY_BUFFER_TOO_SMALL;
+  }
+  snappy::RawCompress(input, input_length, compressed, compressed_length);
+  return SNAPPY_OK;
+}
+
+snappy_status snappy_uncompress(const char* compressed,
+                                size_t compressed_length,
+                                char* uncompressed,
+                                size_t* uncompressed_length) {
+  size_t real_uncompressed_length;
+  if (!snappy::GetUncompressedLength(compressed,
+                                     compressed_length,
+                                     &real_uncompressed_length)) {
+    return SNAPPY_INVALID_INPUT;
+  }
+  if (*uncompressed_length < real_uncompressed_length) {
+    return SNAPPY_BUFFER_TOO_SMALL;
+  }
+  if (!snappy::RawUncompress(compressed, compressed_length, uncompressed)) {
+    return SNAPPY_INVALID_INPUT;
+  }
+  *uncompressed_length = real_uncompressed_length;
+  return SNAPPY_OK;
+}
+
+size_t snappy_max_compressed_length(size_t source_length) {
+  return snappy::MaxCompressedLength(source_length);
+}
+
+snappy_status snappy_uncompressed_length(const char *compressed,
+                                         size_t compressed_length,
+                                         size_t *result) {
+  if (snappy::GetUncompressedLength(compressed,
+                                    compressed_length,
+                                    result)) {
+    return SNAPPY_OK;
+  } else {
+    return SNAPPY_INVALID_INPUT;
+  }
+}
+
+snappy_status snappy_validate_compressed_buffer(const char *compressed,
+                                                size_t compressed_length) {
+  if (snappy::IsValidCompressedBuffer(compressed, compressed_length)) {
+    return SNAPPY_OK;
+  } else {
+    return SNAPPY_INVALID_INPUT;
+  }
+}
+
+}  // extern "C"

http://git-wip-us.apache.org/repos/asf/hadoop/blob/cb5ba73b/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/snappy-c.h
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/snappy-c.h b/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/snappy-c.h
new file mode 100644
index 0000000..c6c2a86
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/snappy-c.h
@@ -0,0 +1,138 @@
+/*
+ * Copyright 2011 Martin Gieseking <ma...@uos.de>.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met:
+ *
+ *     * Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ *     * Redistributions in binary form must reproduce the above
+ * copyright notice, this list of conditions and the following disclaimer
+ * in the documentation and/or other materials provided with the
+ * distribution.
+ *     * Neither the name of Google Inc. nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Plain C interface (a wrapper around the C++ implementation).
+ */
+
+#ifndef UTIL_SNAPPY_OPENSOURCE_SNAPPY_C_H_
+#define UTIL_SNAPPY_OPENSOURCE_SNAPPY_C_H_
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+#include <stddef.h>
+
+/*
+ * Return values; see the documentation for each function to know
+ * what each can return.
+ */
+typedef enum {
+  SNAPPY_OK = 0,
+  SNAPPY_INVALID_INPUT = 1,
+  SNAPPY_BUFFER_TOO_SMALL = 2
+} snappy_status;
+
+/*
+ * Takes the data stored in "input[0..input_length-1]" and stores
+ * it in the array pointed to by "compressed".
+ *
+ * <compressed_length> signals the space available in "compressed".
+ * If it is not at least equal to "snappy_max_compressed_length(input_length)",
+ * SNAPPY_BUFFER_TOO_SMALL is returned. After successful compression,
+ * <compressed_length> contains the true length of the compressed output,
+ * and SNAPPY_OK is returned.
+ *
+ * Example:
+ *   size_t output_length = snappy_max_compressed_length(input_length);
+ *   char* output = (char*)malloc(output_length);
+ *   if (snappy_compress(input, input_length, output, &output_length)
+ *       == SNAPPY_OK) {
+ *     ... Process(output, output_length) ...
+ *   }
+ *   free(output);
+ */
+snappy_status snappy_compress(const char* input,
+                              size_t input_length,
+                              char* compressed,
+                              size_t* compressed_length);
+
+/*
+ * Given data in "compressed[0..compressed_length-1]" generated by
+ * calling the snappy_compress routine, this routine stores
+ * the uncompressed data to
+ *   uncompressed[0..uncompressed_length-1].
+ * Returns failure (a value not equal to SNAPPY_OK) if the message
+ * is corrupted and could not be decompressed.
+ *
+ * <uncompressed_length> signals the space available in "uncompressed".
+ * If it is not at least equal to the value returned by
+ * snappy_uncompressed_length for this stream, SNAPPY_BUFFER_TOO_SMALL
+ * is returned. After successful decompression, <uncompressed_length>
+ * contains the true length of the decompressed output.
+ *
+ * Example:
+ *   size_t output_length;
+ *   if (snappy_uncompressed_length(input, input_length, &output_length)
+ *       != SNAPPY_OK) {
+ *     ... fail ...
+ *   }
+ *   char* output = (char*)malloc(output_length);
+ *   if (snappy_uncompress(input, input_length, output, &output_length)
+ *       == SNAPPY_OK) {
+ *     ... Process(output, output_length) ...
+ *   }
+ *   free(output);
+ */
+snappy_status snappy_uncompress(const char* compressed,
+                                size_t compressed_length,
+                                char* uncompressed,
+                                size_t* uncompressed_length);
+
+/*
+ * Returns the maximal size of the compressed representation of
+ * input data that is "source_length" bytes in length.
+ */
+size_t snappy_max_compressed_length(size_t source_length);
+
+/*
+ * REQUIRES: "compressed[]" was produced by snappy_compress()
+ * Returns SNAPPY_OK and stores the length of the uncompressed data in
+ * *result normally. Returns SNAPPY_INVALID_INPUT on parsing error.
+ * This operation takes O(1) time.
+ */
+snappy_status snappy_uncompressed_length(const char* compressed,
+                                         size_t compressed_length,
+                                         size_t* result);
+
+/*
+ * Check if the contents of "compressed[]" can be uncompressed successfully.
+ * Does not return the uncompressed data; if the buffer can be uncompressed,
+ * returns SNAPPY_OK, otherwise returns SNAPPY_INVALID_INPUT.
+ * Takes time proportional to compressed_length, but is usually at least a
+ * factor of four faster than actual decompression.
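+ *
+ * Example (an illustrative sketch, in the style of the examples above):
+ *   if (snappy_validate_compressed_buffer(input, input_length) == SNAPPY_OK) {
+ *     ... safe to call snappy_uncompress() on "input" ...
+ *   } else {
+ *     ... reject the corrupted buffer ...
+ *   }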
+ */
+snappy_status snappy_validate_compressed_buffer(const char* compressed,
+                                                size_t compressed_length);
+
+#ifdef __cplusplus
+}  // extern "C"
+#endif
+
+#endif  /* UTIL_SNAPPY_OPENSOURCE_SNAPPY_C_H_ */

http://git-wip-us.apache.org/repos/asf/hadoop/blob/cb5ba73b/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/snappy-internal.h
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/snappy-internal.h b/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/snappy-internal.h
new file mode 100644
index 0000000..c99d331
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/snappy-internal.h
@@ -0,0 +1,150 @@
+// Copyright 2008 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Internals shared between the Snappy implementation and its unittest.
+
+#ifndef UTIL_SNAPPY_SNAPPY_INTERNAL_H_
+#define UTIL_SNAPPY_SNAPPY_INTERNAL_H_
+
+#include "snappy-stubs-internal.h"
+
+namespace snappy {
+namespace internal {
+
+class WorkingMemory {
+ public:
+  WorkingMemory() : large_table_(NULL) { }
+  ~WorkingMemory() { delete[] large_table_; }
+
+  // Allocates and clears a hash table using memory in "*this",
+  // stores the number of buckets in "*table_size" and returns a pointer to
+  // the base of the hash table.
+  uint16* GetHashTable(size_t input_size, int* table_size);
+
+ private:
+  uint16 small_table_[1<<10];    // 2KB
+  uint16* large_table_;          // Allocated only when needed
+
+  DISALLOW_COPY_AND_ASSIGN(WorkingMemory);
+};
+
+// Flat array compression that does not emit the "uncompressed length"
+// prefix. Compresses "input" string to the "*op" buffer.
+//
+// REQUIRES: "input_length <= kBlockSize"
+// REQUIRES: "op" points to an array of memory that is at least
+// "MaxCompressedLength(input_length)" in size.
+// REQUIRES: All elements in "table[0..table_size-1]" are initialized to zero.
+// REQUIRES: "table_size" is a power of two
+//
+// Returns an "end" pointer into "op" buffer.
+// "end - op" is the compressed size of "input".
+char* CompressFragment(const char* input,
+                       size_t input_length,
+                       char* op,
+                       uint16* table,
+                       const int table_size);
+
+// Return the largest n such that
+//
+//   s1[0,n-1] == s2[0,n-1]
+//   and n <= (s2_limit - s2).
+//
+// Does not read *s2_limit or beyond.
+// Does not read *(s1 + (s2_limit - s2)) or beyond.
+// Requires that s2_limit >= s2.
+//
+// Separate implementation for x86_64, for speed.  Uses the fact that
+// x86_64 is little endian.
+#if defined(ARCH_K8)
+static inline int FindMatchLength(const char* s1,
+                                  const char* s2,
+                                  const char* s2_limit) {
+  assert(s2_limit >= s2);
+  int matched = 0;
+
+  // Find out how long the match is. We loop over the data 64 bits at a
+  // time until we find a 64-bit block that doesn't match; then we find
+  // the first non-matching bit and use that to calculate the total
+  // length of the match.
+  while (PREDICT_TRUE(s2 <= s2_limit - 8)) {
+    if (PREDICT_FALSE(UNALIGNED_LOAD64(s2) == UNALIGNED_LOAD64(s1 + matched))) {
+      s2 += 8;
+      matched += 8;
+    } else {
+      // On current (mid-2008) Opteron models there is a 3% more
+      // efficient code sequence to find the first non-matching byte.
+      // However, what follows is ~10% better on Intel Core 2 and newer,
+      // and we expect AMD's bsf instruction to improve.
+      uint64 x = UNALIGNED_LOAD64(s2) ^ UNALIGNED_LOAD64(s1 + matched);
+      int matching_bits = Bits::FindLSBSetNonZero64(x);
+      matched += matching_bits >> 3;
+      return matched;
+    }
+  }
+  while (PREDICT_TRUE(s2 < s2_limit)) {
+    if (PREDICT_TRUE(s1[matched] == *s2)) {
+      ++s2;
+      ++matched;
+    } else {
+      return matched;
+    }
+  }
+  return matched;
+}
+#else
+static inline int FindMatchLength(const char* s1,
+                                  const char* s2,
+                                  const char* s2_limit) {
+  // Implementation based on the x86-64 version, above.
+  assert(s2_limit >= s2);
+  int matched = 0;
+
+  while (s2 <= s2_limit - 4 &&
+         UNALIGNED_LOAD32(s2) == UNALIGNED_LOAD32(s1 + matched)) {
+    s2 += 4;
+    matched += 4;
+  }
+  if (LittleEndian::IsLittleEndian() && s2 <= s2_limit - 4) {
+    uint32 x = UNALIGNED_LOAD32(s2) ^ UNALIGNED_LOAD32(s1 + matched);
+    int matching_bits = Bits::FindLSBSetNonZero(x);
+    matched += matching_bits >> 3;
+  } else {
+    while ((s2 < s2_limit) && (s1[matched] == *s2)) {
+      ++s2;
+      ++matched;
+    }
+  }
+  return matched;
+}
+#endif
+
+}  // end namespace internal
+}  // end namespace snappy
+
+#endif  // UTIL_SNAPPY_SNAPPY_INTERNAL_H_

http://git-wip-us.apache.org/repos/asf/hadoop/blob/cb5ba73b/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/snappy-sinksource.cc
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/snappy-sinksource.cc b/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/snappy-sinksource.cc
new file mode 100644
index 0000000..5844552
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/snappy-sinksource.cc
@@ -0,0 +1,71 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <string.h>
+
+#include "snappy-sinksource.h"
+
+namespace snappy {
+
+Source::~Source() { }
+
+Sink::~Sink() { }
+
+char* Sink::GetAppendBuffer(size_t length, char* scratch) {
+  return scratch;
+}
+
+ByteArraySource::~ByteArraySource() { }
+
+size_t ByteArraySource::Available() const { return left_; }
+
+const char* ByteArraySource::Peek(size_t* len) {
+  *len = left_;
+  return ptr_;
+}
+
+void ByteArraySource::Skip(size_t n) {
+  left_ -= n;
+  ptr_ += n;
+}
+
+UncheckedByteArraySink::~UncheckedByteArraySink() { }
+
+void UncheckedByteArraySink::Append(const char* data, size_t n) {
+  // Do no copying if the caller filled in the result of GetAppendBuffer()
+  if (data != dest_) {
+    memcpy(dest_, data, n);
+  }
+  dest_ += n;
+}
+
+char* UncheckedByteArraySink::GetAppendBuffer(size_t len, char* scratch) {
+  return dest_;
+}
+
+}

http://git-wip-us.apache.org/repos/asf/hadoop/blob/cb5ba73b/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/snappy-sinksource.h
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/snappy-sinksource.h b/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/snappy-sinksource.h
new file mode 100644
index 0000000..faabfa1
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/snappy-sinksource.h
@@ -0,0 +1,137 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef UTIL_SNAPPY_SNAPPY_SINKSOURCE_H_
+#define UTIL_SNAPPY_SNAPPY_SINKSOURCE_H_
+
+#include <stddef.h>
+
+
+namespace snappy {
+
+// A Sink is an interface that consumes a sequence of bytes.
+class Sink {
+ public:
+  Sink() { }
+  virtual ~Sink();
+
+  // Append "bytes[0,n-1]" to this.
+  virtual void Append(const char* bytes, size_t n) = 0;
+
+  // Returns a writable buffer of the specified length for appending.
+  // May return a pointer to the caller-owned scratch buffer which
+  // must have at least the indicated length.  The returned buffer is
+  // only valid until the next operation on this Sink.
+  //
+  // After writing at most "length" bytes, call Append() with the
+  // pointer returned from this function and the number of bytes
+  // written.  Many Append() implementations will avoid copying
+  // bytes if this function returned an internal buffer.
+  //
+  // If a non-scratch buffer is returned, the caller may only pass a
+  // prefix of it to Append().  That is, it is not correct to pass an
+  // interior pointer of the returned array to Append().
+  //
+  // The default implementation always returns the scratch buffer.
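+  //
+  // An illustrative caller-side sketch of this protocol (FillAtMostN is a
+  // hypothetical helper that writes up to "length" bytes into the buffer
+  // and returns how many it wrote):
+  //
+  //   char* buf = sink->GetAppendBuffer(length, scratch);
+  //   size_t written = FillAtMostN(buf, length);
+  //   sink->Append(buf, written);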
+  virtual char* GetAppendBuffer(size_t length, char* scratch);
+
+
+ private:
+  // No copying
+  Sink(const Sink&);
+  void operator=(const Sink&);
+};
+
+// A Source is an interface that yields a sequence of bytes
+class Source {
+ public:
+  Source() { }
+  virtual ~Source();
+
+  // Return the number of bytes left to read from the source
+  virtual size_t Available() const = 0;
+
+  // Peek at the next flat region of the source.  Does not reposition
+  // the source.  The returned region is empty iff Available()==0.
+  //
+  // Returns a pointer to the beginning of the region and stores its
+  // length in *len.
+  //
+  // The returned region is valid until the next call to Skip() or
+  // until this object is destroyed, whichever occurs first.
+  //
+  // The returned region may be larger than Available() (for example
+  // if this ByteSource is a view on a substring of a larger source).
+  // The caller is responsible for ensuring that it only reads the
+  // Available() bytes.
+  virtual const char* Peek(size_t* len) = 0;
+
+  // Skip the next n bytes.  Invalidates any buffer returned by
+  // a previous call to Peek().
+  // REQUIRES: Available() >= n
+  virtual void Skip(size_t n) = 0;
+
+ private:
+  // No copying
+  Source(const Source&);
+  void operator=(const Source&);
+};
+
+// A Source implementation that yields the contents of a flat array
+class ByteArraySource : public Source {
+ public:
+  ByteArraySource(const char* p, size_t n) : ptr_(p), left_(n) { }
+  virtual ~ByteArraySource();
+  virtual size_t Available() const;
+  virtual const char* Peek(size_t* len);
+  virtual void Skip(size_t n);
+ private:
+  const char* ptr_;
+  size_t left_;
+};
+
+// A Sink implementation that writes to a flat array without any bound checks.
+class UncheckedByteArraySink : public Sink {
+ public:
+  explicit UncheckedByteArraySink(char* dest) : dest_(dest) { }
+  virtual ~UncheckedByteArraySink();
+  virtual void Append(const char* data, size_t n);
+  virtual char* GetAppendBuffer(size_t len, char* scratch);
+
+  // Return the current output pointer so that a caller can see how
+  // many bytes were produced.
+  // Note: this is not a Sink method.
+  char* CurrentDestination() const { return dest_; }
+ private:
+  char* dest_;
+};
+
+
+}  // namespace snappy
+
+#endif  // UTIL_SNAPPY_SNAPPY_SINKSOURCE_H_
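
The Sink/Source pair declared above is the byte-stream interface behind snappy's
streaming entry points. As a minimal sketch of how a caller can implement and
drive it, using only the declarations in this header: StringSink, Drain and
CopyExample are illustrative names, not part of the imported sources, and the
code assumes it links against libsnappy for the base-class destructors.

    #include <string>
    #include "snappy-sinksource.h"

    // A Sink that appends everything it receives to a std::string.
    class StringSink : public snappy::Sink {
     public:
      explicit StringSink(std::string* dest) : dest_(dest) { }
      virtual void Append(const char* bytes, size_t n) {
        dest_->append(bytes, n);
      }
     private:
      std::string* dest_;
    };

    // Drain a Source into a Sink using only Available/Peek/Skip/Append.
    // Peek() may expose more than Available() bytes, so clamp before reading.
    void Drain(snappy::Source* source, snappy::Sink* sink) {
      while (source->Available() > 0) {
        size_t len;
        const char* data = source->Peek(&len);
        if (len > source->Available()) len = source->Available();
        sink->Append(data, len);
        source->Skip(len);
      }
    }

    void CopyExample() {
      std::string out;
      StringSink sink(&out);
      snappy::ByteArraySource source("hello", 5);
      Drain(&source, &sink);  // out now holds "hello".
    }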

http://git-wip-us.apache.org/repos/asf/hadoop/blob/cb5ba73b/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/snappy-stubs-internal.cc
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/snappy-stubs-internal.cc b/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/snappy-stubs-internal.cc
new file mode 100644
index 0000000..6ed3343
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/snappy-stubs-internal.cc
@@ -0,0 +1,42 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include <algorithm>
+#include <string>
+
+#include "snappy-stubs-internal.h"
+
+namespace snappy {
+
+void Varint::Append32(string* s, uint32 value) {
+  char buf[Varint::kMax32];
+  const char* p = Varint::Encode32(buf, value);
+  s->append(buf, p - buf);
+}
+
+}  // namespace snappy
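
Append32() above, together with Varint::Encode32() and Varint::Parse32WithLimit()
declared in snappy-stubs-internal.h (next file), gives a complete varint round
trip. A small sketch of that round trip; VarintRoundTrip is an illustrative
name only:

    #include <assert.h>
    #include <string>
    #include "snappy-stubs-internal.h"

    void VarintRoundTrip() {
      std::string s;
      snappy::Varint::Append32(&s, 300);  // 300 encodes in two varint bytes.
      assert(s.size() == 2);

      snappy::uint32 value = 0;
      const char* end = snappy::Varint::Parse32WithLimit(
          s.data(), s.data() + s.size(), &value);
      assert(end == s.data() + s.size());
      assert(value == 300);
    }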

http://git-wip-us.apache.org/repos/asf/hadoop/blob/cb5ba73b/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/snappy-stubs-internal.h
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/snappy-stubs-internal.h b/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/snappy-stubs-internal.h
new file mode 100644
index 0000000..12393b6
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/snappy-stubs-internal.h
@@ -0,0 +1,491 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Various stubs for the open-source version of Snappy.
+
+#ifndef UTIL_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_
+#define UTIL_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <string>
+
+#include <assert.h>
+#include <stdlib.h>
+#include <string.h>
+
+#ifdef HAVE_SYS_MMAN_H
+#include <sys/mman.h>
+#endif
+
+#include "snappy-stubs-public.h"
+
+#if defined(__x86_64__)
+
+// Enable 64-bit optimized versions of some routines.
+#define ARCH_K8 1
+
+#endif
+
+// Needed by OS X, among others.
+#ifndef MAP_ANONYMOUS
+#define MAP_ANONYMOUS MAP_ANON
+#endif
+
+// Pull in std::min, std::ostream, and the like. This is safe because this
+// header file is never used from any public header files.
+using namespace std;
+
+// The size of an array, if known at compile-time.
+// Will give unexpected results if used on a pointer.
+// We undefine it first, since some compilers already have a definition.
+#ifdef ARRAYSIZE
+#undef ARRAYSIZE
+#endif
+#define ARRAYSIZE(a) (sizeof(a) / sizeof(*(a)))
+
+// Static prediction hints.
+#ifdef HAVE_BUILTIN_EXPECT
+#define PREDICT_FALSE(x) (__builtin_expect(x, 0))
+#define PREDICT_TRUE(x) (__builtin_expect(!!(x), 1))
+#else
+#define PREDICT_FALSE(x) x
+#define PREDICT_TRUE(x) x
+#endif
+
+// This is only used for recomputing the tag byte table used during
+// decompression; for simplicity we just remove it from the open-source
+// version (anyone who wants to regenerate it can just do the call
+// themselves within main()).
+#define DEFINE_bool(flag_name, default_value, description) \
+  bool FLAGS_ ## flag_name = default_value
+#define DECLARE_bool(flag_name) \
+  extern bool FLAGS_ ## flag_name
+
+namespace snappy {
+
+static const uint32 kuint32max = static_cast<uint32>(0xFFFFFFFF);
+static const int64 kint64max = static_cast<int64>(0x7FFFFFFFFFFFFFFFLL);
+
+// Potentially unaligned loads and stores.
+
+// x86 and PowerPC can simply do these loads and stores native.
+
+#if defined(__i386__) || defined(__x86_64__) || defined(__powerpc__)
+
+#define UNALIGNED_LOAD16(_p) (*reinterpret_cast<const uint16 *>(_p))
+#define UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32 *>(_p))
+#define UNALIGNED_LOAD64(_p) (*reinterpret_cast<const uint64 *>(_p))
+
+#define UNALIGNED_STORE16(_p, _val) (*reinterpret_cast<uint16 *>(_p) = (_val))
+#define UNALIGNED_STORE32(_p, _val) (*reinterpret_cast<uint32 *>(_p) = (_val))
+#define UNALIGNED_STORE64(_p, _val) (*reinterpret_cast<uint64 *>(_p) = (_val))
+
+// ARMv7 and newer support native unaligned accesses, but only of 16-bit
+// and 32-bit values (not 64-bit); older versions either raise a fatal signal,
+// do an unaligned read and rotate the words around a bit, or do the reads very
+// slowly (trip through kernel mode). There's no simple #define that says just
+// “ARMv7 or higher”, so we have to filter away all ARMv5 and ARMv6
+// sub-architectures.
+//
+// This is a mess, but there's not much we can do about it.
+
+#elif defined(__arm__) && \
+      !defined(__ARM_ARCH_4__) && \
+      !defined(__ARM_ARCH_4T__) && \
+      !defined(__ARM_ARCH_5__) && \
+      !defined(__ARM_ARCH_5T__) && \
+      !defined(__ARM_ARCH_5TE__) && \
+      !defined(__ARM_ARCH_5TEJ__) && \
+      !defined(__ARM_ARCH_6__) && \
+      !defined(__ARM_ARCH_6J__) && \
+      !defined(__ARM_ARCH_6K__) && \
+      !defined(__ARM_ARCH_6Z__) && \
+      !defined(__ARM_ARCH_6ZK__) && \
+      !defined(__ARM_ARCH_6T2__)
+
+#define UNALIGNED_LOAD16(_p) (*reinterpret_cast<const uint16 *>(_p))
+#define UNALIGNED_LOAD32(_p) (*reinterpret_cast<const uint32 *>(_p))
+
+#define UNALIGNED_STORE16(_p, _val) (*reinterpret_cast<uint16 *>(_p) = (_val))
+#define UNALIGNED_STORE32(_p, _val) (*reinterpret_cast<uint32 *>(_p) = (_val))
+
+// TODO(user): NEON supports unaligned 64-bit loads and stores.
+// See if that would be more efficient on platforms supporting it,
+// at least for copies.
+
+inline uint64 UNALIGNED_LOAD64(const void *p) {
+  uint64 t;
+  memcpy(&t, p, sizeof t);
+  return t;
+}
+
+inline void UNALIGNED_STORE64(void *p, uint64 v) {
+  memcpy(p, &v, sizeof v);
+}
+
+#else
+
+// These functions are provided for architectures that don't support
+// unaligned loads and stores.
+
+inline uint16 UNALIGNED_LOAD16(const void *p) {
+  uint16 t;
+  memcpy(&t, p, sizeof t);
+  return t;
+}
+
+inline uint32 UNALIGNED_LOAD32(const void *p) {
+  uint32 t;
+  memcpy(&t, p, sizeof t);
+  return t;
+}
+
+inline uint64 UNALIGNED_LOAD64(const void *p) {
+  uint64 t;
+  memcpy(&t, p, sizeof t);
+  return t;
+}
+
+inline void UNALIGNED_STORE16(void *p, uint16 v) {
+  memcpy(p, &v, sizeof v);
+}
+
+inline void UNALIGNED_STORE32(void *p, uint32 v) {
+  memcpy(p, &v, sizeof v);
+}
+
+inline void UNALIGNED_STORE64(void *p, uint64 v) {
+  memcpy(p, &v, sizeof v);
+}
+
+#endif
+
+// This can be more efficient than UNALIGNED_LOAD64 + UNALIGNED_STORE64
+// on some platforms, in particular ARM.
+inline void UnalignedCopy64(const void *src, void *dst) {
+  if (sizeof(void *) == 8) {
+    UNALIGNED_STORE64(dst, UNALIGNED_LOAD64(src));
+  } else {
+    const char *src_char = reinterpret_cast<const char *>(src);
+    char *dst_char = reinterpret_cast<char *>(dst);
+
+    UNALIGNED_STORE32(dst_char, UNALIGNED_LOAD32(src_char));
+    UNALIGNED_STORE32(dst_char + 4, UNALIGNED_LOAD32(src_char + 4));
+  }
+}
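
The point of these macros and helpers is that callers can read or write
fixed-width integers at arbitrary byte offsets without worrying about
alignment: the header selects a native access where the platform allows it and
a memcpy-based fallback elsewhere. A hypothetical helper showing the intended
usage; it sits in namespace snappy because the uint32 typedef and the macro
expansions resolve there:

    #include "snappy-stubs-internal.h"

    namespace snappy {

    // Read a 32-bit value from a possibly misaligned offset in a byte buffer.
    // (Illustrative helper, not part of the imported sources.)
    inline uint32 ReadAt(const char* buf, size_t offset) {
      return UNALIGNED_LOAD32(buf + offset);
    }

    }  // namespace snappy
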
+
+// The following guarantees declaration of the byte swap functions.
+#ifdef WORDS_BIGENDIAN
+
+#ifdef HAVE_SYS_BYTEORDER_H
+#include <sys/byteorder.h>
+#endif
+
+#ifdef HAVE_SYS_ENDIAN_H
+#include <sys/endian.h>
+#endif
+
+#ifdef _MSC_VER
+#include <stdlib.h>
+#define bswap_16(x) _byteswap_ushort(x)
+#define bswap_32(x) _byteswap_ulong(x)
+#define bswap_64(x) _byteswap_uint64(x)
+
+#elif defined(__APPLE__)
+// Mac OS X / Darwin features
+#include <libkern/OSByteOrder.h>
+#define bswap_16(x) OSSwapInt16(x)
+#define bswap_32(x) OSSwapInt32(x)
+#define bswap_64(x) OSSwapInt64(x)
+
+#elif defined(HAVE_BYTESWAP_H)
+#include <byteswap.h>
+
+#elif defined(bswap32)
+// FreeBSD defines bswap{16,32,64} in <sys/endian.h> (already #included).
+#define bswap_16(x) bswap16(x)
+#define bswap_32(x) bswap32(x)
+#define bswap_64(x) bswap64(x)
+
+#elif defined(BSWAP_64)
+// Solaris 10 defines BSWAP_{16,32,64} in <sys/byteorder.h> (already #included).
+#define bswap_16(x) BSWAP_16(x)
+#define bswap_32(x) BSWAP_32(x)
+#define bswap_64(x) BSWAP_64(x)
+
+#else
+
+inline uint16 bswap_16(uint16 x) {
+  return (x << 8) | (x >> 8);
+}
+
+inline uint32 bswap_32(uint32 x) {
+  x = ((x & 0xff00ff00UL) >> 8) | ((x & 0x00ff00ffUL) << 8);
+  return (x >> 16) | (x << 16);
+}
+
+inline uint64 bswap_64(uint64 x) {
+  x = ((x & 0xff00ff00ff00ff00ULL) >> 8) | ((x & 0x00ff00ff00ff00ffULL) << 8);
+  x = ((x & 0xffff0000ffff0000ULL) >> 16) | ((x & 0x0000ffff0000ffffULL) << 16);
+  return (x >> 32) | (x << 32);
+}
+
+#endif
+
+#endif  // WORDS_BIGENDIAN
+
+// Convert to little-endian storage, opposite of network format.
+// Convert x from host to little endian: x = LittleEndian.FromHost(x);
+// convert x from little endian to host: x = LittleEndian.ToHost(x);
+//
+//  Store values into unaligned memory converting to little endian order:
+//    LittleEndian.Store16(p, x);
+//
+//  Load unaligned values stored in little endian converting to host order:
+//    x = LittleEndian.Load16(p);
+class LittleEndian {
+ public:
+  // Conversion functions.
+#ifdef WORDS_BIGENDIAN
+
+  static uint16 FromHost16(uint16 x) { return bswap_16(x); }
+  static uint16 ToHost16(uint16 x) { return bswap_16(x); }
+
+  static uint32 FromHost32(uint32 x) { return bswap_32(x); }
+  static uint32 ToHost32(uint32 x) { return bswap_32(x); }
+
+  static bool IsLittleEndian() { return false; }
+
+#else  // !defined(WORDS_BIGENDIAN)
+
+  static uint16 FromHost16(uint16 x) { return x; }
+  static uint16 ToHost16(uint16 x) { return x; }
+
+  static uint32 FromHost32(uint32 x) { return x; }
+  static uint32 ToHost32(uint32 x) { return x; }
+
+  static bool IsLittleEndian() { return true; }
+
+#endif  // !defined(WORDS_BIGENDIAN)
+
+  // Functions to do unaligned loads and stores in little-endian order.
+  static uint16 Load16(const void *p) {
+    return ToHost16(UNALIGNED_LOAD16(p));
+  }
+
+  static void Store16(void *p, uint16 v) {
+    UNALIGNED_STORE16(p, FromHost16(v));
+  }
+
+  static uint32 Load32(const void *p) {
+    return ToHost32(UNALIGNED_LOAD32(p));
+  }
+
+  static void Store32(void *p, uint32 v) {
+    UNALIGNED_STORE32(p, FromHost32(v));
+  }
+};
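
LittleEndian above is what keeps the on-disk/wire byte order independent of the
host: values are always stored little-endian in memory and converted to host
order on load. A small sanity sketch (the function name is illustrative); the
byte-level assertions hold on both little- and big-endian builds:

    #include <assert.h>
    #include "snappy-stubs-internal.h"

    void LittleEndianExample() {
      char buf[4];
      // Always stores bytes 0x78 0x56 0x34 0x12, regardless of host order.
      snappy::LittleEndian::Store32(buf, 0x12345678);
      assert(static_cast<unsigned char>(buf[0]) == 0x78);
      assert(static_cast<unsigned char>(buf[3]) == 0x12);
      // Load32 converts back to host order, so the round trip is exact.
      assert(snappy::LittleEndian::Load32(buf) == 0x12345678);
    }
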
+
+// Some bit-manipulation functions.
+class Bits {
+ public:
+  // Return floor(log2(n)) for positive integer n.  Returns -1 iff n == 0.
+  static int Log2Floor(uint32 n);
+
+  // Return the index of the least significant set bit, 0-indexed.  Returns an
+  // undefined value if n == 0.  FindLSBSetNonZero() is similar to ffs() except
+  // that it's 0-indexed.
+  static int FindLSBSetNonZero(uint32 n);
+  static int FindLSBSetNonZero64(uint64 n);
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(Bits);
+};
+
+#ifdef HAVE_BUILTIN_CTZ
+
+inline int Bits::Log2Floor(uint32 n) {
+  return n == 0 ? -1 : 31 ^ __builtin_clz(n);
+}
+
+inline int Bits::FindLSBSetNonZero(uint32 n) {
+  return __builtin_ctz(n);
+}
+
+inline int Bits::FindLSBSetNonZero64(uint64 n) {
+  return __builtin_ctzll(n);
+}
+
+#else  // Portable versions.
+
+inline int Bits::Log2Floor(uint32 n) {
+  if (n == 0)
+    return -1;
+  int log = 0;
+  uint32 value = n;
+  for (int i = 4; i >= 0; --i) {
+    int shift = (1 << i);
+    uint32 x = value >> shift;
+    if (x != 0) {
+      value = x;
+      log += shift;
+    }
+  }
+  assert(value == 1);
+  return log;
+}
+
+inline int Bits::FindLSBSetNonZero(uint32 n) {
+  int rc = 31;
+  for (int i = 4, shift = 1 << 4; i >= 0; --i) {
+    const uint32 x = n << shift;
+    if (x != 0) {
+      n = x;
+      rc -= shift;
+    }
+    shift >>= 1;
+  }
+  return rc;
+}
+
+// FindLSBSetNonZero64() is defined in terms of FindLSBSetNonZero().
+inline int Bits::FindLSBSetNonZero64(uint64 n) {
+  const uint32 bottombits = static_cast<uint32>(n);
+  if (bottombits == 0) {
+    // Bottom bits are zero, so scan in top bits
+    return 32 + FindLSBSetNonZero(static_cast<uint32>(n >> 32));
+  } else {
+    return FindLSBSetNonZero(bottombits);
+  }
+}
+
+#endif  // End portable versions.
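
Whichever branch is compiled (the __builtin_* one or the portable one above),
the results are the same. A few spot checks in a hypothetical test helper:

    #include <assert.h>
    #include "snappy-stubs-internal.h"

    void BitsExample() {
      // floor(log2(n)), with the documented -1 for n == 0.
      assert(snappy::Bits::Log2Floor(1) == 0);
      assert(snappy::Bits::Log2Floor(32) == 5);
      assert(snappy::Bits::Log2Floor(1000) == 9);
      assert(snappy::Bits::Log2Floor(0) == -1);

      // 0-indexed position of the least significant set bit (n must be nonzero).
      assert(snappy::Bits::FindLSBSetNonZero(0x8) == 3);
      assert(snappy::Bits::FindLSBSetNonZero64(1ULL << 40) == 40);
    }
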
+
+// Variable-length integer encoding.
+class Varint {
+ public:
+  // Maximum length of the varint encoding of a uint32.
+  static const int kMax32 = 5;
+
+  // Attempts to parse a varint32 from a prefix of the bytes in [ptr,limit-1].
+  // Never reads a character at or beyond limit.  If a valid/terminated varint32
+  // was found in the range, stores it in *OUTPUT and returns a pointer just
+  // past the last byte of the varint32. Else returns NULL.  On success,
+  // "result <= limit".
+  static const char* Parse32WithLimit(const char* ptr, const char* limit,
+                                      uint32* OUTPUT);
+
+  // REQUIRES   "ptr" points to a buffer of length sufficient to hold "v".
+  // EFFECTS    Encodes "v" into "ptr" and returns a pointer to the
+  //            byte just past the last encoded byte.
+  static char* Encode32(char* ptr, uint32 v);
+
+  // EFFECTS    Appends the varint representation of "value" to "*s".
+  static void Append32(string* s, uint32 value);
+};
+
+inline const char* Varint::Parse32WithLimit(const char* p,
+                                            const char* l,
+                                            uint32* OUTPUT) {
+  const unsigned char* ptr = reinterpret_cast<const unsigned char*>(p);
+  const unsigned char* limit = reinterpret_cast<const unsigned char*>(l);
+  uint32 b, result;
+  if (ptr >= limit) return NULL;
+  b = *(ptr++); result = b & 127;          if (b < 128) goto done;
+  if (ptr >= limit) return NULL;
+  b = *(ptr++); result |= (b & 127) <<  7; if (b < 128) goto done;
+  if (ptr >= limit) return NULL;
+  b = *(ptr++); result |= (b & 127) << 14; if (b < 128) goto done;
+  if (ptr >= limit) return NULL;
+  b = *(ptr++); result |= (b & 127) << 21; if (b < 128) goto done;
+  if (ptr >= limit) return NULL;
+  b = *(ptr++); result |= (b & 127) << 28; if (b < 16) goto done;
+  return NULL;       // Value is too long to be a varint32
+ done:
+  *OUTPUT = result;
+  return reinterpret_cast<const char*>(ptr);
+}
+
+inline char* Varint::Encode32(char* sptr, uint32 v) {
+  // Operate on characters as unsigneds
+  unsigned char* ptr = reinterpret_cast<unsigned char*>(sptr);
+  static const int B = 128;
+  if (v < (1<<7)) {
+    *(ptr++) = v;
+  } else if (v < (1<<14)) {
+    *(ptr++) = v | B;
+    *(ptr++) = v>>7;
+  } else if (v < (1<<21)) {
+    *(ptr++) = v | B;
+    *(ptr++) = (v>>7) | B;
+    *(ptr++) = v>>14;
+  } else if (v < (1<<28)) {
+    *(ptr++) = v | B;
+    *(ptr++) = (v>>7) | B;
+    *(ptr++) = (v>>14) | B;
+    *(ptr++) = v>>21;
+  } else {
+    *(ptr++) = v | B;
+    *(ptr++) = (v>>7) | B;
+    *(ptr++) = (v>>14) | B;
+    *(ptr++) = (v>>21) | B;
+    *(ptr++) = v>>28;
+  }
+  return reinterpret_cast<char*>(ptr);
+}
+
+// If you know the internal layout of the std::string in use, you can
+// replace this function with one that resizes the string without
+// filling the new space with zeros (if applicable) --
+// it will be non-portable but faster.
+inline void STLStringResizeUninitialized(string* s, size_t new_size) {
+  s->resize(new_size);
+}
+
+// Return a mutable char* pointing to a string's internal buffer,
+// which may not be null-terminated. Writing through this pointer will
+// modify the string.
+//
+// string_as_array(&str)[i] is valid for 0 <= i < str.size() until the
+// next call to a string method that invalidates iterators.
+//
+// As of 2006-04, there is no standard-blessed way of getting a
+// mutable reference to a string's internal buffer. However, issue 530
+// (http://www.open-std.org/JTC1/SC22/WG21/docs/lwg-defects.html#530)
+// proposes this as the method. It will officially be part of the standard
+// for C++0x. This should already work on all current implementations.
+inline char* string_as_array(string* str) {
+  return str->empty() ? NULL : &*str->begin();
+}
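
Together, STLStringResizeUninitialized() and string_as_array() are the kind of
pattern that lets output be written directly into a std::string's buffer with
no intermediate copy. A sketch of that pattern (WriteIntoString is an
illustrative name):

    #include <string.h>
    #include <string>
    #include "snappy-stubs-internal.h"

    void WriteIntoString(const char* src, size_t n, std::string* dest) {
      // Size the destination first (zero-filled by the portable resize above),
      // then write through the mutable pointer into its internal buffer.
      snappy::STLStringResizeUninitialized(dest, n);
      if (n > 0) {
        memcpy(snappy::string_as_array(dest), src, n);
      }
    }
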
+
+}  // namespace snappy
+
+#endif  // UTIL_SNAPPY_OPENSOURCE_SNAPPY_STUBS_INTERNAL_H_

http://git-wip-us.apache.org/repos/asf/hadoop/blob/cb5ba73b/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/snappy-stubs-public.h
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/snappy-stubs-public.h b/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/snappy-stubs-public.h
new file mode 100644
index 0000000..d7bc09b
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/snappy-stubs-public.h
@@ -0,0 +1,82 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+// Author: sesse@google.com (Steinar H. Gunderson)
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Various type stubs for the open-source version of Snappy.
+//
+// This file cannot include config.h, as it is included from snappy.h,
+// which is a public header. Instead, snappy-stubs-public.h is generated
+// from snappy-stubs-public.h.in at configure time.
+
+#ifndef UTIL_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_
+#define UTIL_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_
+
+#include <stdint.h>
+#include <stddef.h>
+
+#if HAVE_SYS_UIO_H
+#include <sys/uio.h>
+#endif
+
+#define SNAPPY_MAJOR 1
+#define SNAPPY_MINOR 1
+#define SNAPPY_PATCHLEVEL 2
+#define SNAPPY_VERSION \
+    ((SNAPPY_MAJOR << 16) | (SNAPPY_MINOR << 8) | SNAPPY_PATCHLEVEL)
+
+#include <string>
+
+namespace snappy {
+
+typedef int8_t int8;
+typedef uint8_t uint8;
+typedef int16_t int16;
+typedef uint16_t uint16;
+typedef int32_t int32;
+typedef uint32_t uint32;
+typedef int64_t int64;
+typedef uint64_t uint64;
+
+typedef std::string string;
+
+#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
+  TypeName(const TypeName&);               \
+  void operator=(const TypeName&)
+
+#ifndef HAVE_SYS_UIO_H
+// Windows does not have an iovec type, yet the concept is universally useful.
+// It is simple to define it ourselves, so we put it inside our own namespace.
+struct iovec {
+	void* iov_base;
+	size_t iov_len;
+};
+#endif
+
+}  // namespace snappy
+
+#endif  // UTIL_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_
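
SNAPPY_MAJOR/MINOR/PATCHLEVEL are packed into the single SNAPPY_VERSION integer
(1.1.2 encodes as 0x010102), so client code can compare against a threshold
built the same way. A hypothetical compile-time check:

    #include "snappy-stubs-public.h"

    // True when the bundled snappy headers are version 1.1.0 or newer.
    // (Illustrative helper; the constant mirrors the SNAPPY_VERSION packing.)
    inline bool SnappyIsAtLeast_1_1_0() {
      return SNAPPY_VERSION >= ((1 << 16) | (1 << 8) | 0);
    }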

http://git-wip-us.apache.org/repos/asf/hadoop/blob/cb5ba73b/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/snappy-stubs-public.h.in
----------------------------------------------------------------------
diff --git a/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/snappy-stubs-public.h.in b/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/snappy-stubs-public.h.in
new file mode 100644
index 0000000..6c181a1
--- /dev/null
+++ b/hadoop-hdfs-project/hadoop-hdfsdb/src/main/native/snappy/snappy-stubs-public.h.in
@@ -0,0 +1,98 @@
+// Copyright 2011 Google Inc. All Rights Reserved.
+// Author: sesse@google.com (Steinar H. Gunderson)
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+//     * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+//     * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following disclaimer
+// in the documentation and/or other materials provided with the
+// distribution.
+//     * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived from
+// this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+//
+// Various type stubs for the open-source version of Snappy.
+//
+// This file cannot include config.h, as it is included from snappy.h,
+// which is a public header. Instead, snappy-stubs-public.h is generated
+// from snappy-stubs-public.h.in at configure time.
+
+#ifndef UTIL_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_
+#define UTIL_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_
+
+#if @ac_cv_have_stdint_h@
+#include <stdint.h>
+#endif
+
+#if @ac_cv_have_stddef_h@
+#include <stddef.h>
+#endif
+
+#if @ac_cv_have_sys_uio_h@
+#include <sys/uio.h>
+#endif
+
+#define SNAPPY_MAJOR @SNAPPY_MAJOR@
+#define SNAPPY_MINOR @SNAPPY_MINOR@
+#define SNAPPY_PATCHLEVEL @SNAPPY_PATCHLEVEL@
+#define SNAPPY_VERSION \
+    ((SNAPPY_MAJOR << 16) | (SNAPPY_MINOR << 8) | SNAPPY_PATCHLEVEL)
+
+#include <string>
+
+namespace snappy {
+
+#if @ac_cv_have_stdint_h@
+typedef int8_t int8;
+typedef uint8_t uint8;
+typedef int16_t int16;
+typedef uint16_t uint16;
+typedef int32_t int32;
+typedef uint32_t uint32;
+typedef int64_t int64;
+typedef uint64_t uint64;
+#else
+typedef signed char int8;
+typedef unsigned char uint8;
+typedef short int16;
+typedef unsigned short uint16;
+typedef int int32;
+typedef unsigned int uint32;
+typedef long long int64;
+typedef unsigned long long uint64;
+#endif
+
+typedef std::string string;
+
+#define DISALLOW_COPY_AND_ASSIGN(TypeName) \
+  TypeName(const TypeName&);               \
+  void operator=(const TypeName&)
+
+#if !@ac_cv_have_sys_uio_h@
+// Windows does not have an iovec type, yet the concept is universally useful.
+// It is simple to define it ourselves, so we put it inside our own namespace.
+struct iovec {
+	void* iov_base;
+	size_t iov_len;
+};
+#endif
+
+}  // namespace snappy
+
+#endif  // UTIL_SNAPPY_OPENSOURCE_SNAPPY_STUBS_PUBLIC_H_