Posted to common-commits@hadoop.apache.org by cu...@apache.org on 2007/05/16 21:23:53 UTC
svn commit: r538693 [6/20] - in /lucene/hadoop/trunk: ./ bin/ src/c++/pipes/
src/c++/pipes/api/ src/c++/pipes/api/hadoop/ src/c++/pipes/impl/
src/c++/utils/ src/c++/utils/api/ src/c++/utils/api/hadoop/
src/c++/utils/impl/ src/c++/utils/m4/ src/examples...
Added: lucene/hadoop/trunk/src/c++/pipes/configure.ac
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/c%2B%2B/pipes/configure.ac?view=auto&rev=538693
==============================================================================
--- lucene/hadoop/trunk/src/c++/pipes/configure.ac (added)
+++ lucene/hadoop/trunk/src/c++/pipes/configure.ac Wed May 16 12:23:48 2007
@@ -0,0 +1,53 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# -*- Autoconf -*-
+# Process this file with autoconf to produce a configure script.
+
+AC_PREREQ(2.59)
+AC_INIT(hadoop-pipes, 0.13.0, omalley@apache.org)
+
+AM_INIT_AUTOMAKE([subdir-objects foreign no-dist])
+
+AC_CONFIG_SRCDIR([impl/HadoopPipes.cc])
+AC_CONFIG_HEADER([impl/config.h])
+AC_CONFIG_FILES([Makefile])
+
+AC_PREFIX_DEFAULT(`pwd`/../install)
+
+USE_HADOOP_UTILS
+CHECK_INSTALL_CFLAG
+
+# Checks for programs.
+AC_PROG_CXX
+AC_PROG_LIBTOOL
+
+# Checks for libraries.
+
+# Checks for header files.
+AC_LANG(C++)
+AC_CHECK_HEADERS([unistd.h])
+
+# Checks for typedefs, structures, and compiler characteristics.
+AC_HEADER_STDBOOL
+AC_C_CONST
+AC_TYPE_OFF_T
+AC_TYPE_SIZE_T
+AC_FUNC_STRERROR_R
+
+# Checks for library functions.
+AC_CHECK_FUNCS([mkdir uname])
+AC_OUTPUT
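
(The two undefined macros above, USE_HADOOP_UTILS and CHECK_INSTALL_CFLAG, are custom checks, presumably the m4 files under src/c++/utils/m4/ that this commit also adds. The header and type checks feed the generated impl/config.h; see config.h.in later in this mail. As a minimal sketch of how C++ code typically consumes those results — the function below is hypothetical and not part of this commit:

    // Hypothetical consumer: guard platform-specific code on the HAVE_*
    // macro that AC_CHECK_HEADERS([unistd.h]) defines in config.h.
    #include "config.h"
    #ifdef HAVE_UNISTD_H
    #include <unistd.h>
    #endif

    long pageSize() {
    #ifdef HAVE_UNISTD_H
      return sysconf(_SC_PAGESIZE);   // unistd.h is available
    #else
      return 4096;                    // conservative fallback
    #endif
    }
)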
Added: lucene/hadoop/trunk/src/c++/pipes/depcomp
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/c%2B%2B/pipes/depcomp?view=auto&rev=538693
==============================================================================
--- lucene/hadoop/trunk/src/c++/pipes/depcomp (added)
+++ lucene/hadoop/trunk/src/c++/pipes/depcomp Wed May 16 12:23:48 2007
@@ -0,0 +1,522 @@
+#! /bin/sh
+# depcomp - compile a program generating dependencies as side-effects
+
+scriptversion=2004-05-31.23
+
+# Copyright (C) 1999, 2000, 2003, 2004 Free Software Foundation, Inc.
+
+# This program is free software; you can redistribute it and/or modify
+# it under the terms of the GNU General Public License as published by
+# the Free Software Foundation; either version 2, or (at your option)
+# any later version.
+
+# This program is distributed in the hope that it will be useful,
+# but WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+# GNU General Public License for more details.
+
+# You should have received a copy of the GNU General Public License
+# along with this program; if not, write to the Free Software
+# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA
+# 02111-1307, USA.
+
+# As a special exception to the GNU General Public License, if you
+# distribute this file as part of a program that contains a
+# configuration script generated by Autoconf, you may include it under
+# the same distribution terms that you use for the rest of that program.
+
+# Originally written by Alexandre Oliva <ol...@dcc.unicamp.br>.
+
+case $1 in
+ '')
+ echo "$0: No command. Try \`$0 --help' for more information." 1>&2
+ exit 1;
+ ;;
+ -h | --h*)
+ cat <<\EOF
+Usage: depcomp [--help] [--version] PROGRAM [ARGS]
+
+Run PROGRAM ARGS to compile a file, generating dependencies
+as side-effects.
+
+Environment variables:
+  depmode     Dependency tracking mode.
+  source      Source file read by `PROGRAM ARGS'.
+  object      Object file output by `PROGRAM ARGS'.
+  DEPDIR      Directory where to store dependencies.
+  depfile     Dependency file to output.
+  tmpdepfile  Temporary file to use when outputting dependencies.
+ libtool Whether libtool is used (yes/no).
+
+Report bugs to <bu...@gnu.org>.
+EOF
+ exit 0
+ ;;
+ -v | --v*)
+ echo "depcomp $scriptversion"
+ exit 0
+ ;;
+esac
+
+if test -z "$depmode" || test -z "$source" || test -z "$object"; then
+ echo "depcomp: Variables source, object and depmode must be set" 1>&2
+ exit 1
+fi
+
+# Dependencies for sub/bar.o or sub/bar.obj go into sub/.deps/bar.Po.
+depfile=${depfile-`echo "$object" |
+ sed 's|[^\\/]*$|'${DEPDIR-.deps}'/&|;s|\.\([^.]*\)$|.P\1|;s|Pobj$|Po|'`}
+tmpdepfile=${tmpdepfile-`echo "$depfile" | sed 's/\.\([^.]*\)$/.T\1/'`}
+
+rm -f "$tmpdepfile"
+
+# Some modes work just like other modes, but use different flags. We
+# parameterize here, but still list the modes in the big case below,
+# to make depend.m4 easier to write. Note that we *cannot* use a case
+# here, because this file can only contain one case statement.
+if test "$depmode" = hp; then
+ # HP compiler uses -M and no extra arg.
+ gccflag=-M
+ depmode=gcc
+fi
+
+if test "$depmode" = dashXmstdout; then
+ # This is just like dashmstdout with a different argument.
+ dashmflag=-xM
+ depmode=dashmstdout
+fi
+
+case "$depmode" in
+gcc3)
+## gcc 3 implements dependency tracking that does exactly what
+## we want. Yay! Note: for some reason libtool 1.4 doesn't like
+## it if -MD -MP comes after the -MF stuff. Hmm.
+ "$@" -MT "$object" -MD -MP -MF "$tmpdepfile"
+ stat=$?
+ if test $stat -eq 0; then :
+ else
+ rm -f "$tmpdepfile"
+ exit $stat
+ fi
+ mv "$tmpdepfile" "$depfile"
+ ;;
+
+gcc)
+## There are various ways to get dependency output from gcc. Here's
+## why we pick this rather obscure method:
+## - Don't want to use -MD because we'd like the dependencies to end
+## up in a subdir. Having to rename by hand is ugly.
+## (We might end up doing this anyway to support other compilers.)
+## - The DEPENDENCIES_OUTPUT environment variable makes gcc act like
+## -MM, not -M (despite what the docs say).
+## - Using -M directly means running the compiler twice (even worse
+## than renaming).
+ if test -z "$gccflag"; then
+ gccflag=-MD,
+ fi
+ "$@" -Wp,"$gccflag$tmpdepfile"
+ stat=$?
+ if test $stat -eq 0; then :
+ else
+ rm -f "$tmpdepfile"
+ exit $stat
+ fi
+ rm -f "$depfile"
+ echo "$object : \\" > "$depfile"
+ alpha=ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz
+## The second -e expression handles DOS-style file names with drive letters.
+ sed -e 's/^[^:]*: / /' \
+ -e 's/^['$alpha']:\/[^:]*: / /' < "$tmpdepfile" >> "$depfile"
+## This next piece of magic avoids the `deleted header file' problem.
+## The problem is that when a header file which appears in a .P file
+## is deleted, the dependency causes make to die (because there is
+## typically no way to rebuild the header). We avoid this by adding
+## dummy dependencies for each header file. Too bad gcc doesn't do
+## this for us directly.
+ tr ' ' '
+' < "$tmpdepfile" |
+## Some versions of gcc put a space before the `:'. On the theory
+## that the space means something, we add a space to the output as
+## well.
+## Some versions of the HPUX 10.20 sed can't process this invocation
+## correctly. Breaking it into two sed invocations is a workaround.
+ sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
+ rm -f "$tmpdepfile"
+ ;;
+
+hp)
+ # This case exists only to let depend.m4 do its work. It works by
+ # looking at the text of this script. This case will never be run,
+ # since it is checked for above.
+ exit 1
+ ;;
+
+sgi)
+ if test "$libtool" = yes; then
+ "$@" "-Wp,-MDupdate,$tmpdepfile"
+ else
+ "$@" -MDupdate "$tmpdepfile"
+ fi
+ stat=$?
+ if test $stat -eq 0; then :
+ else
+ rm -f "$tmpdepfile"
+ exit $stat
+ fi
+ rm -f "$depfile"
+
+  if test -f "$tmpdepfile"; then  # yes, the source file depends on other files
+ echo "$object : \\" > "$depfile"
+
+ # Clip off the initial element (the dependent). Don't try to be
+ # clever and replace this with sed code, as IRIX sed won't handle
+ # lines with more than a fixed number of characters (4096 in
+ # IRIX 6.2 sed, 8192 in IRIX 6.5). We also remove comment lines;
+ # the IRIX cc adds comments like `#:fec' to the end of the
+ # dependency line.
+ tr ' ' '
+' < "$tmpdepfile" \
+ | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' | \
+ tr '
+' ' ' >> $depfile
+ echo >> $depfile
+
+ # The second pass generates a dummy entry for each header file.
+ tr ' ' '
+' < "$tmpdepfile" \
+ | sed -e 's/^.*\.o://' -e 's/#.*$//' -e '/^$/ d' -e 's/$/:/' \
+ >> $depfile
+ else
+ # The sourcefile does not contain any dependencies, so just
+ # store a dummy comment line, to avoid errors with the Makefile
+ # "include basename.Plo" scheme.
+ echo "#dummy" > "$depfile"
+ fi
+ rm -f "$tmpdepfile"
+ ;;
+
+aix)
+ # The C for AIX Compiler uses -M and outputs the dependencies
+ # in a .u file. In older versions, this file always lives in the
+ # current directory. Also, the AIX compiler puts `$object:' at the
+ # start of each line; $object doesn't have directory information.
+ # Version 6 uses the directory in both cases.
+ stripped=`echo "$object" | sed 's/\(.*\)\..*$/\1/'`
+ tmpdepfile="$stripped.u"
+ if test "$libtool" = yes; then
+ "$@" -Wc,-M
+ else
+ "$@" -M
+ fi
+ stat=$?
+
+ if test -f "$tmpdepfile"; then :
+ else
+ stripped=`echo "$stripped" | sed 's,^.*/,,'`
+ tmpdepfile="$stripped.u"
+ fi
+
+ if test $stat -eq 0; then :
+ else
+ rm -f "$tmpdepfile"
+ exit $stat
+ fi
+
+ if test -f "$tmpdepfile"; then
+ outname="$stripped.o"
+ # Each line is of the form `foo.o: dependent.h'.
+ # Do two passes, one to just change these to
+ # `$object: dependent.h' and one to simply `dependent.h:'.
+ sed -e "s,^$outname:,$object :," < "$tmpdepfile" > "$depfile"
+ sed -e "s,^$outname: \(.*\)$,\1:," < "$tmpdepfile" >> "$depfile"
+ else
+ # The sourcefile does not contain any dependencies, so just
+ # store a dummy comment line, to avoid errors with the Makefile
+ # "include basename.Plo" scheme.
+ echo "#dummy" > "$depfile"
+ fi
+ rm -f "$tmpdepfile"
+ ;;
+
+icc)
+ # Intel's C compiler understands `-MD -MF file'. However on
+ # icc -MD -MF foo.d -c -o sub/foo.o sub/foo.c
+ # ICC 7.0 will fill foo.d with something like
+ # foo.o: sub/foo.c
+ # foo.o: sub/foo.h
+ # which is wrong. We want:
+ # sub/foo.o: sub/foo.c
+ # sub/foo.o: sub/foo.h
+ # sub/foo.c:
+ # sub/foo.h:
+ # ICC 7.1 will output
+ # foo.o: sub/foo.c sub/foo.h
+ # and will wrap long lines using \ :
+ # foo.o: sub/foo.c ... \
+ # sub/foo.h ... \
+ # ...
+
+ "$@" -MD -MF "$tmpdepfile"
+ stat=$?
+ if test $stat -eq 0; then :
+ else
+ rm -f "$tmpdepfile"
+ exit $stat
+ fi
+ rm -f "$depfile"
+ # Each line is of the form `foo.o: dependent.h',
+ # or `foo.o: dep1.h dep2.h \', or ` dep3.h dep4.h \'.
+ # Do two passes, one to just change these to
+ # `$object: dependent.h' and one to simply `dependent.h:'.
+ sed "s,^[^:]*:,$object :," < "$tmpdepfile" > "$depfile"
+ # Some versions of the HPUX 10.20 sed can't process this invocation
+ # correctly. Breaking it into two sed invocations is a workaround.
+ sed 's,^[^:]*: \(.*\)$,\1,;s/^\\$//;/^$/d;/:$/d' < "$tmpdepfile" |
+ sed -e 's/$/ :/' >> "$depfile"
+ rm -f "$tmpdepfile"
+ ;;
+
+tru64)
+ # The Tru64 compiler uses -MD to generate dependencies as a side
+ # effect. `cc -MD -o foo.o ...' puts the dependencies into `foo.o.d'.
+ # At least on Alpha/Redhat 6.1, Compaq CCC V6.2-504 seems to put
+ # dependencies in `foo.d' instead, so we check for that too.
+ # Subdirectories are respected.
+ dir=`echo "$object" | sed -e 's|/[^/]*$|/|'`
+ test "x$dir" = "x$object" && dir=
+ base=`echo "$object" | sed -e 's|^.*/||' -e 's/\.o$//' -e 's/\.lo$//'`
+
+ if test "$libtool" = yes; then
+ # Dependencies are output in .lo.d with libtool 1.4.
+      # With libtool 1.5 they are output both in $dir.libs/$base.o.d
+      # and in $dir$base.o.d.  We process the
+ # latter, because the former will be cleaned when $dir.libs is
+ # erased.
+ tmpdepfile1="$dir.libs/$base.lo.d"
+ tmpdepfile2="$dir$base.o.d"
+ tmpdepfile3="$dir.libs/$base.d"
+ "$@" -Wc,-MD
+ else
+ tmpdepfile1="$dir$base.o.d"
+ tmpdepfile2="$dir$base.d"
+ tmpdepfile3="$dir$base.d"
+ "$@" -MD
+ fi
+
+ stat=$?
+ if test $stat -eq 0; then :
+ else
+ rm -f "$tmpdepfile1" "$tmpdepfile2" "$tmpdepfile3"
+ exit $stat
+ fi
+
+ if test -f "$tmpdepfile1"; then
+ tmpdepfile="$tmpdepfile1"
+ elif test -f "$tmpdepfile2"; then
+ tmpdepfile="$tmpdepfile2"
+ else
+ tmpdepfile="$tmpdepfile3"
+ fi
+ if test -f "$tmpdepfile"; then
+ sed -e "s,^.*\.[a-z]*:,$object:," < "$tmpdepfile" > "$depfile"
+ # That's a tab and a space in the [].
+ sed -e 's,^.*\.[a-z]*:[ ]*,,' -e 's,$,:,' < "$tmpdepfile" >> "$depfile"
+ else
+ echo "#dummy" > "$depfile"
+ fi
+ rm -f "$tmpdepfile"
+ ;;
+
+#nosideeffect)
+ # This comment above is used by automake to tell side-effect
+ # dependency tracking mechanisms from slower ones.
+
+dashmstdout)
+ # Important note: in order to support this mode, a compiler *must*
+ # always write the preprocessed file to stdout, regardless of -o.
+ "$@" || exit $?
+
+ # Remove the call to Libtool.
+ if test "$libtool" = yes; then
+ while test $1 != '--mode=compile'; do
+ shift
+ done
+ shift
+ fi
+
+ # Remove `-o $object'.
+ IFS=" "
+ for arg
+ do
+ case $arg in
+ -o)
+ shift
+ ;;
+ $object)
+ shift
+ ;;
+ *)
+ set fnord "$@" "$arg"
+ shift # fnord
+ shift # $arg
+ ;;
+ esac
+ done
+
+ test -z "$dashmflag" && dashmflag=-M
+ # Require at least two characters before searching for `:'
+ # in the target name. This is to cope with DOS-style filenames:
+ # a dependency such as `c:/foo/bar' could be seen as target `c' otherwise.
+ "$@" $dashmflag |
+ sed 's:^[ ]*[^: ][^:][^:]*\:[ ]*:'"$object"'\: :' > "$tmpdepfile"
+ rm -f "$depfile"
+ cat < "$tmpdepfile" > "$depfile"
+ tr ' ' '
+' < "$tmpdepfile" | \
+## Some versions of the HPUX 10.20 sed can't process this invocation
+## correctly. Breaking it into two sed invocations is a workaround.
+ sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
+ rm -f "$tmpdepfile"
+ ;;
+
+dashXmstdout)
+ # This case only exists to satisfy depend.m4. It is never actually
+ # run, as this mode is specially recognized in the preamble.
+ exit 1
+ ;;
+
+makedepend)
+ "$@" || exit $?
+ # Remove any Libtool call
+ if test "$libtool" = yes; then
+ while test $1 != '--mode=compile'; do
+ shift
+ done
+ shift
+ fi
+ # X makedepend
+ shift
+ cleared=no
+ for arg in "$@"; do
+ case $cleared in
+ no)
+ set ""; shift
+ cleared=yes ;;
+ esac
+ case "$arg" in
+ -D*|-I*)
+ set fnord "$@" "$arg"; shift ;;
+ # Strip any option that makedepend may not understand. Remove
+ # the object too, otherwise makedepend will parse it as a source file.
+ -*|$object)
+ ;;
+ *)
+ set fnord "$@" "$arg"; shift ;;
+ esac
+ done
+ obj_suffix="`echo $object | sed 's/^.*\././'`"
+ touch "$tmpdepfile"
+ ${MAKEDEPEND-makedepend} -o"$obj_suffix" -f"$tmpdepfile" "$@"
+ rm -f "$depfile"
+ cat < "$tmpdepfile" > "$depfile"
+ sed '1,2d' "$tmpdepfile" | tr ' ' '
+' | \
+## Some versions of the HPUX 10.20 sed can't process this invocation
+## correctly. Breaking it into two sed invocations is a workaround.
+ sed -e 's/^\\$//' -e '/^$/d' -e '/:$/d' | sed -e 's/$/ :/' >> "$depfile"
+ rm -f "$tmpdepfile" "$tmpdepfile".bak
+ ;;
+
+cpp)
+ # Important note: in order to support this mode, a compiler *must*
+ # always write the preprocessed file to stdout.
+ "$@" || exit $?
+
+ # Remove the call to Libtool.
+ if test "$libtool" = yes; then
+ while test $1 != '--mode=compile'; do
+ shift
+ done
+ shift
+ fi
+
+ # Remove `-o $object'.
+ IFS=" "
+ for arg
+ do
+ case $arg in
+ -o)
+ shift
+ ;;
+ $object)
+ shift
+ ;;
+ *)
+ set fnord "$@" "$arg"
+ shift # fnord
+ shift # $arg
+ ;;
+ esac
+ done
+
+ "$@" -E |
+ sed -n '/^# [0-9][0-9]* "\([^"]*\)".*/ s:: \1 \\:p' |
+ sed '$ s: \\$::' > "$tmpdepfile"
+ rm -f "$depfile"
+ echo "$object : \\" > "$depfile"
+ cat < "$tmpdepfile" >> "$depfile"
+ sed < "$tmpdepfile" '/^$/d;s/^ //;s/ \\$//;s/$/ :/' >> "$depfile"
+ rm -f "$tmpdepfile"
+ ;;
+
+msvisualcpp)
+ # Important note: in order to support this mode, a compiler *must*
+ # always write the preprocessed file to stdout, regardless of -o,
+ # because we must use -o when running libtool.
+ "$@" || exit $?
+ IFS=" "
+ for arg
+ do
+ case "$arg" in
+ "-Gm"|"/Gm"|"-Gi"|"/Gi"|"-ZI"|"/ZI")
+ set fnord "$@"
+ shift
+ shift
+ ;;
+ *)
+ set fnord "$@" "$arg"
+ shift
+ shift
+ ;;
+ esac
+ done
+ "$@" -E |
+ sed -n '/^#line [0-9][0-9]* "\([^"]*\)"/ s::echo "`cygpath -u \\"\1\\"`":p' | sort | uniq > "$tmpdepfile"
+ rm -f "$depfile"
+ echo "$object : \\" > "$depfile"
+ . "$tmpdepfile" | sed 's% %\\ %g' | sed -n '/^\(.*\)$/ s:: \1 \\:p' >> "$depfile"
+ echo " " >> "$depfile"
+ . "$tmpdepfile" | sed 's% %\\ %g' | sed -n '/^\(.*\)$/ s::\1\::p' >> "$depfile"
+ rm -f "$tmpdepfile"
+ ;;
+
+none)
+ exec "$@"
+ ;;
+
+*)
+ echo "Unknown depmode $depmode" 1>&2
+ exit 1
+ ;;
+esac
+
+exit 0
+
+# Local Variables:
+# mode: shell-script
+# sh-indentation: 2
+# eval: (add-hook 'write-file-hooks 'time-stamp)
+# time-stamp-start: "scriptversion="
+# time-stamp-format: "%:y-%02m-%02d.%02H"
+# time-stamp-end: "$"
+# End:
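
(To make the `deleted header file' trick in the gcc branch concrete: the script first copies the compiler's dependency list into $depfile, then appends a dummy target for every prerequisite. With hypothetical file names, the resulting sub/.deps/foo.Po would look roughly like:

    sub/foo.o : \
      sub/foo.c \
      sub/foo.h
    sub/foo.c :
    sub/foo.h :

The trailing `name :' rules are what the final sed invocation adds; if sub/foo.h is later deleted, make still finds a dummy rule for it instead of dying.)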
Added: lucene/hadoop/trunk/src/c++/pipes/impl/HadoopPipes.cc
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/c%2B%2B/pipes/impl/HadoopPipes.cc?view=auto&rev=538693
==============================================================================
--- lucene/hadoop/trunk/src/c++/pipes/impl/HadoopPipes.cc (added)
+++ lucene/hadoop/trunk/src/c++/pipes/impl/HadoopPipes.cc Wed May 16 12:23:48 2007
@@ -0,0 +1,915 @@
+#include "hadoop/Pipes.hh"
+#include "hadoop/SerialUtils.hh"
+#include "hadoop/StringUtils.hh"
+
+#include <map>
+#include <vector>
+
+#include <errno.h>
+#include <netinet/in.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+using std::map;
+using std::string;
+using std::vector;
+
+using namespace HadoopUtils;
+
+namespace HadoopPipes {
+
+ class JobConfImpl: public JobConf {
+ private:
+ map<string, string> values;
+ public:
+ void set(const string& key, const string& value) {
+ values[key] = value;
+ }
+
+ virtual bool hasKey(const string& key) const {
+ return values.find(key) != values.end();
+ }
+
+ virtual const string& get(const string& key) const {
+ map<string,string>::const_iterator itr = values.find(key);
+ if (itr == values.end()) {
+ throw Error("Key " + key + " not found in JobConf");
+ }
+ return itr->second;
+ }
+
+ virtual int getInt(const string& key) const {
+ const string& val = get(key);
+ return toInt(val);
+ }
+
+ virtual float getFloat(const string& key) const {
+ const string& val = get(key);
+ return toFloat(val);
+ }
+
+ virtual bool getBoolean(const string&key) const {
+ const string& val = get(key);
+ return toBool(val);
+ }
+ };
+
+ class DownwardProtocol {
+ public:
+ virtual void start(int protocol) = 0;
+ virtual void setJobConf(vector<string> values) = 0;
+ virtual void setInputTypes(string keyType, string valueType) = 0;
+ virtual void runMap(string inputSplit, int numReduces, bool pipedInput)= 0;
+ virtual void mapItem(const string& key, const string& value) = 0;
+ virtual void runReduce(int reduce, bool pipedOutput) = 0;
+ virtual void reduceKey(const string& key) = 0;
+ virtual void reduceValue(const string& value) = 0;
+ virtual void close() = 0;
+ virtual void abort() = 0;
+ virtual ~DownwardProtocol() {}
+ };
+
+ class UpwardProtocol {
+ public:
+ virtual void output(const string& key, const string& value) = 0;
+ virtual void partitionedOutput(int reduce, const string& key,
+ const string& value) = 0;
+ virtual void status(const string& message) = 0;
+ virtual void progress(float progress) = 0;
+ virtual void done() = 0;
+ virtual ~UpwardProtocol() {}
+ };
+
+ class Protocol {
+ public:
+ virtual void nextEvent() = 0;
+ virtual UpwardProtocol* getUplink() = 0;
+ virtual ~Protocol() {}
+ };
+
+ class TextUpwardProtocol: public UpwardProtocol {
+ private:
+ FILE* stream;
+ static const char fieldSeparator = '\t';
+ static const char lineSeparator = '\n';
+
+ void writeBuffer(const string& buffer) {
+      fprintf(stream, "%s", quoteString(buffer, "\t\n").c_str());
+ }
+
+ public:
+ TextUpwardProtocol(FILE* _stream): stream(_stream) {}
+
+ virtual void output(const string& key, const string& value) {
+ fprintf(stream, "output%c", fieldSeparator);
+ writeBuffer(key);
+ fprintf(stream, "%c", fieldSeparator);
+ writeBuffer(value);
+ fprintf(stream, "%c", lineSeparator);
+ }
+
+ virtual void partitionedOutput(int reduce, const string& key,
+ const string& value) {
+ fprintf(stream, "parititionedOutput%c%d%c", fieldSeparator, reduce,
+ fieldSeparator);
+ writeBuffer(key);
+ fprintf(stream, "%c", fieldSeparator);
+ writeBuffer(value);
+ fprintf(stream, "%c", lineSeparator);
+ }
+
+ virtual void status(const string& message) {
+ fprintf(stream, "status%c%s%c", fieldSeparator, message.c_str(),
+ lineSeparator);
+ }
+
+ virtual void progress(float progress) {
+ fprintf(stream, "progress%c%f%c", fieldSeparator, progress,
+ lineSeparator);
+ }
+
+ virtual void done() {
+ fprintf(stream, "done%c", lineSeparator);
+ }
+ };
+
+ class TextProtocol: public Protocol {
+ private:
+ FILE* downStream;
+ DownwardProtocol* handler;
+ UpwardProtocol* uplink;
+ string key;
+ string value;
+
+ int readUpto(string& buffer, const char* limit) {
+ int ch;
+ buffer.clear();
+ while ((ch = getc(downStream)) != -1) {
+ if (strchr(limit, ch) != NULL) {
+ return ch;
+ }
+ buffer += ch;
+ }
+ return -1;
+ }
+
+ static const char* delim;
+ public:
+
+ TextProtocol(FILE* down, DownwardProtocol* _handler, FILE* up) {
+ downStream = down;
+ uplink = new TextUpwardProtocol(up);
+ handler = _handler;
+ }
+
+ UpwardProtocol* getUplink() {
+ return uplink;
+ }
+
+ virtual void nextEvent() {
+ string command;
+ string arg;
+ int sep;
+ sep = readUpto(command, delim);
+ if (command == "mapItem") {
+ HADOOP_ASSERT(sep == '\t', "Short text protocol command " + command);
+ sep = readUpto(key, delim);
+ HADOOP_ASSERT(sep == '\t', "Short text protocol command " + command);
+ sep = readUpto(value, delim);
+ HADOOP_ASSERT(sep == '\n', "Long text protocol command " + command);
+ handler->mapItem(key, value);
+ } else if (command == "reduceValue") {
+ HADOOP_ASSERT(sep == '\t', "Short text protocol command " + command);
+ sep = readUpto(value, delim);
+ HADOOP_ASSERT(sep == '\n', "Long text protocol command " + command);
+ handler->reduceValue(value);
+ } else if (command == "reduceKey") {
+ HADOOP_ASSERT(sep == '\t', "Short text protocol command " + command);
+ sep = readUpto(key, delim);
+ HADOOP_ASSERT(sep == '\n', "Long text protocol command " + command);
+ handler->reduceKey(key);
+ } else if (command == "start") {
+ HADOOP_ASSERT(sep == '\t', "Short text protocol command " + command);
+ sep = readUpto(arg, delim);
+ HADOOP_ASSERT(sep == '\n', "Long text protocol command " + command);
+ handler->start(toInt(arg));
+ } else if (command == "setJobConf") {
+ HADOOP_ASSERT(sep == '\t', "Short text protocol command " + command);
+ sep = readUpto(arg, delim);
+ int len = toInt(arg);
+          vector<string> values;
+          values.reserve(len);
+ for(int i=0; i < len; ++i) {
+ HADOOP_ASSERT(sep == '\t', "Short text protocol command " + command);
+ sep = readUpto(arg, delim);
+ values.push_back(arg);
+ }
+ HADOOP_ASSERT(sep == '\n', "Long text protocol command " + command);
+ handler->setJobConf(values);
+ } else if (command == "setInputTypes") {
+ HADOOP_ASSERT(sep == '\t', "Short text protocol command " + command);
+ sep = readUpto(key, delim);
+ HADOOP_ASSERT(sep == '\t', "Short text protocol command " + command);
+ sep = readUpto(value, delim);
+ HADOOP_ASSERT(sep == '\n', "Long text protocol command " + command);
+ handler->setInputTypes(key, value);
+ } else if (command == "runMap") {
+ string split;
+ HADOOP_ASSERT(sep == '\t', "Short text protocol command " + command);
+ sep = readUpto(split, delim);
+ string reduces;
+ HADOOP_ASSERT(sep == '\t', "Short text protocol command " + command);
+ sep = readUpto(reduces, delim);
+ HADOOP_ASSERT(sep == '\t', "Short text protocol command " + command);
+ sep = readUpto(arg, delim);
+ HADOOP_ASSERT(sep == '\n', "Long text protocol command " + command);
+ handler->runMap(split, toInt(reduces), toBool(arg));
+ } else if (command == "runReduce") {
+ HADOOP_ASSERT(sep == '\t', "Short text protocol command " + command);
+ sep = readUpto(arg, delim);
+ HADOOP_ASSERT(sep == '\t', "Short text protocol command " + command);
+ string piped;
+ sep = readUpto(piped, delim);
+ HADOOP_ASSERT(sep == '\n', "Long text protocol command " + command);
+ handler->runReduce(toInt(arg), toBool(piped));
+ } else if (command == "abort") {
+ HADOOP_ASSERT(sep == '\n', "Long text protocol command " + command);
+ handler->abort();
+ } else if (command == "close") {
+ HADOOP_ASSERT(sep == '\n', "Long text protocol command " + command);
+ handler->close();
+ } else {
+ throw Error("Illegal text protocol command " + command);
+ }
+ }
+
+ ~TextProtocol() {
+ delete uplink;
+ }
+ };
+ const char* TextProtocol::delim = "\t\n";
+
+ enum MESSAGE_TYPE {START_MESSAGE, SET_JOB_CONF, SET_INPUT_TYPES, RUN_MAP,
+ MAP_ITEM, RUN_REDUCE, REDUCE_KEY, REDUCE_VALUE,
+ CLOSE, ABORT,
+ OUTPUT=50, PARTITIONED_OUTPUT, STATUS, PROGRESS, DONE};
+
+ class BinaryUpwardProtocol: public UpwardProtocol {
+ private:
+ FileOutStream* stream;
+ public:
+ BinaryUpwardProtocol(FILE* _stream) {
+ stream = new FileOutStream();
+ HADOOP_ASSERT(stream->open(_stream), "problem opening stream");
+ }
+
+ virtual void output(const string& key, const string& value) {
+ serializeInt(OUTPUT, *stream);
+ serializeString(key, *stream);
+ serializeString(value, *stream);
+ }
+
+ virtual void partitionedOutput(int reduce, const string& key,
+ const string& value) {
+ serializeInt(PARTITIONED_OUTPUT, *stream);
+ serializeInt(reduce, *stream);
+ serializeString(key, *stream);
+ serializeString(value, *stream);
+ }
+
+ virtual void status(const string& message) {
+ serializeInt(STATUS, *stream);
+ serializeString(message, *stream);
+ }
+
+ virtual void progress(float progress) {
+ serializeInt(PROGRESS, *stream);
+ serializeFloat(progress, *stream);
+ }
+
+ virtual void done() {
+ serializeInt(DONE, *stream);
+ }
+
+ ~BinaryUpwardProtocol() {
+ delete stream;
+ }
+ };
+
+ class BinaryProtocol: public Protocol {
+ private:
+ FileInStream* downStream;
+ DownwardProtocol* handler;
+ BinaryUpwardProtocol * uplink;
+ string key;
+ string value;
+
+ public:
+ BinaryProtocol(FILE* down, DownwardProtocol* _handler, FILE* up) {
+ downStream = new FileInStream();
+ downStream->open(down);
+ uplink = new BinaryUpwardProtocol(up);
+ handler = _handler;
+ }
+
+ UpwardProtocol* getUplink() {
+ return uplink;
+ }
+
+ virtual void nextEvent() {
+ int32_t cmd;
+ cmd = deserializeInt(*downStream);
+ switch (cmd) {
+ case START_MESSAGE: {
+ int32_t prot;
+ prot = deserializeInt(*downStream);
+ handler->start(prot);
+ break;
+ }
+ case SET_JOB_CONF: {
+ int32_t entries;
+ entries = deserializeInt(*downStream);
+        vector<string> result;
+        result.reserve(entries);
+ for(int i=0; i < entries; ++i) {
+ string item;
+ deserializeString(item, *downStream);
+ result.push_back(item);
+ }
+ handler->setJobConf(result);
+ break;
+ }
+ case SET_INPUT_TYPES: {
+ string keyType;
+ string valueType;
+ deserializeString(keyType, *downStream);
+ deserializeString(valueType, *downStream);
+ handler->setInputTypes(keyType, valueType);
+ break;
+ }
+ case RUN_MAP: {
+ string split;
+ int32_t numReduces;
+ int32_t piped;
+ deserializeString(split, *downStream);
+ numReduces = deserializeInt(*downStream);
+ piped = deserializeInt(*downStream);
+ handler->runMap(split, numReduces, piped);
+ break;
+ }
+ case MAP_ITEM: {
+ deserializeString(key, *downStream);
+ deserializeString(value, *downStream);
+ handler->mapItem(key, value);
+ break;
+ }
+ case RUN_REDUCE: {
+ int32_t reduce;
+ int32_t piped;
+ reduce = deserializeInt(*downStream);
+ piped = deserializeInt(*downStream);
+ handler->runReduce(reduce, piped);
+ break;
+ }
+ case REDUCE_KEY: {
+ deserializeString(key, *downStream);
+ handler->reduceKey(key);
+ break;
+ }
+ case REDUCE_VALUE: {
+ deserializeString(value, *downStream);
+ handler->reduceValue(value);
+ break;
+ }
+ case CLOSE:
+ handler->close();
+ break;
+ case ABORT:
+ handler->abort();
+ break;
+ default:
+ HADOOP_ASSERT(false, "Unknown binary command " + toString(cmd));
+ }
+ }
+
+ virtual ~BinaryProtocol() {
+ delete downStream;
+ delete uplink;
+ }
+ };
+
+ /**
+ * Define a context object to give to combiners that will let them
+ * go through the values and emit their results correctly.
+ */
+ class CombineContext: public ReduceContext {
+ private:
+ ReduceContext* baseContext;
+ Partitioner* partitioner;
+ int numReduces;
+ UpwardProtocol* uplink;
+ bool firstKey;
+ bool firstValue;
+ map<string, vector<string> >::iterator keyItr;
+ map<string, vector<string> >::iterator endKeyItr;
+ vector<string>::iterator valueItr;
+ vector<string>::iterator endValueItr;
+
+ public:
+ CombineContext(ReduceContext* _baseContext,
+ Partitioner* _partitioner,
+ int _numReduces,
+ UpwardProtocol* _uplink,
+ map<string, vector<string> >& data) {
+ baseContext = _baseContext;
+ partitioner = _partitioner;
+ numReduces = _numReduces;
+ uplink = _uplink;
+ keyItr = data.begin();
+ endKeyItr = data.end();
+ firstKey = true;
+ firstValue = true;
+ }
+
+ virtual const JobConf* getJobConf() {
+ return baseContext->getJobConf();
+ }
+
+ virtual const std::string& getInputKey() {
+ return keyItr->first;
+ }
+
+ virtual const std::string& getInputValue() {
+ return *valueItr;
+ }
+
+ virtual void emit(const std::string& key, const std::string& value) {
+ if (partitioner != NULL) {
+ uplink->partitionedOutput(partitioner->partition(key, numReduces),
+ key, value);
+ } else {
+ uplink->output(key, value);
+ }
+ }
+
+ virtual void progress() {
+ baseContext->progress();
+ }
+
+ virtual void setStatus(const std::string& status) {
+ baseContext->setStatus(status);
+ }
+
+ bool nextKey() {
+ if (firstKey) {
+ firstKey = false;
+ } else {
+ ++keyItr;
+ }
+ if (keyItr != endKeyItr) {
+ valueItr = keyItr->second.begin();
+ endValueItr = keyItr->second.end();
+ firstValue = true;
+ return true;
+ }
+ return false;
+ }
+
+ virtual bool nextValue() {
+ if (firstValue) {
+ firstValue = false;
+ } else {
+ ++valueItr;
+ }
+ return valueItr != endValueItr;
+ }
+
+ };
+
+ /**
+   * A RecordWriter that buffers the map outputs and runs the combiner
+   * over them when the buffer is full.
+ */
+ class CombineRunner: public RecordWriter {
+ private:
+ map<string, vector<string> > data;
+ int64_t spillSize;
+ int64_t numBytes;
+ ReduceContext* baseContext;
+ Partitioner* partitioner;
+ int numReduces;
+ UpwardProtocol* uplink;
+ Reducer* combiner;
+ public:
+ CombineRunner(int64_t _spillSize, ReduceContext* _baseContext,
+ Reducer* _combiner, UpwardProtocol* _uplink,
+ Partitioner* _partitioner, int _numReduces) {
+ numBytes = 0;
+ spillSize = _spillSize;
+ baseContext = _baseContext;
+ partitioner = _partitioner;
+ numReduces = _numReduces;
+ uplink = _uplink;
+ combiner = _combiner;
+ }
+
+ virtual void emit(const std::string& key,
+ const std::string& value) {
+ numBytes += key.length() + value.length();
+ data[key].push_back(value);
+ if (numBytes >= spillSize) {
+ spillAll();
+ }
+ }
+
+ virtual void close() {
+ spillAll();
+ }
+
+ private:
+ void spillAll() {
+ CombineContext context(baseContext, partitioner, numReduces,
+ uplink, data);
+ while (context.nextKey()) {
+ combiner->reduce(context);
+ }
+ data.clear();
+ }
+ };
+
+ class TaskContextImpl: public MapContext, public ReduceContext,
+ public DownwardProtocol {
+ private:
+ bool done;
+ JobConf* jobConf;
+ string key;
+ const string* newKey;
+ const string* value;
+ bool hasTask;
+ bool isNewKey;
+ bool isNewValue;
+ string* inputKeyClass;
+ string* inputValueClass;
+ string status;
+ float progressFloat;
+ uint64_t lastProgress;
+ bool statusSet;
+ Protocol* protocol;
+ UpwardProtocol *uplink;
+ string* inputSplit;
+ RecordReader* reader;
+ Mapper* mapper;
+ Reducer* reducer;
+ RecordWriter* writer;
+ Partitioner* partitioner;
+ int numReduces;
+ const Factory* factory;
+
+ public:
+
+ TaskContextImpl(const Factory& _factory) {
+ statusSet = false;
+ done = false;
+ newKey = NULL;
+ factory = &_factory;
+ jobConf = NULL;
+ inputKeyClass = NULL;
+ inputValueClass = NULL;
+ inputSplit = NULL;
+ mapper = NULL;
+ reducer = NULL;
+ reader = NULL;
+ writer = NULL;
+ partitioner = NULL;
+ protocol = NULL;
+ isNewKey = false;
+ isNewValue = false;
+ lastProgress = 0;
+ progressFloat = 0.0f;
+ hasTask = false;
+ }
+
+    void setProtocol(Protocol* _protocol, UpwardProtocol* _uplink) {
+      protocol = _protocol;
+ uplink = _uplink;
+ }
+
+ virtual void start(int protocol) {
+ if (protocol != 0) {
+ throw Error("Protocol version " + toString(protocol) +
+ " not supported");
+ }
+ }
+
+ virtual void setJobConf(vector<string> values) {
+ int len = values.size();
+ JobConfImpl* result = new JobConfImpl();
+ HADOOP_ASSERT(len % 2 == 0, "Odd length of job conf values");
+ for(int i=0; i < len; i += 2) {
+ result->set(values[i], values[i+1]);
+ }
+ jobConf = result;
+ }
+
+ virtual void setInputTypes(string keyType, string valueType) {
+ inputKeyClass = new string(keyType);
+ inputValueClass = new string(valueType);
+ }
+
+ virtual void runMap(string _inputSplit, int _numReduces, bool pipedInput) {
+ inputSplit = new string(_inputSplit);
+ reader = factory->createRecordReader(*this);
+ HADOOP_ASSERT((reader == NULL) == pipedInput,
+ pipedInput ? "RecordReader defined when not needed.":
+ "RecordReader not defined");
+ if (reader != NULL) {
+ value = new string();
+ }
+ mapper = factory->createMapper(*this);
+ reducer = factory->createCombiner(*this);
+ partitioner = factory->createPartitioner(*this);
+ numReduces = _numReduces;
+ if (reducer != NULL) {
+ int64_t spillSize = 100;
+ if (jobConf->hasKey("io.sort.mb")) {
+ spillSize = jobConf->getInt("io.sort.mb");
+ }
+ writer = new CombineRunner(spillSize * 1024 * 1024, this, reducer,
+ uplink, partitioner, numReduces);
+ }
+ hasTask = true;
+ }
+
+ virtual void mapItem(const string& _key, const string& _value) {
+ newKey = &_key;
+ value = &_value;
+ isNewKey = true;
+ }
+
+ virtual void runReduce(int reduce, bool pipedOutput) {
+ reducer = factory->createReducer(*this);
+ writer = factory->createRecordWriter(*this);
+ HADOOP_ASSERT((writer == NULL) == pipedOutput,
+ pipedOutput ? "RecordWriter defined when not needed.":
+ "RecordWriter not defined");
+ hasTask = true;
+ }
+
+ virtual void reduceKey(const string& _key) {
+ isNewKey = true;
+ newKey = &_key;
+ }
+
+ virtual void reduceValue(const string& _value) {
+ isNewValue = true;
+ value = &_value;
+ }
+
+ virtual bool isDone() {
+ return done;
+ }
+
+ virtual void close() {
+ done = true;
+ }
+
+ virtual void abort() {
+ throw Error("Aborted by driver");
+ }
+
+ void waitForTask() {
+ while (!done && !hasTask) {
+ protocol->nextEvent();
+ }
+ }
+
+ bool nextKey() {
+ if (reader == NULL) {
+ while (!isNewKey) {
+ nextValue();
+ if (done) {
+ return false;
+ }
+ }
+ key = *newKey;
+ } else {
+ if (!reader->next(key, const_cast<string&>(*value))) {
+ done = true;
+ return false;
+ }
+ progressFloat = reader->getProgress();
+ }
+ isNewKey = false;
+ if (mapper != NULL) {
+ mapper->map(*this);
+ } else {
+ reducer->reduce(*this);
+ }
+ return true;
+ }
+
+ /**
+ * Advance to the next value.
+ */
+ virtual bool nextValue() {
+ if (isNewKey || done) {
+ return false;
+ }
+ isNewValue = false;
+ progress();
+ protocol->nextEvent();
+ return isNewValue;
+ }
+
+ /**
+ * Get the JobConf for the current task.
+ */
+ virtual JobConf* getJobConf() {
+ return jobConf;
+ }
+
+ /**
+ * Get the current key.
+     * @return the current key; undefined until the first map or reduce item
+ */
+ virtual const string& getInputKey() {
+ return key;
+ }
+
+ /**
+ * Get the current value.
+     * @return the current value; undefined until the first map or reduce
+     * item
+ */
+ virtual const string& getInputValue() {
+ return *value;
+ }
+
+ /**
+ * Mark your task as having made progress without changing the status
+ * message.
+ */
+ virtual void progress() {
+ if (uplink != 0) {
+ uint64_t now = getCurrentMillis();
+ if (now - lastProgress > 1000) {
+ lastProgress = now;
+ uplink->progress(progressFloat);
+ if (statusSet) {
+ uplink->status(status);
+ statusSet = false;
+ }
+ }
+ }
+ }
+
+ /**
+ * Set the status message and call progress.
+ */
+ virtual void setStatus(const string& status) {
+ this->status = status;
+ statusSet = true;
+ progress();
+ }
+
+ /**
+ * Get the name of the key class of the input to this task.
+ */
+ virtual const string& getInputKeyClass() {
+ return *inputKeyClass;
+ }
+
+ /**
+ * Get the name of the value class of the input to this task.
+ */
+ virtual const string& getInputValueClass() {
+ return *inputValueClass;
+ }
+
+ /**
+ * Access the InputSplit of the mapper.
+ */
+ virtual const std::string& getInputSplit() {
+ return *inputSplit;
+ }
+
+ virtual void emit(const string& key, const string& value) {
+ progress();
+ if (writer != NULL) {
+ writer->emit(key, value);
+ } else if (partitioner != NULL) {
+ int part = partitioner->partition(key, numReduces);
+ uplink->partitionedOutput(part, key, value);
+ } else {
+ uplink->output(key, value);
+ }
+ }
+
+ void closeAll() {
+ if (reader) {
+ reader->close();
+ }
+ if (mapper) {
+ mapper->close();
+ }
+ if (reducer) {
+ reducer->close();
+ }
+ if (writer) {
+ writer->close();
+ }
+ }
+
+ virtual ~TaskContextImpl() {
+ delete jobConf;
+ delete inputKeyClass;
+ delete inputValueClass;
+ delete inputSplit;
+ if (reader) {
+ delete value;
+ }
+ delete reader;
+ delete mapper;
+ delete reducer;
+ delete writer;
+ delete partitioner;
+ }
+ };
+
+ /**
+ * Run the assigned task in the framework.
+   * The user's main function should build a Factory for its Mapper,
+   * Reducer, and related classes and then call this with it.
+ * @return true, if the task succeeded.
+ */
+ bool runTask(const Factory& factory) {
+ try {
+ TaskContextImpl* context = new TaskContextImpl(factory);
+ Protocol* connection;
+ char* portStr = getenv("hadoop.pipes.command.port");
+ int sock = -1;
+ FILE* stream = NULL;
+ FILE* outStream = NULL;
+ if (portStr) {
+ sock = socket(PF_INET, SOCK_STREAM, 0);
+        HADOOP_ASSERT(sock != -1,
+ string("problem creating socket: ") + strerror(errno));
+ sockaddr_in addr;
+ addr.sin_family = AF_INET;
+ addr.sin_port = htons(toInt(portStr));
+ addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+ HADOOP_ASSERT(connect(sock, (sockaddr*) &addr, sizeof(addr)) == 0,
+ string("problem connecting command socket: ") +
+ strerror(errno));
+ stream = fdopen(sock, "r");
+ outStream = fdopen(sock, "w");
+ connection = new BinaryProtocol(stream, context, outStream);
+ } else if (getenv("hadoop.pipes.command.file")) {
+ char* filename = getenv("hadoop.pipes.command.file");
+ string outFilename = filename;
+ outFilename += ".out";
+ stream = fopen(filename, "r");
+ outStream = fopen(outFilename.c_str(), "w");
+ connection = new BinaryProtocol(stream, context, outStream);
+ } else {
+ connection = new TextProtocol(stdin, context, stdout);
+ }
+ context->setProtocol(connection, connection->getUplink());
+ context->waitForTask();
+ while (!context->isDone()) {
+ context->nextKey();
+ }
+ context->closeAll();
+ connection->getUplink()->done();
+ delete context;
+ delete connection;
+ if (stream != NULL) {
+ fflush(stream);
+ }
+ if (outStream != NULL) {
+ fflush(outStream);
+ }
+ fflush(stdout);
+ if (sock != -1) {
+ int result = shutdown(sock, SHUT_RDWR);
+        HADOOP_ASSERT(result == 0, "problem shutting down socket");
+ result = close(sock);
+ HADOOP_ASSERT(result == 0, "problem closing socket");
+ }
+ if (stream != NULL) {
+ //fclose(stream);
+ }
+ if (outStream != NULL) {
+ //fclose(outStream);
+ }
+ return true;
+ } catch (Error& err) {
+ fprintf(stderr, "Hadoop Pipes Exception: %s\n",
+ err.getMessage().c_str());
+ return false;
+ }
+ }
+}
+
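
(runTask() above is the entire client-side runtime: it selects a protocol from the environment — a socket for hadoop.pipes.command.port, files for hadoop.pipes.command.file, otherwise the text protocol on stdin/stdout — and pumps events into the user's classes through the Factory. For orientation, a minimal word-count sketch of the application side, assuming the Mapper/Reducer/TemplateFactory declarations from hadoop/Pipes.hh and the splitString/toInt/toString helpers from hadoop/StringUtils.hh that this file is built against:

    #include "hadoop/Pipes.hh"
    #include "hadoop/StringUtils.hh"

    #include <string>
    #include <vector>

    class WordCountMap: public HadoopPipes::Mapper {
    public:
      WordCountMap(HadoopPipes::TaskContext& context) {}
      void map(HadoopPipes::MapContext& context) {
        // One input record per call; emit a <word, "1"> pair per word.
        std::vector<std::string> words =
          HadoopUtils::splitString(context.getInputValue(), " ");
        for (unsigned int i = 0; i < words.size(); ++i) {
          context.emit(words[i], "1");
        }
      }
    };

    class WordCountReduce: public HadoopPipes::Reducer {
    public:
      WordCountReduce(HadoopPipes::TaskContext& context) {}
      void reduce(HadoopPipes::ReduceContext& context) {
        // nextValue()/getInputValue() walk the values for one key,
        // exactly as TaskContextImpl::nextValue() feeds them above.
        int sum = 0;
        while (context.nextValue()) {
          sum += HadoopUtils::toInt(context.getInputValue());
        }
        context.emit(context.getInputKey(), HadoopUtils::toString(sum));
      }
    };

    int main(int argc, char** argv) {
      // runTask() returns true on success; map that onto the exit code.
      return HadoopPipes::runTask(
          HadoopPipes::TemplateFactory<WordCountMap, WordCountReduce>())
          ? 0 : 1;
    }
)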
Added: lucene/hadoop/trunk/src/c++/pipes/impl/config.h.in
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/c%2B%2B/pipes/impl/config.h.in?view=auto&rev=538693
==============================================================================
--- lucene/hadoop/trunk/src/c++/pipes/impl/config.h.in (added)
+++ lucene/hadoop/trunk/src/c++/pipes/impl/config.h.in Wed May 16 12:23:48 2007
@@ -0,0 +1,97 @@
+/* impl/config.h.in. Generated from configure.ac by autoheader. */
+
+/* Define to 1 if you have the declaration of `strerror_r', and to 0 if you
+ don't. */
+#undef HAVE_DECL_STRERROR_R
+
+/* Define to 1 if you have the <dlfcn.h> header file. */
+#undef HAVE_DLFCN_H
+
+/* Define to 1 if you have the <inttypes.h> header file. */
+#undef HAVE_INTTYPES_H
+
+/* Define to 1 if you have the <memory.h> header file. */
+#undef HAVE_MEMORY_H
+
+/* Define to 1 if you have the `mkdir' function. */
+#undef HAVE_MKDIR
+
+/* Define to 1 if stdbool.h conforms to C99. */
+#undef HAVE_STDBOOL_H
+
+/* Define to 1 if you have the <stdint.h> header file. */
+#undef HAVE_STDINT_H
+
+/* Define to 1 if you have the <stdlib.h> header file. */
+#undef HAVE_STDLIB_H
+
+/* Define to 1 if you have the `strerror_r' function. */
+#undef HAVE_STRERROR_R
+
+/* Define to 1 if you have the <strings.h> header file. */
+#undef HAVE_STRINGS_H
+
+/* Define to 1 if you have the <string.h> header file. */
+#undef HAVE_STRING_H
+
+/* Define to 1 if you have the <sys/stat.h> header file. */
+#undef HAVE_SYS_STAT_H
+
+/* Define to 1 if you have the <sys/types.h> header file. */
+#undef HAVE_SYS_TYPES_H
+
+/* Define to 1 if you have the `uname' function. */
+#undef HAVE_UNAME
+
+/* Define to 1 if you have the <unistd.h> header file. */
+#undef HAVE_UNISTD_H
+
+/* Define to 1 if the system has the type `_Bool'. */
+#undef HAVE__BOOL
+
+/* Name of package */
+#undef PACKAGE
+
+/* Define to the address where bug reports for this package should be sent. */
+#undef PACKAGE_BUGREPORT
+
+/* Define to the full name of this package. */
+#undef PACKAGE_NAME
+
+/* Define to the full name and version of this package. */
+#undef PACKAGE_STRING
+
+/* Define to the one symbol short name of this package. */
+#undef PACKAGE_TARNAME
+
+/* Define to the version of this package. */
+#undef PACKAGE_VERSION
+
+/* Define to 1 if you have the ANSI C header files. */
+#undef STDC_HEADERS
+
+/* Define to 1 if strerror_r returns char *. */
+#undef STRERROR_R_CHAR_P
+
+/* Version number of package */
+#undef VERSION
+
+/* Number of bits in a file offset, on hosts where this is settable. */
+#undef _FILE_OFFSET_BITS
+
+/* Enable GNU extensions on systems that have them. */
+#ifndef _GNU_SOURCE
+# undef _GNU_SOURCE
+#endif
+
+/* Define for large files, on AIX-style hosts. */
+#undef _LARGE_FILES
+
+/* Define to empty if `const' does not conform to ANSI C. */
+#undef const
+
+/* Define to `long' if <sys/types.h> does not define. */
+#undef off_t
+
+/* Define to `unsigned' if <sys/types.h> does not define. */
+#undef size_t
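
(AC_FUNC_STRERROR_R in configure.ac is what drives HAVE_STRERROR_R and STRERROR_R_CHAR_P above: the GNU strerror_r returns a char*, while the POSIX variant returns an int status. A hypothetical portable wrapper — only the macro names come from this file:

    #include "config.h"
    #include <string.h>
    #include <string>

    std::string errnoToString(int err) {
    #ifdef HAVE_STRERROR_R
      char buf[256];
    # ifdef STRERROR_R_CHAR_P
      // GNU variant: the returned pointer may or may not be buf.
      return std::string(strerror_r(err, buf, sizeof(buf)));
    # else
      // POSIX variant: fills buf and returns 0 on success.
      if (strerror_r(err, buf, sizeof(buf)) == 0) {
        return std::string(buf);
      }
      return "unknown error";
    # endif
    #else
      return std::string(strerror(err));  // non-thread-safe fallback
    #endif
    }
)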
Added: lucene/hadoop/trunk/src/c++/pipes/install-sh
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/c%2B%2B/pipes/install-sh?view=auto&rev=538693
==============================================================================
--- lucene/hadoop/trunk/src/c++/pipes/install-sh (added)
+++ lucene/hadoop/trunk/src/c++/pipes/install-sh Wed May 16 12:23:48 2007
@@ -0,0 +1,322 @@
+#!/bin/sh
+# install - install a program, script, or datafile
+
+scriptversion=2004-07-05.00
+
+# This originates from X11R5 (mit/util/scripts/install.sh), which was
+# later released in X11R6 (xc/config/util/install.sh) with the
+# following copyright and license.
+#
+# Copyright (C) 1994 X Consortium
+#
+# Permission is hereby granted, free of charge, to any person obtaining a copy
+# of this software and associated documentation files (the "Software"), to
+# deal in the Software without restriction, including without limitation the
+# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+# sell copies of the Software, and to permit persons to whom the Software is
+# furnished to do so, subject to the following conditions:
+#
+# The above copyright notice and this permission notice shall be included in
+# all copies or substantial portions of the Software.
+#
+# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+# X CONSORTIUM BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN
+# AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNEC-
+# TION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+#
+# Except as contained in this notice, the name of the X Consortium shall not
+# be used in advertising or otherwise to promote the sale, use or other deal-
+# ings in this Software without prior written authorization from the X Consor-
+# tium.
+#
+#
+# FSF changes to this file are in the public domain.
+#
+# Calling this script install-sh is preferred over install.sh, to prevent
+# `make' implicit rules from creating a file called install from it
+# when there is no Makefile.
+#
+# This script is compatible with the BSD install script, but was written
+# from scratch. It can only install one file at a time, a restriction
+# shared with many OS's install programs.
+
+# set DOITPROG to echo to test this script
+
+# Don't use :- since 4.3BSD and earlier shells don't like it.
+doit="${DOITPROG-}"
+
+# put in absolute paths if you don't have them in your path; or use env. vars.
+
+mvprog="${MVPROG-mv}"
+cpprog="${CPPROG-cp}"
+chmodprog="${CHMODPROG-chmod}"
+chownprog="${CHOWNPROG-chown}"
+chgrpprog="${CHGRPPROG-chgrp}"
+stripprog="${STRIPPROG-strip}"
+rmprog="${RMPROG-rm}"
+mkdirprog="${MKDIRPROG-mkdir}"
+
+chmodcmd="$chmodprog 0755"
+chowncmd=
+chgrpcmd=
+stripcmd=
+rmcmd="$rmprog -f"
+mvcmd="$mvprog"
+src=
+dst=
+dir_arg=
+dstarg=
+no_target_directory=
+
+usage="Usage: $0 [OPTION]... [-T] SRCFILE DSTFILE
+ or: $0 [OPTION]... SRCFILES... DIRECTORY
+ or: $0 [OPTION]... -t DIRECTORY SRCFILES...
+ or: $0 [OPTION]... -d DIRECTORIES...
+
+In the 1st form, copy SRCFILE to DSTFILE.
+In the 2nd and 3rd, copy all SRCFILES to DIRECTORY.
+In the 4th, create DIRECTORIES.
+
+Options:
+-c (ignored)
+-d create directories instead of installing files.
+-g GROUP $chgrpprog installed files to GROUP.
+-m MODE $chmodprog installed files to MODE.
+-o USER $chownprog installed files to USER.
+-s $stripprog installed files.
+-t DIRECTORY install into DIRECTORY.
+-T report an error if DSTFILE is a directory.
+--help display this help and exit.
+--version display version info and exit.
+
+Environment variables override the default commands:
+ CHGRPPROG CHMODPROG CHOWNPROG CPPROG MKDIRPROG MVPROG RMPROG STRIPPROG
+"
+
+while test -n "$1"; do
+ case $1 in
+ -c) shift
+ continue;;
+
+ -d) dir_arg=true
+ shift
+ continue;;
+
+ -g) chgrpcmd="$chgrpprog $2"
+ shift
+ shift
+ continue;;
+
+ --help) echo "$usage"; exit 0;;
+
+ -m) chmodcmd="$chmodprog $2"
+ shift
+ shift
+ continue;;
+
+ -o) chowncmd="$chownprog $2"
+ shift
+ shift
+ continue;;
+
+ -s) stripcmd=$stripprog
+ shift
+ continue;;
+
+ -t) dstarg=$2
+ shift
+ shift
+ continue;;
+
+ -T) no_target_directory=true
+ shift
+ continue;;
+
+ --version) echo "$0 $scriptversion"; exit 0;;
+
+ *) # When -d is used, all remaining arguments are directories to create.
+ # When -t is used, the destination is already specified.
+ test -n "$dir_arg$dstarg" && break
+ # Otherwise, the last argument is the destination. Remove it from $@.
+ for arg
+ do
+ if test -n "$dstarg"; then
+ # $@ is not empty: it contains at least $arg.
+ set fnord "$@" "$dstarg"
+ shift # fnord
+ fi
+ shift # arg
+ dstarg=$arg
+ done
+ break;;
+ esac
+done
+
+if test -z "$1"; then
+ if test -z "$dir_arg"; then
+ echo "$0: no input file specified." >&2
+ exit 1
+ fi
+ # It's OK to call `install-sh -d' without argument.
+ # This can happen when creating conditional directories.
+ exit 0
+fi
+
+for src
+do
+ # Protect names starting with `-'.
+ case $src in
+ -*) src=./$src ;;
+ esac
+
+ if test -n "$dir_arg"; then
+ dst=$src
+ src=
+
+ if test -d "$dst"; then
+ mkdircmd=:
+ chmodcmd=
+ else
+ mkdircmd=$mkdirprog
+ fi
+ else
+ # Waiting for this to be detected by the "$cpprog $src $dsttmp" command
+ # might cause directories to be created, which would be especially bad
+ # if $src (and thus $dsttmp) contains '*'.
+ if test ! -f "$src" && test ! -d "$src"; then
+ echo "$0: $src does not exist." >&2
+ exit 1
+ fi
+
+ if test -z "$dstarg"; then
+ echo "$0: no destination specified." >&2
+ exit 1
+ fi
+
+ dst=$dstarg
+ # Protect names starting with `-'.
+ case $dst in
+ -*) dst=./$dst ;;
+ esac
+
+ # If destination is a directory, append the input filename; won't work
+ # if double slashes aren't ignored.
+ if test -d "$dst"; then
+ if test -n "$no_target_directory"; then
+ echo "$0: $dstarg: Is a directory" >&2
+ exit 1
+ fi
+ dst=$dst/`basename "$src"`
+ fi
+ fi
+
+ # This sed command emulates the dirname command.
+ dstdir=`echo "$dst" | sed -e 's,[^/]*$,,;s,/$,,;s,^$,.,'`
+
+ # Make sure that the destination directory exists.
+
+ # Skip lots of stat calls in the usual case.
+ if test ! -d "$dstdir"; then
+ defaultIFS='
+ '
+ IFS="${IFS-$defaultIFS}"
+
+ oIFS=$IFS
+ # Some sh's can't handle IFS=/ for some reason.
+ IFS='%'
+ set - `echo "$dstdir" | sed -e 's@/@%@g' -e 's@^%@/@'`
+ IFS=$oIFS
+
+ pathcomp=
+
+ while test $# -ne 0 ; do
+ pathcomp=$pathcomp$1
+ shift
+ if test ! -d "$pathcomp"; then
+ $mkdirprog "$pathcomp"
+        # mkdir can fail with a `File exists' error in case several
+ # install-sh are creating the directory concurrently. This
+ # is OK.
+ test -d "$pathcomp" || exit
+ fi
+ pathcomp=$pathcomp/
+ done
+ fi
+
+ if test -n "$dir_arg"; then
+ $doit $mkdircmd "$dst" \
+ && { test -z "$chowncmd" || $doit $chowncmd "$dst"; } \
+ && { test -z "$chgrpcmd" || $doit $chgrpcmd "$dst"; } \
+ && { test -z "$stripcmd" || $doit $stripcmd "$dst"; } \
+ && { test -z "$chmodcmd" || $doit $chmodcmd "$dst"; }
+
+ else
+ dstfile=`basename "$dst"`
+
+ # Make a couple of temp file names in the proper directory.
+ dsttmp=$dstdir/_inst.$$_
+ rmtmp=$dstdir/_rm.$$_
+
+ # Trap to clean up those temp files at exit.
+ trap 'status=$?; rm -f "$dsttmp" "$rmtmp" && exit $status' 0
+ trap '(exit $?); exit' 1 2 13 15
+
+ # Copy the file name to the temp name.
+ $doit $cpprog "$src" "$dsttmp" &&
+
+ # and set any options; do chmod last to preserve setuid bits.
+ #
+ # If any of these fail, we abort the whole thing. If we want to
+ # ignore errors from any of these, just make sure not to ignore
+ # errors from the above "$doit $cpprog $src $dsttmp" command.
+ #
+ { test -z "$chowncmd" || $doit $chowncmd "$dsttmp"; } \
+ && { test -z "$chgrpcmd" || $doit $chgrpcmd "$dsttmp"; } \
+ && { test -z "$stripcmd" || $doit $stripcmd "$dsttmp"; } \
+ && { test -z "$chmodcmd" || $doit $chmodcmd "$dsttmp"; } &&
+
+ # Now rename the file to the real destination.
+ { $doit $mvcmd -f "$dsttmp" "$dstdir/$dstfile" 2>/dev/null \
+ || {
+ # The rename failed, perhaps because mv can't rename something else
+ # to itself, or perhaps because mv is so ancient that it does not
+ # support -f.
+
+ # Now remove or move aside any old file at destination location.
+ # We try this two ways since rm can't unlink itself on some
+ # systems and the destination file might be busy for other
+ # reasons. In this case, the final cleanup might fail but the new
+ # file should still install successfully.
+ {
+ if test -f "$dstdir/$dstfile"; then
+ $doit $rmcmd -f "$dstdir/$dstfile" 2>/dev/null \
+ || $doit $mvcmd -f "$dstdir/$dstfile" "$rmtmp" 2>/dev/null \
+ || {
+ echo "$0: cannot unlink or rename $dstdir/$dstfile" >&2
+ (exit 1); exit
+ }
+ else
+ :
+ fi
+ } &&
+
+ # Now rename the file to the real destination.
+ $doit $mvcmd "$dsttmp" "$dstdir/$dstfile"
+ }
+ }
+ fi || { (exit 1); exit; }
+done
+
+# The final little trick to "correctly" pass the exit status to the exit trap.
+{
+ (exit 0); exit
+}
+
+# Local variables:
+# eval: (add-hook 'write-file-hooks 'time-stamp)
+# time-stamp-start: "scriptversion="
+# time-stamp-format: "%:y-%02m-%02d.%02H"
+# time-stamp-end: "$"
+# End: