Posted to common-commits@hadoop.apache.org by cu...@apache.org on 2006/05/04 04:04:07 UTC

svn commit: r399509 [1/5] - in /lucene/hadoop/trunk: ./ bin/ src/c++/ src/c++/librecordio/ src/c++/librecordio/test/ src/java/org/apache/hadoop/record/ src/java/org/apache/hadoop/record/compiler/ src/java/org/apache/hadoop/record/compiler/generated/ sr...

Author: cutting
Date: Wed May  3 19:04:01 2006
New Revision: 399509

URL: http://svn.apache.org/viewcvs?rev=399509&view=rev
Log:
HADOOP-65.  Initial version of multi-language record system.  Contributed by Milind Bhandarkar.

Added:
    lucene/hadoop/trunk/bin/rcc   (with props)
    lucene/hadoop/trunk/src/c++/
    lucene/hadoop/trunk/src/c++/librecordio/
    lucene/hadoop/trunk/src/c++/librecordio/Makefile
    lucene/hadoop/trunk/src/c++/librecordio/archive.hh
    lucene/hadoop/trunk/src/c++/librecordio/binarchive.cc
    lucene/hadoop/trunk/src/c++/librecordio/binarchive.hh
    lucene/hadoop/trunk/src/c++/librecordio/csvarchive.cc
    lucene/hadoop/trunk/src/c++/librecordio/csvarchive.hh
    lucene/hadoop/trunk/src/c++/librecordio/exception.cc
    lucene/hadoop/trunk/src/c++/librecordio/exception.hh
    lucene/hadoop/trunk/src/c++/librecordio/filestream.cc
    lucene/hadoop/trunk/src/c++/librecordio/filestream.hh
    lucene/hadoop/trunk/src/c++/librecordio/recordio.cc
    lucene/hadoop/trunk/src/c++/librecordio/recordio.hh
    lucene/hadoop/trunk/src/c++/librecordio/test/
    lucene/hadoop/trunk/src/c++/librecordio/test/Makefile
    lucene/hadoop/trunk/src/c++/librecordio/test/test.cc
    lucene/hadoop/trunk/src/c++/librecordio/test/test.hh
    lucene/hadoop/trunk/src/c++/librecordio/test/test.jr
    lucene/hadoop/trunk/src/c++/librecordio/test/testFromJava.cc
    lucene/hadoop/trunk/src/c++/librecordio/test/testFromJava.hh
    lucene/hadoop/trunk/src/c++/librecordio/xmlarchive.cc
    lucene/hadoop/trunk/src/c++/librecordio/xmlarchive.hh
    lucene/hadoop/trunk/src/java/org/apache/hadoop/record/
    lucene/hadoop/trunk/src/java/org/apache/hadoop/record/BinaryInputArchive.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/record/BinaryOutputArchive.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/record/CsvInputArchive.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/record/CsvOutputArchive.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/record/Index.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/record/InputArchive.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/record/OutputArchive.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/record/Record.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/record/RecordReader.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/record/RecordWriter.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/record/Utils.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/record/XmlInputArchive.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/record/XmlOutputArchive.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/record/compiler/
    lucene/hadoop/trunk/src/java/org/apache/hadoop/record/compiler/CppGenerator.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/record/compiler/JBoolean.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/record/compiler/JBuffer.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/record/compiler/JByte.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/record/compiler/JCompType.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/record/compiler/JDouble.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/record/compiler/JField.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/record/compiler/JFile.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/record/compiler/JFloat.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/record/compiler/JInt.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/record/compiler/JLong.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/record/compiler/JMap.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/record/compiler/JRecord.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/record/compiler/JString.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/record/compiler/JType.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/record/compiler/JVector.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/record/compiler/JavaGenerator.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/record/compiler/generated/
    lucene/hadoop/trunk/src/java/org/apache/hadoop/record/compiler/generated/ParseException.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/record/compiler/generated/Rcc.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/record/compiler/generated/RccConstants.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/record/compiler/generated/RccTokenManager.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/record/compiler/generated/SimpleCharStream.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/record/compiler/generated/Token.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/record/compiler/generated/TokenMgrError.java
    lucene/hadoop/trunk/src/java/org/apache/hadoop/record/compiler/generated/rcc.jj
    lucene/hadoop/trunk/src/java/org/apache/hadoop/record/package.html
    lucene/hadoop/trunk/src/test/ddl/
    lucene/hadoop/trunk/src/test/ddl/buffer.jr
    lucene/hadoop/trunk/src/test/ddl/int.jr
    lucene/hadoop/trunk/src/test/ddl/links.jr
    lucene/hadoop/trunk/src/test/ddl/location.jr
    lucene/hadoop/trunk/src/test/ddl/string.jr
    lucene/hadoop/trunk/src/test/ddl/test.jr
    lucene/hadoop/trunk/src/test/org/apache/hadoop/record/
    lucene/hadoop/trunk/src/test/org/apache/hadoop/record/test/
    lucene/hadoop/trunk/src/test/org/apache/hadoop/record/test/FromCpp.java
    lucene/hadoop/trunk/src/test/org/apache/hadoop/record/test/RecBuffer.java
    lucene/hadoop/trunk/src/test/org/apache/hadoop/record/test/RecInt.java
    lucene/hadoop/trunk/src/test/org/apache/hadoop/record/test/RecRecord0.java
    lucene/hadoop/trunk/src/test/org/apache/hadoop/record/test/RecRecord1.java
    lucene/hadoop/trunk/src/test/org/apache/hadoop/record/test/RecString.java
    lucene/hadoop/trunk/src/test/org/apache/hadoop/record/test/TestMapRed.java
    lucene/hadoop/trunk/src/test/org/apache/hadoop/record/test/TestRecordIO.java
    lucene/hadoop/trunk/src/test/org/apache/hadoop/record/test/TestWritable.java
    lucene/hadoop/trunk/src/test/org/apache/hadoop/record/test/ToCpp.java
Modified:
    lucene/hadoop/trunk/CHANGES.txt
    lucene/hadoop/trunk/build.xml

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/CHANGES.txt?rev=399509&r1=399508&r2=399509&view=diff
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Wed May  3 19:04:01 2006
@@ -171,6 +171,12 @@
     correctly handle job jar files that contain a lib directory with
     nested jar files.  (cutting)
 
+45. HADOOP-65.  Initial version of record I/O framework that enables
+    the specification of record types and generates marshalling code
+    in both Java and C++.  Generated Java code implements
+    WritableComparable, but is not yet otherwise used by
+    Hadoop. (Milind Bhandarkar via cutting)
+
 
 Release 0.1.1 - 2006-04-08
 

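For orientation, the .jr files listed above are record DDL inputs to bin/rcc. A minimal sketch of the DDL, modeled on the links.jr test file added in this commit (the field names are illustrative, not the actual file contents):

    module links {
        class Link {
            ustring URL;
            boolean isRelative;
            ustring anchorText;
        };
    }

rcc compiles such a description into Java classes that implement WritableComparable and into C++ classes that implement hadoop::Record.
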
Added: lucene/hadoop/trunk/bin/rcc
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/bin/rcc?rev=399509&view=auto
==============================================================================
--- lucene/hadoop/trunk/bin/rcc (added)
+++ lucene/hadoop/trunk/bin/rcc Wed May  3 19:04:01 2006
@@ -0,0 +1,97 @@
+#!/bin/bash
+# 
+# The Hadoop record compiler
+#
+# Environment Variables
+#
+#   JAVA_HOME        The java implementation to use.
+#
+#   HADOOP_OPTS      Extra Java runtime options.
+#
+#   HADOOP_CONF_DIR  Alternate conf dir. Default is ${HADOOP_HOME}/conf.
+#
+
+# resolve links - $0 may be a softlink
+THIS="$0"
+while [ -h "$THIS" ]; do
+  ls=`ls -ld "$THIS"`
+  link=`expr "$ls" : '.*-> \(.*\)$'`
+  if expr "$link" : '.*/.*' > /dev/null; then
+    THIS="$link"
+  else
+    THIS=`dirname "$THIS"`/"$link"
+  fi
+done
+
+# some directories
+THIS_DIR=`dirname "$THIS"`
+HADOOP_HOME=`cd "$THIS_DIR/.." ; pwd`
+
+# Allow alternate conf dir location.
+HADOOP_CONF_DIR="${HADOOP_CONF_DIR:-$HADOOP_HOME/conf}"
+
+if [ -f "${HADOOP_CONF_DIR}/hadoop-env.sh" ]; then
+  source "${HADOOP_CONF_DIR}/hadoop-env.sh"
+fi
+
+# some Java parameters: JAVA_HOME must be set, either in the
+# environment or in hadoop-env.sh above.
+if [ "$JAVA_HOME" = "" ]; then
+  echo "Error: JAVA_HOME is not set."
+  exit 1
+fi
+
+JAVA=$JAVA_HOME/bin/java
+JAVA_HEAP_MAX=-Xmx1000m 
+
+# CLASSPATH initially contains $HADOOP_CONF_DIR
+CLASSPATH="${HADOOP_CONF_DIR}"
+CLASSPATH=${CLASSPATH}:$JAVA_HOME/lib/tools.jar
+
+# for developers, add Hadoop classes to CLASSPATH
+if [ -d "$HADOOP_HOME/build/classes" ]; then
+  CLASSPATH=${CLASSPATH}:$HADOOP_HOME/build/classes
+fi
+if [ -d "$HADOOP_HOME/build/webapps" ]; then
+  CLASSPATH=${CLASSPATH}:$HADOOP_HOME/build
+fi
+if [ -d "$HADOOP_HOME/build/test/classes" ]; then
+  CLASSPATH=${CLASSPATH}:$HADOOP_HOME/build/test/classes
+fi
+
+# so that filenames w/ spaces are handled correctly in loops below
+IFS=
+
+# for releases, add hadoop jars & webapps to CLASSPATH
+if [ -d "$HADOOP_HOME/webapps" ]; then
+  CLASSPATH=${CLASSPATH}:$HADOOP_HOME
+fi
+for f in $HADOOP_HOME/hadoop-*.jar; do
+  CLASSPATH=${CLASSPATH}:$f;
+done
+
+# add libs to CLASSPATH
+for f in $HADOOP_HOME/lib/*.jar; do
+  CLASSPATH=${CLASSPATH}:$f;
+done
+
+for f in $HADOOP_HOME/lib/jetty-ext/*.jar; do
+  CLASSPATH=${CLASSPATH}:$f;
+done
+
+# restore ordinary behaviour
+unset IFS
+
+CLASS='org.apache.hadoop.record.compiler.generated.Rcc'
+
+# cygwin path translation
+if expr `uname` : 'CYGWIN*' > /dev/null; then
+  CLASSPATH=`cygpath -p -w "$CLASSPATH"`
+fi
+
+# run it
+exec "$JAVA" $HADOOP_OPTS -classpath "$CLASSPATH" $CLASS "$@"

Propchange: lucene/hadoop/trunk/bin/rcc
------------------------------------------------------------------------------
    svn:executable = *
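
The script simply launches org.apache.hadoop.record.compiler.generated.Rcc on the user's arguments. A plausible invocation names a target language and one or more DDL files; the --language flag shown here is an assumption based on the Rcc driver class, not something documented in this commit:

    bin/rcc --language java src/test/ddl/test.jr
    bin/rcc --language c++ src/test/ddl/test.jr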

Modified: lucene/hadoop/trunk/build.xml
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/build.xml?rev=399509&r1=399508&r2=399509&view=diff
==============================================================================
--- lucene/hadoop/trunk/build.xml (original)
+++ lucene/hadoop/trunk/build.xml Wed May  3 19:04:01 2006
@@ -108,7 +108,15 @@
       <classpath refid="classpath"/>
   </taskdef>
 
-  <target name="compile" depends="init">
+  <target name="record-parser" depends="init" if="javacc.home">
+      <javacc
+          target="${src.dir}/org/apache/hadoop/record/compiler/generated/rcc.jj"
+          outputdirectory="${src.dir}/org/apache/hadoop/record/compiler/generated"
+          javacchome="${javacc.home}"
+      />
+  </target>
+
+  <target name="compile" depends="init, record-parser">
 
     <jsp-compile
      uriroot="${src.webapps}/mapred"

Added: lucene/hadoop/trunk/src/c++/librecordio/Makefile
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/c%2B%2B/librecordio/Makefile?rev=399509&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/c++/librecordio/Makefile (added)
+++ lucene/hadoop/trunk/src/c++/librecordio/Makefile Wed May  3 19:04:01 2006
@@ -0,0 +1,52 @@
+#
+# Copyright 2005 The Apache Software Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+# "test" is also a directory name here, so mark the convenience targets phony
+.PHONY: all test clean
+
+all: librecordio.a test
+
+librecordio.a: recordio.o filestream.o binarchive.o csvarchive.o xmlarchive.o exception.o
+	ar cru librecordio.a recordio.o filestream.o binarchive.o csvarchive.o xmlarchive.o exception.o
+
+recordio.o: recordio.cc
+	g++ -g3 -O0 -c -I${XERCESCROOT}/include -o recordio.o recordio.cc
+	
+filestream.o: filestream.cc
+	g++ -g3 -O0 -c -o filestream.o filestream.cc
+	
+binarchive.o: binarchive.cc
+	g++ -g3 -O0 -c -o binarchive.o binarchive.cc
+
+csvarchive.o: csvarchive.cc
+	g++ -g3 -O0 -c -o csvarchive.o csvarchive.cc
+
+xmlarchive.o: xmlarchive.cc
+	g++ -g3 -O0 -c -I${XERCESCROOT}/include -o xmlarchive.o xmlarchive.cc
+		
+exception.o: exception.cc
+	g++ -g3 -O0 -c -o exception.o exception.cc
+	
+recordio.cc: recordio.hh archive.hh exception.hh
+filestream.cc: recordio.hh filestream.hh 
+binarchive.cc: recordio.hh binarchive.hh 
+csvarchive.cc: recordio.hh csvarchive.hh 
+xmlarchive.cc: recordio.hh xmlarchive.hh 
+exception.cc: exception.hh 
+
+test: librecordio.a
+	make -C test all
+	
+clean:
+	rm -f *~ *.o *.a
+	make -C test clean

Added: lucene/hadoop/trunk/src/c++/librecordio/archive.hh
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/c%2B%2B/librecordio/archive.hh?rev=399509&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/c++/librecordio/archive.hh (added)
+++ lucene/hadoop/trunk/src/c++/librecordio/archive.hh Wed May  3 19:04:01 2006
@@ -0,0 +1,118 @@
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef ARCHIVE_HH_
+#define ARCHIVE_HH_
+#include "recordio.hh"
+
+namespace hadoop {
+
+class Index {
+public:
+  virtual bool done() = 0;
+  virtual void incr() = 0;
+  virtual ~Index() {}
+};
+
+class IArchive {
+public:
+  virtual void deserialize(int8_t& t, const char* tag) = 0;
+  virtual void deserialize(bool& t, const char* tag) = 0;
+  virtual void deserialize(int32_t& t, const char* tag) = 0;
+  virtual void deserialize(int64_t& t, const char* tag) = 0;
+  virtual void deserialize(float& t, const char* tag) = 0;
+  virtual void deserialize(double& t, const char* tag) = 0;
+  virtual void deserialize(std::string& t, const char* tag) = 0;
+  virtual void deserialize(std::string& t, size_t& len, const char* tag) = 0;
+  virtual void startRecord(hadoop::Record& s, const char* tag) = 0;
+  virtual void endRecord(hadoop::Record& s, const char* tag) = 0;
+  virtual Index* startVector(const char* tag) = 0;
+  virtual void endVector(Index* idx, const char* tag) = 0;
+  virtual Index* startMap(const char* tag) = 0;
+  virtual void endMap(Index* idx, const char* tag) = 0;
+  virtual void deserialize(hadoop::Record& s, const char* tag) {
+    s.deserialize(*this, tag);
+  }
+  template <typename T>
+  void deserialize(std::vector<T>& v, const char* tag) {
+    Index* idx = startVector(tag);
+    while (!idx->done()) {
+      T t;
+      deserialize(t, tag);
+      v.push_back(t);
+      idx->incr();
+    }
+    endVector(idx, tag);
+  }
+  template <typename K, typename V>
+  void deserialize(std::map<K,V>& v, const char* tag) {
+    Index* idx = startMap(tag);
+    while (!idx->done()) {
+      K key;
+      deserialize(key, tag);
+      V value;
+      deserialize(value, tag);
+      v[key] = value;
+      idx->incr();
+    }
+    endMap(idx, tag);
+  }
+  // RecordReader deletes archives through this base class, so the
+  // destructor must be virtual.
+  virtual ~IArchive() {}
+};
+
+class OArchive {
+public:
+  virtual void serialize(int8_t t, const char* tag) = 0;
+  virtual void serialize(bool t, const char* tag) = 0;
+  virtual void serialize(int32_t t, const char* tag) = 0;
+  virtual void serialize(int64_t t, const char* tag) = 0;
+  virtual void serialize(float t, const char* tag) = 0;
+  virtual void serialize(double t, const char* tag) = 0;
+  virtual void serialize(const std::string& t, const char* tag) = 0;
+  virtual void serialize(const std::string& t, size_t len, const char* tag) = 0;
+  virtual void startRecord(const hadoop::Record& s, const char* tag) = 0;
+  virtual void endRecord(const hadoop::Record& s, const char* tag) = 0;
+  virtual void startVector(size_t len, const char* tag) = 0;
+  virtual void endVector(size_t len, const char* tag) = 0;
+  virtual void startMap(size_t len, const char* tag) = 0;
+  virtual void endMap(size_t len, const char* tag) = 0;
+  virtual void serialize(hadoop::Record& s, const char* tag) {
+    s.serialize(*this, tag);
+  }
+  template <typename T>
+  void serialize(const std::vector<T>& v, const char* tag) {
+    startVector(v.size(), tag);
+    if (v.size()>0) {
+      for (size_t cur = 0; cur<v.size(); cur++) {
+        serialize(v[cur], tag);
+      }
+    }
+    endVector(v.size(), tag);
+  }
+  template <typename K, typename V>
+  void serialize(const std::map<K,V>& v, const char* tag) {
+    startMap(v.size(), tag);
+    if (v.size()>0) {
+      typedef typename std::map<K,V>::const_iterator CI;
+      for (CI cur = v.begin(); cur!=v.end(); cur++) {
+        serialize(cur->first, tag);
+        serialize(cur->second, tag);
+      }
+    }
+    endMap(v.size(), tag);
+  }
+  // RecordWriter deletes archives through this base class, so the
+  // destructor must be virtual.
+  virtual ~OArchive() {}
+};
+}; // end namespace hadoop
+#endif /*ARCHIVE_HH_*/
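
The archive design splits the work: the pure-virtual overloads handle primitives, and the member templates above compose them for vectors and maps, so a record type only has to walk its own fields. A hand-written sketch of the shape of an rcc-generated record (the Pair type, its fields, and its signature string are illustrative, not actual generated output):

    #include "recordio.hh"  // pulls in archive.hh and exception.hh

    // Hypothetical record, mirroring the structure rcc would generate.
    class Pair : public hadoop::Record {
    public:
      int32_t key;
      std::vector<std::string> values;

      void serialize(hadoop::OArchive& a, const char* tag) {
        a.startRecord(*this, tag);
        a.serialize(key, "key");        // resolves to the int32_t virtual
        a.serialize(values, "values");  // template: startVector/elements/endVector
        a.endRecord(*this, tag);
      }
      void deserialize(hadoop::IArchive& a, const char* tag) {
        a.startRecord(*this, tag);
        a.deserialize(key, "key");
        a.deserialize(values, "values");
        a.endRecord(*this, tag);
      }
      bool validate() const { return true; }
      const std::string& type() const {
        static const std::string t("Pair");
        return t;
      }
      const std::string& signature() const {
        static const std::string s("LPair(i[s])");  // format illustrative
        return s;
      }
    };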

Added: lucene/hadoop/trunk/src/c++/librecordio/binarchive.cc
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/c%2B%2B/librecordio/binarchive.cc?rev=399509&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/c++/librecordio/binarchive.cc (added)
+++ lucene/hadoop/trunk/src/c++/librecordio/binarchive.cc Wed May  3 19:04:01 2006
@@ -0,0 +1,349 @@
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "binarchive.hh"
+
+using namespace hadoop;
+
+template <typename T>
+static void serialize(T t, OutStream& stream)
+{
+  if (sizeof(T) != stream.write((const void *) &t, sizeof(T))) {
+    throw new IOException("Error serializing data.");
+  }
+}
+
+template <typename T>
+static void deserialize(T& t, InStream& stream)
+{
+  if (sizeof(T) != stream.read((void *) &t, sizeof(T))) {
+    throw new IOException("Error deserializing data.");
+  }
+}
+
+static void serializeInt(int32_t t, OutStream& stream)
+{
+  if (t >= -120 && t <= 127) {
+    int8_t b = t;
+    stream.write(&b, 1);
+    return;
+  }
+        
+  int8_t len = -120;
+  if (t < 0) {
+    t &= 0x7FFFFFFF; // reset the sign bit
+    len = -124;
+  }
+        
+  uint32_t tmp = t;
+  while (tmp != 0) {
+    tmp = tmp >> 8;
+    len--;
+  }
+  
+  stream.write(&len, 1);      
+  len = (len < -124) ? -(len + 124) : -(len + 120);
+        
+  for (uint32_t idx = len; idx != 0; idx--) {
+    uint32_t shiftbits = (idx - 1) * 8;
+    uint32_t mask = 0xFF << shiftbits;
+    uint8_t b = (t & mask) >> shiftbits;
+    stream.write(&b, 1);
+  }
+}
+
+static void deserializeInt(int32_t& t, InStream& stream)
+{
+  int8_t b;
+  if (1 != stream.read(&b, 1)) {
+    throw new IOException("Error deserializing int");
+  }
+  if (b >= -120) {
+    t = b;
+    return;
+  }
+  bool isNegative = (b < -124);
+  b = isNegative ? -(b + 124) : -(b + 120);
+  uint8_t barr[b];
+  if (b != stream.read(barr, b)) {
+    throw new IOException("Error deserializing int");
+  }
+  t = 0;
+  for (int idx = 0; idx < b; idx++) {
+    t = t << 8;
+    t |= (barr[idx] & 0xFF);
+  }
+  if (isNegative) {
+    t |= (int32_t) 0x80000000; // restore the sign bit cleared by serializeInt
+  }
+}
+
+static void serializeLong(int64_t t, OutStream& stream)
+{
+  if (t >= -112 && t <= 127) {
+    int8_t b = t;
+    stream.write(&b, 1);
+    return;
+  }
+        
+  int8_t len = -112;
+  if (t < 0) {
+    t &= 0x7FFFFFFFFFFFFFFFLL; // reset the sign bit
+    len = -120;
+  }
+        
+  uint64_t tmp = t;
+  while (tmp != 0) {
+    tmp = tmp >> 8;
+    len--;
+  }
+  
+  stream.write(&len, 1);
+        
+  len = (len < -120) ? -(len + 120) : -(len + 112);
+        
+  for (uint32_t idx = len; idx != 0; idx--) {
+    uint32_t shiftbits = (idx - 1) * 8;
+    uint64_t mask = 0xFFLL << shiftbits;
+    uint8_t b = (t & mask) >> shiftbits;
+    stream.write(&b, 1);
+  }
+}
+
+static void deserializeLong(int64_t& t, InStream& stream)
+{
+  int8_t b;
+  if (1 != stream.read(&b, 1)) {
+    throw new IOException("Error deserializing long.");
+  }
+  if (b >= -112) {
+    t = b;
+    return;
+  }
+  bool isNegative = (b < -120);
+  b = isNegative ? -(b + 120) : -(b + 112);
+  uint8_t barr[b];
+  if (b != stream.read(barr, b)) {
+    throw new IOException("Error deserializing long.");
+  }
+  t = 0;
+  for (int idx = 0; idx < b; idx++) {
+    t = t << 8;
+    t |= (barr[idx] & 0xFF);
+  }
+  if (isNegative) {
+    t |= (int64_t) 0x8000000000000000ULL; // restore the sign bit cleared by serializeLong
+  }
+}
+
+static void serializeFloat(float t, OutStream& stream)
+{
+  char buf[sizeof(float)];
+  XDR xdrs;
+  xdrmem_create(&xdrs, buf, sizeof(float), XDR_ENCODE);
+  xdr_float(&xdrs, &t);
+  stream.write(buf, sizeof(float));
+}
+
+static void deserializeFloat(float& t, InStream& stream)
+{
+  char buf[sizeof(float)];
+  if (sizeof(float) != stream.read(buf, sizeof(float))) {
+    throw new IOException("Error deserializing float.");
+  }
+  XDR xdrs;
+  xdrmem_create(&xdrs, buf, sizeof(float), XDR_DECODE);
+  xdr_float(&xdrs, &t);
+}
+
+static void serializeDouble(double t, OutStream& stream)
+{
+  char buf[sizeof(double)];
+  XDR xdrs;
+  xdrmem_create(&xdrs, buf, sizeof(double), XDR_ENCODE);
+  xdr_double(&xdrs, &t);
+  stream.write(buf, sizeof(double));
+}
+
+static void deserializeDouble(double& t, InStream& stream)
+{
+  char buf[sizeof(double)];
+  if (sizeof(double) != stream.read(buf, sizeof(double))) {
+    throw new IOException("Error deserializing double.");
+  }
+  XDR xdrs;
+  xdrmem_create(&xdrs, buf, sizeof(double), XDR_DECODE);
+  xdr_double(&xdrs, &t);
+}
+
+static void serializeString(const std::string& t, OutStream& stream)
+{
+  ::serializeInt(t.length(), stream);
+  if (t.length() > 0) {
+    stream.write(t.data(), t.length());
+  }
+}
+
+static void deserializeString(std::string& t, InStream& stream)
+{
+  int32_t len = 0;
+  ::deserializeInt(len, stream);
+  if (len > 0) {
+    char buf[len];
+    stream.read((void*) buf, len);
+    std::string s(buf, len);
+    t = s;
+  }
+}
+
+void hadoop::IBinArchive::deserialize(int8_t& t, const char* tag)
+{
+  ::deserialize(t, stream);
+}
+
+void hadoop::IBinArchive::deserialize(bool& t, const char* tag)
+{
+  ::deserialize(t, stream);
+}
+
+void hadoop::IBinArchive::deserialize(int32_t& t, const char* tag)
+{
+  ::deserializeInt(t, stream);
+}
+
+void hadoop::IBinArchive::deserialize(int64_t& t, const char* tag)
+{
+  ::deserializeLong(t, stream);
+}
+
+void hadoop::IBinArchive::deserialize(float& t, const char* tag)
+{
+  ::deserializeFloat(t, stream);
+}
+
+void hadoop::IBinArchive::deserialize(double& t, const char* tag)
+{
+  ::deserializeDouble(t, stream);
+}
+
+void hadoop::IBinArchive::deserialize(std::string& t, const char* tag)
+{
+  ::deserializeString(t, stream);
+}
+
+void hadoop::IBinArchive::deserialize(std::string& t, size_t& len, const char* tag)
+{
+  ::deserializeString(t, stream);
+  len = t.length();
+}
+
+void hadoop::IBinArchive::startRecord(Record& s, const char* tag)
+{
+}
+
+void hadoop::IBinArchive::endRecord(Record& s, const char* tag)
+{
+}
+
+Index* hadoop::IBinArchive::startVector(const char* tag)
+{
+  int32_t len;
+  ::deserializeInt(len, stream);
+  BinIndex *idx = new BinIndex((size_t) len);
+  return idx;
+}
+
+void hadoop::IBinArchive::endVector(Index* idx, const char* tag)
+{
+  delete idx;
+}
+
+Index* hadoop::IBinArchive::startMap(const char* tag)
+{
+  int32_t len;
+  ::deserializeInt(len, stream);
+  BinIndex *idx = new BinIndex((size_t) len);
+  return idx;
+}
+
+void hadoop::IBinArchive::endMap(Index* idx, const char* tag)
+{
+  delete idx;
+}
+
+hadoop::IBinArchive::~IBinArchive()
+{
+}
+
+void hadoop::OBinArchive::serialize(int8_t t, const char* tag)
+{
+  ::serialize(t, stream);
+}
+
+void hadoop::OBinArchive::serialize(bool t, const char* tag)
+{
+  ::serialize(t, stream);
+}
+
+void hadoop::OBinArchive::serialize(int32_t t, const char* tag)
+{
+  ::serializeInt(t, stream);
+}
+
+void hadoop::OBinArchive::serialize(int64_t t, const char* tag)
+{
+  ::serializeLong(t, stream);
+}
+
+void hadoop::OBinArchive::serialize(float t, const char* tag)
+{
+  ::serializeFloat(t, stream);
+}
+
+void hadoop::OBinArchive::serialize(double t, const char* tag)
+{
+  ::serializeDouble(t, stream);
+}
+
+void hadoop::OBinArchive::serialize(const std::string& t, const char* tag)
+{
+  ::serializeString(t, stream);
+}
+
+void hadoop::OBinArchive::serialize(const std::string& t, size_t len, const char* tag)
+{
+  ::serializeString(t, stream);
+}
+
+void hadoop::OBinArchive::startRecord(const Record& s, const char* tag)
+{
+}
+
+void hadoop::OBinArchive::endRecord(const Record& s, const char* tag)
+{
+}
+
+void hadoop::OBinArchive::startVector(size_t len, const char* tag)
+{
+  ::serializeInt(len, stream);
+}
+
+void hadoop::OBinArchive::endVector(size_t len, const char* tag)
+{
+}
+
+void hadoop::OBinArchive::startMap(size_t len, const char* tag)
+{
+  ::serializeInt(len, stream);
+}
+
+void hadoop::OBinArchive::endMap(size_t len, const char* tag)
+{
+}
+
+hadoop::OBinArchive::~OBinArchive()
+{
+}
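
The integer encoding above is variable-length: an int in [-120, 127] (a long in [-112, 127]) occupies a single byte, and anything larger gets a marker byte that counts the magnitude bytes (with a separate marker base for negatives, whose sign bit is cleared on the way out and restored on the way in), followed by the magnitude big-endian. A standalone sketch of the non-negative int case, mirroring serializeInt:

    #include <cstdint>
    #include <cstdio>

    int main() {
      int32_t t = 300;            // outside [-120,127], so not a one-byte value
      uint8_t out[5];
      int n = 0;
      int8_t len = -120;          // marker base for non-negative ints
      for (uint32_t tmp = t; tmp != 0; tmp >>= 8) {
        len--;                    // one step per magnitude byte: ends at -122
      }
      out[n++] = (uint8_t) len;   // 0x86
      for (int idx = -(len + 120); idx != 0; idx--) {
        out[n++] = (uint8_t) ((t >> ((idx - 1) * 8)) & 0xFF);
      }
      for (int i = 0; i < n; i++) {
        printf("%02x ", out[i]);  // prints: 86 01 2c
      }
      printf("\n");
      return 0;
    }

So 300 costs three bytes on the wire, while anything in [-120, 127] costs one.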

Added: lucene/hadoop/trunk/src/c++/librecordio/binarchive.hh
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/c%2B%2B/librecordio/binarchive.hh?rev=399509&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/c++/librecordio/binarchive.hh (added)
+++ lucene/hadoop/trunk/src/c++/librecordio/binarchive.hh Wed May  3 19:04:01 2006
@@ -0,0 +1,80 @@
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef BINARCHIVE_HH_
+#define BINARCHIVE_HH_
+
+#include "recordio.hh"
+#include <rpc/xdr.h>
+
+namespace hadoop {
+
+class BinIndex : public Index {
+private:
+  size_t size;
+public:
+  BinIndex(size_t size_) { size = size_; }
+  bool done() { return (size==0); }
+  void incr() { size--; }
+  ~BinIndex() {}
+};
+  
+class IBinArchive : public IArchive {
+private:
+  InStream& stream;
+public:
+  IBinArchive(InStream& _stream) : stream(_stream) {}
+  virtual void deserialize(int8_t& t, const char* tag);
+  virtual void deserialize(bool& t, const char* tag);
+  virtual void deserialize(int32_t& t, const char* tag);
+  virtual void deserialize(int64_t& t, const char* tag);
+  virtual void deserialize(float& t, const char* tag);
+  virtual void deserialize(double& t, const char* tag);
+  virtual void deserialize(std::string& t, const char* tag);
+  virtual void deserialize(std::string& t, size_t& len, const char* tag);
+  virtual void startRecord(Record& s, const char* tag);
+  virtual void endRecord(Record& s, const char* tag);
+  virtual Index* startVector(const char* tag);
+  virtual void endVector(Index* idx, const char* tag);
+  virtual Index* startMap(const char* tag);
+  virtual void endMap(Index* idx, const char* tag);
+  virtual ~IBinArchive();
+};
+
+class OBinArchive : public OArchive {
+private:
+  OutStream& stream;
+public:
+  OBinArchive(OutStream& _stream) : stream(_stream) {}
+  virtual void serialize(int8_t t, const char* tag);
+  virtual void serialize(bool t, const char* tag);
+  virtual void serialize(int32_t t, const char* tag);
+  virtual void serialize(int64_t t, const char* tag);
+  virtual void serialize(float t, const char* tag);
+  virtual void serialize(double t, const char* tag);
+  virtual void serialize(const std::string& t, const char* tag);
+  virtual void serialize(const std::string& t, size_t len, const char* tag);
+  virtual void startRecord(const Record& s, const char* tag);
+  virtual void endRecord(const Record& s, const char* tag);
+  virtual void startVector(size_t len, const char* tag);
+  virtual void endVector(size_t len, const char* tag);
+  virtual void startMap(size_t len, const char* tag);
+  virtual void endMap(size_t len, const char* tag);
+  virtual ~OBinArchive();
+};
+
+}
+#endif /*BINARCHIVE_HH_*/

Added: lucene/hadoop/trunk/src/c++/librecordio/csvarchive.cc
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/c%2B%2B/librecordio/csvarchive.cc?rev=399509&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/c++/librecordio/csvarchive.cc (added)
+++ lucene/hadoop/trunk/src/c++/librecordio/csvarchive.cc Wed May  3 19:04:01 2006
@@ -0,0 +1,355 @@
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "csvarchive.hh"
+#include <stdlib.h>
+
+using namespace hadoop;
+
+static std::string readUptoTerminator(PushBackInStream& stream)
+{
+  std::string s;
+  while (1) {
+    char c;
+    if (1 != stream.read(&c, 1)) {
+      throw new IOException("Error in deserialization.");
+    }
+    if (c == ',' || c == '\n' || c == '}') {
+      if (c != ',') {
+        stream.pushBack(c);
+      }
+      break;
+    }
+    s.push_back(c);
+  }
+  return s;
+}
+
+void hadoop::ICsvArchive::deserialize(int8_t& t, const char* tag)
+{
+  std::string s = readUptoTerminator(stream);
+  t = (int8_t) strtol(s.c_str(), NULL, 10);
+}
+
+void hadoop::ICsvArchive::deserialize(bool& t, const char* tag)
+{
+  std::string s = readUptoTerminator(stream);
+  t = (s == "T") ? true : false;
+}
+
+void hadoop::ICsvArchive::deserialize(int32_t& t, const char* tag)
+{
+  std::string s = readUptoTerminator(stream);
+  t = strtol(s.c_str(), NULL, 10);
+}
+
+void hadoop::ICsvArchive::deserialize(int64_t& t, const char* tag)
+{
+  std::string s = readUptoTerminator(stream);
+  t = strtoll(s.c_str(), NULL, 10);
+}
+
+void hadoop::ICsvArchive::deserialize(float& t, const char* tag)
+{
+  std::string s = readUptoTerminator(stream);
+  t = strtof(s.c_str(), NULL);
+}
+
+void hadoop::ICsvArchive::deserialize(double& t, const char* tag)
+{
+  std::string s = readUptoTerminator(stream);
+  t = strtod(s.c_str(), NULL);
+}
+
+static void replaceAll(std::string& s, const char *src, char c) // by reference, so the caller's string is modified
+{
+  std::string::size_type pos = 0;
+  while (pos != std::string::npos) {
+    pos = s.find(src);
+    if (pos != std::string::npos) {
+      s.replace(pos, strlen(src), 1, c);
+    }
+  }
+}
+
+void hadoop::ICsvArchive::deserialize(std::string& t, const char* tag)
+{
+  t = readUptoTerminator(stream);
+  if (t.length() == 0 || t[0] != '\'') {
+    throw new IOException("Error deserializing string.");
+  }
+  t.erase(0, 1); /// erase first character
+  replaceAll(t, "%0D", 0x0D);
+  replaceAll(t, "%0A", 0x0A);
+  replaceAll(t, "%7D", 0x7D);
+  replaceAll(t, "%00", 0x00);
+  replaceAll(t, "%2C", 0x2C);
+  replaceAll(t, "%25", 0x25);
+
+}
+
+void hadoop::ICsvArchive::deserialize(std::string& t, size_t& len, const char* tag)
+{
+  std::string s = readUptoTerminator(stream);
+  if (s.length() == 0 || s[0] != '#') {
+    throw new IOException("Error deserializing buffer.");
+  }
+  s.erase(0, 1); /// erase first character
+  len = s.length();
+  if (len%2 == 1) { // len must be even: two hex digits per byte
+    throw new IOException("Error deserializing buffer.");
+  }
+  len = len >> 1; // each pair of hex digits decodes to one byte
+  for (size_t idx = 0; idx < len; idx++) {
+    char buf[3];
+    buf[0] = s[2*idx];
+    buf[1] = s[2*idx+1];
+    buf[2] = '\0';
+    int i;
+    if (1 != sscanf(buf, "%2x", &i)) {
+      throw new IOException("Errror deserializing buffer.");
+    }
+    t.push_back((char) i);
+  }
+  len = t.length();
+}
+
+void hadoop::ICsvArchive::startRecord(Record& s, const char* tag)
+{
+  if (tag != NULL && strlen(tag) != 0) { // match the writer, which only emits "s{" for a non-empty tag
+    char mark[2];
+    if (2 != stream.read(mark, 2)) {
+      throw new IOException("Error deserializing record.");
+    }
+    if (mark[0] != 's' || mark[1] != '{') {
+      throw new IOException("Error deserializing record.");
+    }
+  }
+}
+
+void hadoop::ICsvArchive::endRecord(Record& s, const char* tag)
+{
+  char mark;
+  if (1 != stream.read(&mark, 1)) {
+    throw new IOException("Error deserializing record.");
+  }
+  if (tag == NULL) {
+    if (mark != '\n') {
+      throw new IOException("Error deserializing record.");
+    }
+  } else if (mark != '}') {
+    throw new IOException("Error deserializing record.");
+  } else {
+    readUptoTerminator(stream);
+  }
+}
+
+Index* hadoop::ICsvArchive::startVector(const char* tag)
+{
+  char mark[2];
+  if (2 != stream.read(mark, 2)) {
+    throw new IOException("Error deserializing vector.");
+  }
+  if (mark[0] != 'v' || mark[1] != '{') {
+    throw new IOException("Error deserializing vector.");
+  }
+  return new CsvIndex(stream);
+}
+
+void hadoop::ICsvArchive::endVector(Index* idx, const char* tag)
+{
+  delete idx;
+  char mark;
+  if (1 != stream.read(&mark, 1)) {
+    throw new IOException("Error deserializing vector.");
+  }
+  if (mark != '}') {
+    throw new IOException("Error deserializing vector.");
+  }
+  readUptoTerminator(stream);
+}
+
+Index* hadoop::ICsvArchive::startMap(const char* tag)
+{
+  char mark[2];
+  if (2 != stream.read(mark, 2)) {
+    throw new IOException("Error deserializing map.");
+  }
+  if (mark[0] != 'm' || mark[1] != '{') {
+    throw new IOException("Error deserializing map.");
+  }
+
+  return new CsvIndex(stream);
+}
+
+void hadoop::ICsvArchive::endMap(Index* idx, const char* tag)
+{
+  delete idx;
+  char mark;
+  if (1 != stream.read(&mark, 1)) {
+    throw new IOException("Error deserializing map.");
+  }
+  if (mark != '}') {
+    throw new IOException("Error deserializing map.");
+  }
+  readUptoTerminator(stream);
+}
+
+hadoop::ICsvArchive::~ICsvArchive()
+{
+}
+
+void hadoop::OCsvArchive::serialize(int8_t t, const char* tag)
+{
+  printCommaUnlessFirst();
+  char sval[5];
+  sprintf(sval, "%d", t);
+  stream.write(sval, strlen(sval));
+}
+
+void hadoop::OCsvArchive::serialize(bool t, const char* tag)
+{
+  printCommaUnlessFirst();
+  const char *sval = t ? "T" : "F";
+  stream.write(sval,1);  
+}
+
+void hadoop::OCsvArchive::serialize(int32_t t, const char* tag)
+{
+  printCommaUnlessFirst();
+  char sval[128];
+  sprintf(sval, "%d", t);
+  stream.write(sval, strlen(sval));
+}
+
+void hadoop::OCsvArchive::serialize(int64_t t, const char* tag)
+{
+  printCommaUnlessFirst();
+  char sval[128];
+  sprintf(sval, "%lld", t);
+  stream.write(sval, strlen(sval));
+}
+
+void hadoop::OCsvArchive::serialize(float t, const char* tag)
+{
+  printCommaUnlessFirst();
+  char sval[128];
+  sprintf(sval, "%f", t);
+  stream.write(sval, strlen(sval));
+}
+
+void hadoop::OCsvArchive::serialize(double t, const char* tag)
+{
+  printCommaUnlessFirst();
+  char sval[128];
+  sprintf(sval, "%lf", t);
+  stream.write(sval, strlen(sval));
+}
+
+void hadoop::OCsvArchive::serialize(const std::string& t, const char* tag)
+{
+  printCommaUnlessFirst();
+  stream.write("'",1);
+  int len = t.length();
+  for (int idx = 0; idx < len; idx++) {
+    char c = t[idx];
+    switch(c) {
+      case '\0':
+        stream.write("%00",3);
+        break;
+      case 0x0A:
+        stream.write("%0A",3);
+        break;
+      case 0x0D:
+        stream.write("%0D",3);
+        break;
+      case 0x25:
+        stream.write("%25",3);
+        break;
+      case 0x2C:
+        stream.write("%2C",3);
+        break;
+      case 0x7D:
+        stream.write("%7D",3);
+        break;
+      default:
+        stream.write(&c,1);
+        break;
+    }
+  }
+}
+
+void hadoop::OCsvArchive::serialize(const std::string& t, size_t len, const char* tag)
+{
+  printCommaUnlessFirst();
+  stream.write("#",1);
+  for (size_t idx = 0; idx < len; idx++) {
+    uint8_t b = t[idx];
+    char sval[3];
+    sprintf(sval, "%02x", b); // zero-pad so every byte is exactly two hex digits
+    stream.write(sval, 2);
+  }
+}
+
+void hadoop::OCsvArchive::startRecord(const Record& s, const char* tag)
+{
+  printCommaUnlessFirst();
+  if (tag != NULL && strlen(tag) != 0) {
+    stream.write("s{",2);
+  }
+  isFirst = true;
+}
+
+void hadoop::OCsvArchive::endRecord(const Record& s, const char* tag)
+{
+  if (tag == NULL || strlen(tag) == 0) {
+    stream.write("\n",1);
+    isFirst = true;
+  } else {
+    stream.write("}",1);
+    isFirst = false;
+  }
+}
+
+void hadoop::OCsvArchive::startVector(size_t len, const char* tag)
+{
+  printCommaUnlessFirst();
+  stream.write("v{",2);
+  isFirst = true;
+}
+
+void hadoop::OCsvArchive::endVector(size_t len, const char* tag)
+{
+  stream.write("}",1);
+  isFirst = false;
+}
+
+void hadoop::OCsvArchive::startMap(size_t len, const char* tag)
+{
+  printCommaUnlessFirst();
+  stream.write("m{",2);
+  isFirst = true;
+}
+
+void hadoop::OCsvArchive::endMap(size_t len, const char* tag)
+{
+  stream.write("}",1);
+  isFirst = false;
+}
+
+hadoop::OCsvArchive::~OCsvArchive()
+{
+}
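
Tracing the writer above: a top-level record with an int field 42, a string field "a,b", and a vector holding 1 and 2 serializes to the single line below (the layout is derived from the code; the record itself is hypothetical). Strings carry a leading apostrophe and percent-escape delimiter characters, buffers carry a leading '#' followed by hex pairs, and nested records, vectors, and maps are bracketed as s{...}, v{...}, and m{...}:

    42,'a%2Cb,v{1,2}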

Added: lucene/hadoop/trunk/src/c++/librecordio/csvarchive.hh
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/c%2B%2B/librecordio/csvarchive.hh?rev=399509&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/c++/librecordio/csvarchive.hh (added)
+++ lucene/hadoop/trunk/src/c++/librecordio/csvarchive.hh Wed May  3 19:04:01 2006
@@ -0,0 +1,126 @@
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef CSVARCHIVE_HH_
+#define CSVARCHIVE_HH_
+
+#include "recordio.hh"
+
+namespace hadoop {
+
+class PushBackInStream {
+private:
+  InStream* stream;
+  bool isAvail;
+  char pbchar;
+public:
+  void setStream(InStream* stream_) {
+    stream = stream_;
+    isAvail = false;
+    pbchar = 0;
+  }
+  ssize_t read(void* buf, size_t len) {
+    if (len > 0 && isAvail) {
+      char* p = (char*) buf;
+      *p = pbchar;
+      isAvail = false;
+      if (len > 1) {
+        ssize_t ret = stream->read((char*)buf + 1, len - 1);
+        return ret + 1;
+      } else {
+        return 1;
+      }
+    } else {
+      return stream->read(buf, len);
+    }
+  }
+  void pushBack(char c) {
+    pbchar = c;
+    isAvail = true;
+  }
+};
+
+class CsvIndex : public Index {
+private:
+  PushBackInStream& stream;
+public:
+  CsvIndex(PushBackInStream& _stream) : stream(_stream) {}
+  bool done() {
+    char c;
+    stream.read(&c, 1);
+    if (c != ',') {
+      stream.pushBack(c);
+    }
+    return (c == '}') ? true : false;
+  }
+  void incr() {}
+  ~CsvIndex() {} 
+};
+  
+class ICsvArchive : public IArchive {
+private:
+  PushBackInStream stream;
+public:
+  ICsvArchive(InStream& _stream) { stream.setStream(&_stream); }
+  virtual void deserialize(int8_t& t, const char* tag);
+  virtual void deserialize(bool& t, const char* tag);
+  virtual void deserialize(int32_t& t, const char* tag);
+  virtual void deserialize(int64_t& t, const char* tag);
+  virtual void deserialize(float& t, const char* tag);
+  virtual void deserialize(double& t, const char* tag);
+  virtual void deserialize(std::string& t, const char* tag);
+  virtual void deserialize(std::string& t, size_t& len, const char* tag);
+  virtual void startRecord(Record& s, const char* tag);
+  virtual void endRecord(Record& s, const char* tag);
+  virtual Index* startVector(const char* tag);
+  virtual void endVector(Index* idx, const char* tag);
+  virtual Index* startMap(const char* tag);
+  virtual void endMap(Index* idx, const char* tag);
+  virtual ~ICsvArchive();
+};
+
+class OCsvArchive : public OArchive {
+private:
+  OutStream& stream;
+  bool isFirst;
+  
+  void printCommaUnlessFirst() {
+    if (!isFirst) {
+      stream.write(",",1);
+    }
+    isFirst = false;
+  }
+public:
+  OCsvArchive(OutStream& _stream) : stream(_stream) {isFirst = true;}
+  virtual void serialize(int8_t t, const char* tag);
+  virtual void serialize(bool t, const char* tag);
+  virtual void serialize(int32_t t, const char* tag);
+  virtual void serialize(int64_t t, const char* tag);
+  virtual void serialize(float t, const char* tag);
+  virtual void serialize(double t, const char* tag);
+  virtual void serialize(const std::string& t, const char* tag);
+  virtual void serialize(const std::string& t, size_t len, const char* tag);
+  virtual void startRecord(const Record& s, const char* tag);
+  virtual void endRecord(const Record& s, const char* tag);
+  virtual void startVector(size_t len, const char* tag);
+  virtual void endVector(size_t len, const char* tag);
+  virtual void startMap(size_t len, const char* tag);
+  virtual void endMap(size_t len, const char* tag);
+  virtual ~OCsvArchive();
+};
+
+}
+#endif /*CSVARCHIVE_HH_*/

Added: lucene/hadoop/trunk/src/c++/librecordio/exception.cc
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/c%2B%2B/librecordio/exception.cc?rev=399509&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/c++/librecordio/exception.cc (added)
+++ lucene/hadoop/trunk/src/c++/librecordio/exception.cc Wed May  3 19:04:01 2006
@@ -0,0 +1,140 @@
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "exception.hh"
+#include <execinfo.h>
+
+#include <errno.h>
+#include <sstream>
+#include <typeinfo>
+
+using std::string;
+
+namespace hadoop {
+
+  /**
+   * Create an exception.
+   * @param message The message to give to the user.
+   * @param reason The exception that caused the new exception.
+   */
+  Exception::Exception(const string& message,
+                       const string& component,
+                       const string& location,
+                       const Exception* reason
+                       ): mMessage(message),
+                          mComponent(component),
+                          mLocation(location),
+                          mReason(reason)
+                          
+  {
+    mCalls = backtrace(mCallStack, sMaxCallStackDepth);
+  }
+
+  /**
+   * Copy the exception.
+   * Clones the reason, if there is one.
+   */
+  Exception::Exception(const Exception& other
+                       ): mMessage(other.mMessage), 
+                          mComponent(other.mComponent),
+                          mLocation(other.mLocation),
+                          mCalls(other.mCalls)
+  {
+    for(int i=0; i < mCalls; ++i) {
+      mCallStack[i] = other.mCallStack[i];
+    }
+    if (other.mReason) {
+      mReason = other.mReason->clone();
+    } else {
+      mReason = NULL;
+    }
+  }
+
+  Exception::~Exception() throw () {
+    delete mReason;
+  }
+
+  /**
+   * Print all of the information about the exception.
+   */
+  void Exception::print(std::ostream& stream) const {
+    stream << "Exception " << getTypename();
+    if (mComponent.size() != 0) {
+      stream << " (" << mComponent << ")";
+    }
+    stream << ": " << mMessage << "\n";
+    if (mLocation.size() != 0) {
+      stream << "  thrown at " << mLocation << "\n";
+    }
+    printCallStack(stream);
+    if (mReason) {
+      stream << "caused by: ";
+      mReason->print(stream);
+    }
+    stream.flush();
+  }
+
+  /**
+   * Result of print() as a string.
+   */
+  string Exception::toString() const {
+    std::ostringstream stream;
+    print(stream);
+    return stream.str();
+}
+
+  /**
+   * Print the call stack where the exception was created.
+   */
+  void Exception::printCallStack(std::ostream& stream) const {
+      char ** symbols = backtrace_symbols(mCallStack, mCalls);
+      for(int i=0; i < mCalls; ++i) {
+        stream << "  ";
+        if (i == 0) {
+          stream << "at ";
+        } else {
+          stream << "from ";
+        }
+        stream << symbols[i] << "\n";
+      }
+      free(symbols);
+  }
+
+  const char* Exception::getTypename() const {
+    return "Exception";
+  }
+
+  Exception* Exception::clone() const {
+    return new Exception(*this);
+  }
+
+  IOException::IOException(const string& message,
+                         const string& component,
+                         const string& location,
+                         const Exception* reason
+                         ): Exception(message, component, location, reason) 
+  {
+  }
+
+  const char* IOException::getTypename() const {
+    return "IOException";
+  }
+
+  IOException* IOException::clone() const {
+    return new IOException(*this);
+  }
+
+}
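
The archives raise errors with `throw new IOException(...)`, so handlers receive a pointer and own the object. A minimal consumption sketch (the reader and record are assumed to come from recordio.hh as above):

    #include <iostream>
    #include "recordio.hh"

    void readOneRecord(hadoop::RecordReader& reader, hadoop::Record& rec) {
      try {
        reader.read(rec);
      } catch (const hadoop::IOException* e) {
        e->print(std::cerr);  // message, component, location, call stack
        delete e;             // the handler owns the thrown object
      }
    }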

Added: lucene/hadoop/trunk/src/c++/librecordio/exception.hh
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/c%2B%2B/librecordio/exception.hh?rev=399509&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/c++/librecordio/exception.hh (added)
+++ lucene/hadoop/trunk/src/c++/librecordio/exception.hh Wed May  3 19:04:01 2006
@@ -0,0 +1,125 @@
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef EXCEPTION_HH
+#define EXCEPTION_HH
+
+#include <exception>
+#include <iostream>
+#include <string>
+
+namespace hadoop {
+
+  /**
+   * Parent-type for all exceptions in hadoop.
+   * Provides an application specified message to the user, a call stack from
+   * where the exception was created, and optionally an exception that caused 
+   * this one.
+   */
+  class Exception: public std::exception {
+  public:
+
+    /**
+     * Create an exception.
+     * @param message The message to give to the user.
+     * @param reason The exception that caused the new exception.
+     */
+    explicit Exception(const std::string& message,
+                       const std::string& component="",
+                       const std::string& location="",
+                       const Exception* reason=NULL);
+
+    /**
+     * Copy the exception.
+     * Clones the reason, if there is one.
+     */
+    Exception(const Exception&);
+
+    virtual ~Exception() throw ();
+
+    /**
+     * Make a new copy of the given exception by dynamically allocating
+     * memory.
+     */
+    virtual Exception* clone() const;
+
+    /**
+     * Print all of the information about the exception.
+     */
+    virtual void print(std::ostream& stream=std::cerr) const;
+
+    /**
+     * Result of print() as a string.
+     */
+    virtual std::string toString() const;
+
+    /**
+     * Print the call stack where the exception was created.
+     */
+    virtual void printCallStack(std::ostream& stream=std::cerr) const;
+
+    const std::string& getMessage() const {
+      return mMessage;
+    }
+
+    const std::string& getComponent() const {
+      return mComponent;
+    }
+
+    const std::string& getLocation() const {
+      return mLocation;
+    }
+
+    const Exception* getReason() const {
+      return mReason;
+    }
+
+    /**
+     * Provide a body for the virtual from std::exception.
+     */
+    virtual const char* what() const throw () {
+      return mMessage.c_str();
+    }
+
+    virtual const char* getTypename() const;
+
+  private:
+    const static int sMaxCallStackDepth = 10;
+    const std::string mMessage;
+    const std::string mComponent;
+    const std::string mLocation;
+    int mCalls;
+    void* mCallStack[sMaxCallStackDepth];
+    const Exception* mReason;
+
+    // NOT IMPLEMENTED
+    std::exception& operator=(const std::exception& right) throw ();
+  };
+
+  class IOException: public Exception {
+  public:
+    IOException(const std::string& message,
+                const std::string& component="",
+                const std::string& location="",
+                const Exception* reason = NULL);
+
+    virtual IOException* clone() const;
+    virtual const char* getTypename() const;
+
+  };
+
+}
+#endif

Added: lucene/hadoop/trunk/src/c++/librecordio/filestream.cc
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/c%2B%2B/librecordio/filestream.cc?rev=399509&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/c++/librecordio/filestream.cc (added)
+++ lucene/hadoop/trunk/src/c++/librecordio/filestream.cc Wed May  3 19:04:01 2006
@@ -0,0 +1,96 @@
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "filestream.hh"
+
+using namespace hadoop;
+
+hadoop::FileInStream::FileInStream()
+{
+  mFile = NULL;
+}
+
+bool hadoop::FileInStream::open(const std::string& name)
+{
+  mFile = fopen(name.c_str(), "rb");
+  return (mFile != NULL);
+}
+
+ssize_t hadoop::FileInStream::read(void *buf, size_t len)
+{
+  return fread(buf, 1, len, mFile);
+}
+
+bool hadoop::FileInStream::skip(size_t nbytes)
+{
+  return (0==fseek(mFile, nbytes, SEEK_CUR));
+}
+
+bool hadoop::FileInStream::close()
+{
+  int ret = fclose(mFile);
+  mFile = NULL;
+  return (ret==0);
+}
+
+hadoop::FileInStream::~FileInStream()
+{
+  if (mFile != NULL) {
+    close();
+  }
+}
+
+hadoop::FileOutStream::FileOutStream()
+{
+  mFile = NULL;
+}
+
+bool hadoop::FileOutStream::open(const std::string& name, bool overwrite)
+{
+  if (!overwrite) {
+    mFile = fopen(name.c_str(), "rb");
+    if (mFile != NULL) {
+      fclose(mFile);
+      return false;
+    }
+  }
+  mFile = fopen(name.c_str(), "wb");
+  return (mFile != NULL);
+}
+
+ssize_t hadoop::FileOutStream::write(const void* buf, size_t len)
+{
+  return fwrite(buf, 1, len, mFile);
+}
+
+bool hadoop::FileOutStream::advance(size_t nbytes)
+{
+  return (0==fseek(mFile, nbytes, SEEK_CUR));
+}
+
+bool hadoop::FileOutStream::close()
+{
+  int ret = fclose(mFile);
+  mFile = NULL;
+  return (ret == 0);
+}
+
+hadoop::FileOutStream::~FileOutStream()
+{
+  if (mFile != NULL) {
+    close();
+  }
+}

Added: lucene/hadoop/trunk/src/c++/librecordio/filestream.hh
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/c%2B%2B/librecordio/filestream.hh?rev=399509&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/c++/librecordio/filestream.hh (added)
+++ lucene/hadoop/trunk/src/c++/librecordio/filestream.hh Wed May  3 19:04:01 2006
@@ -0,0 +1,53 @@
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef FILESTREAM_HH_
+#define FILESTREAM_HH_
+
+#include <stdio.h>
+#include <stdint.h>
+#include <string>
+#include "recordio.hh"
+
+namespace hadoop {
+
+class FileInStream : public InStream {
+public:
+  FileInStream();
+  bool open(const std::string& name);
+  ssize_t read(void *buf, size_t buflen);
+  bool skip(size_t nbytes);
+  bool close();
+  virtual ~FileInStream();
+private:
+  FILE *mFile;
+};
+
+
+class FileOutStream: public OutStream {
+public:
+  FileOutStream();
+  bool open(const std::string& name, bool overwrite);
+  ssize_t write(const void* buf, size_t len);
+  bool advance(size_t nbytes);
+  bool close();
+  virtual ~FileOutStream();
+private:
+  FILE *mFile;
+};
+
+}; // end namespace
+#endif /*FILESTREAM_HH_*/

Added: lucene/hadoop/trunk/src/c++/librecordio/recordio.cc
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/c%2B%2B/librecordio/recordio.cc?rev=399509&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/c++/librecordio/recordio.cc (added)
+++ lucene/hadoop/trunk/src/c++/librecordio/recordio.cc Wed May  3 19:04:01 2006
@@ -0,0 +1,73 @@
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "recordio.hh"
+#include "binarchive.hh"
+#include "csvarchive.hh"
+#include "xmlarchive.hh"
+
+using namespace hadoop;
+
+hadoop::RecordReader::RecordReader(InStream& stream, RecFormat f)
+{
+  switch (f) {
+    case kBinary:
+      mpArchive = new IBinArchive(stream);
+      break;
+    case kCSV:
+      mpArchive = new ICsvArchive(stream);
+      break;
+    case kXML:
+      mpArchive = new IXmlArchive(stream);
+      break;
+  }
+}
+
+hadoop::RecordReader::~RecordReader()
+{
+  delete mpArchive;
+}
+
+void hadoop::RecordReader::read(Record& record)
+{
+  record.deserialize(*mpArchive, (const char*) NULL);
+}
+
+hadoop::RecordWriter::RecordWriter(OutStream& stream, RecFormat f)
+{
+  switch (f) {
+    case kBinary:
+      mpArchive = new OBinArchive(stream);
+      break;
+    case kCSV:
+      mpArchive = new OCsvArchive(stream);
+      break;
+    case kXML:
+      mpArchive = new OXmlArchive(stream);
+      break;
+  }
+}
+
+hadoop::RecordWriter::~RecordWriter()
+{
+  delete mpArchive;
+}
+
+void hadoop::RecordWriter::write(Record& record)
+{
+  record.serialize(*mpArchive, (const char*) NULL);
+}
+

Added: lucene/hadoop/trunk/src/c++/librecordio/recordio.hh
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/c%2B%2B/librecordio/recordio.hh?rev=399509&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/c++/librecordio/recordio.hh (added)
+++ lucene/hadoop/trunk/src/c++/librecordio/recordio.hh Wed May  3 19:04:01 2006
@@ -0,0 +1,78 @@
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef RECORDIO_HH_
+#define RECORDIO_HH_
+
+#include <stdio.h>
+#include <stdint.h>
+#include <iostream>
+#include <cstring>
+#include <string>
+#include <vector>
+#include <map>
+#include <bitset>
+
+namespace hadoop {
+  
+class InStream {
+public:
+  virtual ssize_t read(void *buf, size_t buflen) = 0;
+};
+
+class OutStream {
+public:
+  virtual ssize_t write(const void *buf, size_t len) = 0;
+};
+
+class IArchive;
+class OArchive;
+
+class Record {
+public:
+  virtual bool validate() const = 0;
+  virtual void serialize(OArchive& archive, const char* tag) = 0;
+  virtual void deserialize(IArchive& archive, const char* tag) = 0;
+  virtual const std::string& type() const = 0;
+  virtual const std::string& signature() const = 0;
+};
+
+enum RecFormat { kBinary, kXML, kCSV };
+
+class RecordReader {
+private:
+  IArchive* mpArchive;
+public:
+  RecordReader(InStream& stream, RecFormat f);
+  virtual void read(hadoop::Record& record);
+  virtual ~RecordReader();
+};
+
+class RecordWriter {
+private:
+  OArchive* mpArchive;
+public:
+  RecordWriter(OutStream& stream, RecFormat f);
+  virtual void write(hadoop::Record& record);
+  virtual ~RecordWriter();
+};
+}; // end namespace hadoop
+
+#include "archive.hh"
+#include "exception.hh"
+
+#endif /*RECORDIO_HH_*/
+
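
For orientation, a minimal round-trip sketch against the reader/writer API
above, assuming the accessor names rcc generates for test.jr (the same
names test/test.cc below exercises):

    #include "recordio.hh"
    #include "filestream.hh"
    #include "test.jr.hh"   // generated from test.jr by bin/rcc --language c++

    int main() {
      org::apache::hadoop::record::test::TestRecord0 rec;
      rec.getStringVal() = "hello";      // generated accessors return references

      hadoop::FileOutStream out;
      out.open("/tmp/rec0.csv", true);   // second argument: overwrite if present
      hadoop::RecordWriter writer(out, hadoop::kCSV);
      writer.write(rec);
      out.close();

      org::apache::hadoop::record::test::TestRecord0 copy;
      hadoop::FileInStream in;
      in.open("/tmp/rec0.csv");
      hadoop::RecordReader reader(in, hadoop::kCSV);
      reader.read(copy);
      in.close();
      return 0;
    }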

Added: lucene/hadoop/trunk/src/c++/librecordio/test/Makefile
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/c%2B%2B/librecordio/test/Makefile?rev=399509&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/c++/librecordio/test/Makefile (added)
+++ lucene/hadoop/trunk/src/c++/librecordio/test/Makefile Wed May  3 19:04:01 2006
@@ -0,0 +1,46 @@
+#
+# Copyright 2005 The Apache Software Foundation
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+
+all: test testFromJava
+
+test: test.o test.jr.o
+	g++ -g3 -O0 -o test test.o test.jr.o -L.. -L${XERCESCROOT}/lib -lrecordio -lxerces-c
+	
+test.o: test.cc
+	g++ -g3 -O0 -c -I.. -o test.o test.cc
+
+testFromJava: testFromJava.o test.jr.o
+	g++ -g3 -O0 -o testFromJava testFromJava.o test.jr.o -L.. -L${XERCESCROOT}/lib -lrecordio -lxerces-c
+	
+testFromJava.o: testFromJava.cc
+	g++ -g3 -O0 -c -I.. -o testFromJava.o testFromJava.cc
+
+
+test.jr.o: test.jr.cc
+	g++ -g3 -O0 -c -I.. -o test.jr.o test.jr.cc
+
+%.jr.cc %.jr.hh: %.jr
+	${HADOOP_HOME}/bin/rcc --language c++ $<
+
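+# The empty pattern rules below cancel make's built-in "%: %.o" and
+# "%: %.cc" rules, so executables are linked only through the explicit
+# rules above.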
+%: %.o
+%: %.cc
+
+test.cc: test.hh
+test.hh: test.jr.hh ../recordio.hh ../filestream.hh
+
+clean:
+	rm -f *~ *.o test testFromJava *.jr.*
+

Added: lucene/hadoop/trunk/src/c++/librecordio/test/test.cc
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/c%2B%2B/librecordio/test/test.cc?rev=399509&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/c++/librecordio/test/test.cc (added)
+++ lucene/hadoop/trunk/src/c++/librecordio/test/test.cc Wed May  3 19:04:01 2006
@@ -0,0 +1,109 @@
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "test.hh"
+
+int main()
+{
+  org::apache::hadoop::record::test::TestRecord1 r1;
+  org::apache::hadoop::record::test::TestRecord1 r2;
+  {
+    hadoop::FileOutStream ostream;
+    ostream.open("/tmp/hadooptmp.dat", true);
+    hadoop::RecordWriter writer(ostream, hadoop::kBinary);
+    r1.setBoolVal(true);
+    r1.setByteVal((int8_t)0x66);
+    r1.setFloatVal(3.145);
+    r1.setDoubleVal(1.5234);
+    r1.setIntVal(4567);
+    r1.setLongVal(0x5a5a5a5a5a5aLL);
+    std::string& s = r1.getStringVal();
+    s = "random text";
+    std::string& buf = r1.getBufferVal();
+    std::vector<std::string>& v = r1.getVectorVal();
+    std::map<std::string,std::string>& m = r1.getMapVal();
+    writer.write(r1);
+    ostream.close();
+    hadoop::FileInStream istream;
+    istream.open("/tmp/hadooptmp.dat");
+    hadoop::RecordReader reader(istream, hadoop::kBinary);
+    reader.read(r2);
+    if (r1 == r2) {
+      printf("Binary archive test passed.\n");
+    } else {
+      printf("Binary archive test failed.\n");
+    }
+    istream.close();
+  }
+  {
+    hadoop::FileOutStream ostream;
+    ostream.open("/tmp/hadooptmp.txt", true);
+    hadoop::RecordWriter writer(ostream, hadoop::kCSV);
+    r1.setBoolVal(true);
+    r1.setByteVal((int8_t)0x66);
+    r1.setFloatVal(3.145);
+    r1.setDoubleVal(1.5234);
+    r1.setIntVal(4567);
+    r1.setLongVal(0x5a5a5a5a5a5aLL);
+    std::string& s = r1.getStringVal();
+    s = "random text";
+    std::string& buf = r1.getBufferVal();
+    std::vector<std::string>& v = r1.getVectorVal();
+    std::map<std::string,std::string>& m = r1.getMapVal();
+    writer.write(r1);
+    ostream.close();
+    hadoop::FileInStream istream;
+    istream.open("/tmp/hadooptmp.txt");
+    hadoop::RecordReader reader(istream, hadoop::kCSV);
+    reader.read(r2);
+    if (r1 == r2) {
+      printf("CSV archive test passed.\n");
+    } else {
+      printf("CSV archive test failed.\n");
+    }
+    istream.close();
+  }
+  {
+    hadoop::FileOutStream ostream;
+    ostream.open("/tmp/hadooptmp.xml", true);
+    hadoop::RecordWriter writer(ostream, hadoop::kXML);
+    r1.setBoolVal(true);
+    r1.setByteVal((int8_t)0x66);
+    r1.setFloatVal(3.145);
+    r1.setDoubleVal(1.5234);
+    r1.setIntVal(4567);
+    r1.setLongVal(0x5a5a5a5a5a5aLL);
+    std::string& s = r1.getStringVal();
+    s = "random text";
+    std::string& buf = r1.getBufferVal();
+    std::vector<std::string>& v = r1.getVectorVal();
+    std::map<std::string,std::string>& m = r1.getMapVal();
+    writer.write(r1);
+    ostream.close();
+    hadoop::FileInStream istream;
+    istream.open("/tmp/hadooptmp.xml");
+    hadoop::RecordReader reader(istream, hadoop::kXML);
+    reader.read(r2);
+    if (r1 == r2) {
+      printf("XML archive test passed.\n");
+    } else {
+      printf("XML archive test failed.\n");
+    }
+    istream.close();
+  }
+  return 0;
+}
+

Added: lucene/hadoop/trunk/src/c++/librecordio/test/test.hh
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/c%2B%2B/librecordio/test/test.hh?rev=399509&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/c++/librecordio/test/test.hh (added)
+++ lucene/hadoop/trunk/src/c++/librecordio/test/test.hh Wed May  3 19:04:01 2006
@@ -0,0 +1,24 @@
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TEST_HH_
+#define TEST_HH_
+
+#include "recordio.hh"
+#include "filestream.hh"
+#include "test.jr.hh"
+
+#endif /*TEST_HH_*/

Added: lucene/hadoop/trunk/src/c++/librecordio/test/test.jr
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/c%2B%2B/librecordio/test/test.jr?rev=399509&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/c++/librecordio/test/test.jr (added)
+++ lucene/hadoop/trunk/src/c++/librecordio/test/test.jr Wed May  3 19:04:01 2006
@@ -0,0 +1,35 @@
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+module org.apache.hadoop.record.test {
+    class TestRecord0 {
+        ustring     StringVal;
+    }
+
+    class TestRecord1 {
+        boolean         BoolVal;
+        byte            ByteVal;
+        int             IntVal;
+        long            LongVal;
+        float           FloatVal;
+        double          DoubleVal;
+        ustring         StringVal;
+        buffer          BufferVal;
+        vector<ustring> VectorVal;
+        map<ustring, ustring>   MapVal;
+    }
+}
+
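
For each class above, rcc emits a C++ class in the module's namespace with
per-field accessors. Illustratively, assuming the naming that test.cc below
exercises:

    org::apache::hadoop::record::test::TestRecord1 r;
    r.setIntVal(42);                     // scalar fields have setX()/getX()
    r.getVectorVal().push_back("elem");  // composite fields come back by reference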

Added: lucene/hadoop/trunk/src/c++/librecordio/test/testFromJava.cc
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/c%2B%2B/librecordio/test/testFromJava.cc?rev=399509&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/c++/librecordio/test/testFromJava.cc (added)
+++ lucene/hadoop/trunk/src/c++/librecordio/test/testFromJava.cc Wed May  3 19:04:01 2006
@@ -0,0 +1,72 @@
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "test.hh"
+
+int main()
+{
+  org::apache::hadoop::record::test::TestRecord1 r1;
+  org::apache::hadoop::record::test::TestRecord1 r2;
+  r1.setBoolVal(true);
+  r1.setByteVal((int8_t)0x66);
+  r1.setFloatVal(3.145);
+  r1.setDoubleVal(1.5234);
+  r1.setIntVal(4567);
+  r1.setLongVal(0x5a5a5a5a5a5aLL);
+  std::string& s = r1.getStringVal();
+  s = "random text";
+  std::string& buf = r1.getBufferVal();
+  std::vector<std::string>& v = r1.getVectorVal();
+  std::map<std::string,std::string>& m = r1.getMapVal();
+  {
+    hadoop::FileInStream istream;
+    istream.open("/tmp/hadooptemp.dat");
+    hadoop::RecordReader reader(istream, hadoop::kBinary);
+    reader.read(r2);
+    if (r1 == r2) {
+      printf("Binary archive test passed.\n");
+    } else {
+      printf("Binary archive test failed.\n");
+    }
+    istream.close();
+  }
+  {
+    hadoop::FileInStream istream;
+    istream.open("/tmp/hadooptemp.txt");
+    hadoop::RecordReader reader(istream, hadoop::kCSV);
+    reader.read(r2);
+    if (r1 == r2) {
+      printf("CSV archive test passed.\n");
+    } else {
+      printf("CSV archive test failed.\n");
+    }
+    istream.close();
+  }
+  {
+    hadoop::FileInStream istream;
+    istream.open("/tmp/hadooptemp.xml");
+    hadoop::RecordReader reader(istream, hadoop::kXML);
+    reader.read(r2);
+    if (r1 == r2) {
+      printf("XML archive test passed.\n");
+    } else {
+      printf("XML archive test failed.\n");
+    }
+    istream.close();
+  }
+  return 0;
+}
+

Added: lucene/hadoop/trunk/src/c++/librecordio/test/testFromJava.hh
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/c%2B%2B/librecordio/test/testFromJava.hh?rev=399509&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/c++/librecordio/test/testFromJava.hh (added)
+++ lucene/hadoop/trunk/src/c++/librecordio/test/testFromJava.hh Wed May  3 19:04:01 2006
@@ -0,0 +1,24 @@
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#ifndef TESTFROMJAVA_HH_
+#define TESTFROMJAVA_HH_
+
+#include "recordio.hh"
+#include "filestream.hh"
+#include "test.jr.hh"
+
+#endif /*TESTFROMJAVA_HH_*/

Added: lucene/hadoop/trunk/src/c++/librecordio/xmlarchive.cc
URL: http://svn.apache.org/viewcvs/lucene/hadoop/trunk/src/c%2B%2B/librecordio/xmlarchive.cc?rev=399509&view=auto
==============================================================================
--- lucene/hadoop/trunk/src/c++/librecordio/xmlarchive.cc (added)
+++ lucene/hadoop/trunk/src/c++/librecordio/xmlarchive.cc Wed May  3 19:04:01 2006
@@ -0,0 +1,423 @@
+/**
+ * Copyright 2005 The Apache Software Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+#include "xmlarchive.hh"
+#include <stdlib.h>
+
+using namespace hadoop;
+
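+// The SAX callbacks below flatten the XML input into vlist, a sequence of
+// typed tokens: scalar elements (boolean, ex:i1, i4/int, ex:i8, ex:float,
+// double, string) also capture their character data, while struct and
+// array elements push paired "struct"/"/struct" and "array"/"/array"
+// markers that the deserialize methods later check against.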
+void hadoop::MySAXHandler::startElement(const XMLCh* const name, AttributeList& attr)
+{
+  charsValid = false;
+  char* qname = XMLString::transcode(name);
+  if(std::string("boolean") == qname ||
+    std::string("ex:i1") == qname ||
+    std::string("i4") == qname ||
+    std::string("int") == qname ||
+    std::string("ex:i8") == qname ||
+    std::string("ex:float") == qname ||
+    std::string("double") == qname ||
+    std::string("string") == qname) {
+    std::string s(qname);
+    Value v(s);
+    vlist.push_back(v);
+    charsValid = true;
+  } else if(std::string("struct") == qname ||
+    std::string("array") == qname) {
+    std::string s(qname);
+    Value v(s);
+    vlist.push_back(v);
+  }
+  XMLString::release(&qname);
+}
+
+void hadoop::MySAXHandler::endElement(const XMLCh* const name)
+{
+  charsValid = false;
+  char* qname = XMLString::transcode(name);
+  if(std::string("struct") == qname ||
+    std::string("array") == qname) {
+    std::string s = "/";
+    Value v(s + qname);
+    vlist.push_back(v);
+  }
+  XMLString::release(&qname);
+}
+
+void hadoop::MySAXHandler::characters(const XMLCh* const buf, const unsigned int len)
+{
+  if (charsValid) {
+    char *cstr = XMLString::transcode(buf);
+    Value& v = vlist.back();
+    v.addChars(cstr, strlen(cstr));
+    XMLString::release(&cstr);
+  }
+}
+
+static char hexchars[] = {'0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F' };
+
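+// Percent-encode a string for transport inside <string> elements:
+// alphanumerics pass through, a space becomes '+', and any other byte
+// becomes %XY in uppercase hex (e.g. "a b!" encodes as "a+b%21");
+// fromXMLString below reverses the mapping.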
+static std::string toXMLString(std::string s)
+{
+  std::string r;
+  size_t len = s.length();
+  size_t i;
+  const char* data = s.data();
+  for (i=0; i<len; i++, data++) {
+    char ch = *data;
+    if (((ch >= 'A') && (ch <='Z')) ||
+        ((ch >= 'a') && (ch <='z')) ||
+        ((ch >= '0') && (ch <='9'))) {
+        r.push_back(ch);
+    } else if (ch == ' ') {
+      r.push_back('+');
+    } else {
+      uint8_t* pb = (uint8_t*) &ch;
+      char ch1 = hexchars[*pb/16];
+      char ch2 = hexchars[*pb%16];
+      r.push_back('%');
+      r.push_back(ch1);
+      r.push_back(ch2);
+    }
+  }
+  return r;
+}
+
+// Map one hex digit to its value: 'A'-'F' and 'a'-'f' yield 10-15,
+// '0'-'9' yield 0-9.
+static uint8_t h2b(char ch) {
+  if ((ch >= 'A') && (ch <= 'F')) {
+    return ch - 'A' + 10;
+  }
+  if ((ch >= 'a') && (ch <= 'f')) {
+    return ch - 'a' + 10;
+  }
+  if ((ch >= '0') && (ch <= '9')) {
+    return ch - '0';
+  }
+  return 0;
+}
+
+static std::string fromXMLString(std::string s)
+{
+  std::string r;
+  size_t len = s.length();
+  size_t i;
+  uint8_t* pb = (uint8_t*) s.data();
+  for (i = 0; i < len; i++) {
+    uint8_t b = *pb;
+    if (b == '+') {
+      r.push_back(' ');
+    } else if (b == '%') {
+      char *pc = (char*) (pb+1);
+      char ch1 = *pc++;
+      char ch2 = *pc++;
+      pb += 2;
+      uint8_t cnv = h2b(ch1)*16 + h2b(ch2);
+      pc = (char*) &cnv;
+      r.push_back(*pc);
+    } else {
+      char *pc = (char*) pb;
+      r.push_back(*pc);
+    }
+    pb++;
+  }
+  return r;
+}
+
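+// Buffers are serialized as a plain hex dump, two uppercase hex digits per
+// byte (e.g. bytes 0x0F 0xA0 become "0FA0"); fromXMLBuffer decodes it and
+// reports the decoded byte count through len.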
+static std::string toXMLBuffer(std::string s, size_t len)
+{
+  std::string r;
+  size_t i;
+  uint8_t* data = (uint8_t*) s.data();
+  for (i=0; i<len; i++, data++) {
+    uint8_t b = *data;
+    char ch1 = hexchars[b/16];
+    char ch2 = hexchars[b%16];
+    r.push_back(ch1);
+    r.push_back(ch2);
+  }
+  return r;
+}
+
+static std::string fromXMLBuffer(std::string s, size_t& len)
+{
+  len = s.length();
+  if (len%2 == 1) { // two hex digits per byte, so the length must be even
+    throw new IOException("Error deserializing buffer.");
+  }
+  len >>= 1;
+  std::string t;
+  for (size_t idx = 0; idx < len; idx++) {
+    char buf[3];
+    buf[0] = s[2*idx];
+    buf[1] = s[2*idx+1];
+    buf[2] = '\0';
+    int i;
+    if (1 != sscanf(buf, "%2x", &i)) {
+      throw new IOException("Error deserializing buffer.");
+    }
+    t.push_back((char) i);
+  }
+  len = t.length();
+  return t;
+}
+
+void hadoop::IXmlArchive::deserialize(int8_t& t, const char* tag)
+{
+  Value v = next();
+  if (v.getType() != "ex:i1") {
+    throw new IOException("Error deserializing byte");
+  }
+  t = (int8_t) strtol(v.getValue().c_str(), NULL, 10);
+}
+
+void hadoop::IXmlArchive::deserialize(bool& t, const char* tag)
+{
+  Value v = next();
+  if (v.getType() != "boolean") {
+    throw new IOException("Error deserializing boolean");
+  }
+  t = (v.getValue() == "1");
+}
+
+void hadoop::IXmlArchive::deserialize(int32_t& t, const char* tag)
+{
+  Value v = next();
+  if (v.getType() != "i4" && v.getType() != "int") {
+    throw new IOException("Error deserializing int");
+  }
+  t = (int32_t) strtol(v.getValue().c_str(), NULL, 10);
+}
+
+void hadoop::IXmlArchive::deserialize(int64_t& t, const char* tag)
+{
+  Value v = next();
+  if (v.getType() != "ex:i8") {
+    throw new IOException("Error deserializing long");
+  }
+  t = strtoll(v.getValue().c_str(), NULL, 10);
+}
+
+void hadoop::IXmlArchive::deserialize(float& t, const char* tag)
+{
+  Value v = next();
+  if (v.getType() != "ex:float") {
+    throw new IOException("Error deserializing float");
+  }
+  t = strtof(v.getValue().c_str(), NULL);
+}
+
+void hadoop::IXmlArchive::deserialize(double& t, const char* tag)
+{
+  Value v = next();
+  if (v.getType() != "double") {
+    throw new IOException("Error deserializing double");
+  }
+  t = strtod(v.getValue().c_str(), NULL);
+}
+
+void hadoop::IXmlArchive::deserialize(std::string& t, const char* tag)
+{
+  Value v = next();
+  if (v.getType() != "string") {
+    throw new IOException("Error deserializing string");
+  }
+  t = fromXMLString(v.getValue());
+}
+
+void hadoop::IXmlArchive::deserialize(std::string& t, size_t& len, const char* tag)
+{
+  Value v = next();
+  if (v.getType() != "string") {
+    throw new IOException("Error deserializing buffer");
+  }
+  t = fromXMLBuffer(v.getValue(), len);
+}
+
+void hadoop::IXmlArchive::startRecord(Record& s, const char* tag)
+{
+  Value v = next();
+  if (v.getType() != "struct") {
+    throw new IOException("Error deserializing record");
+  }
+}
+
+void hadoop::IXmlArchive::endRecord(Record& s, const char* tag)
+{
+  Value v = next();
+  if (v.getType() != "/struct") {
+    throw new IOException("Error deserializing record");
+  }
+}
+
+Index* hadoop::IXmlArchive::startVector(const char* tag)
+{
+  Value v = next();
+  if (v.getType() != "array") {
+    throw new IOException("Error deserializing vector");
+  }
+  return new XmlIndex(vlist, vidx);
+}
+
+void hadoop::IXmlArchive::endVector(Index* idx, const char* tag)
+{
+  Value v = next();
+  if (v.getType() != "/array") {
+    throw new IOException("Error deserializing vector");
+  }
+  delete idx;
+}
+
+Index* hadoop::IXmlArchive::startMap(const char* tag)
+{
+  Value v = next();
+  if (v.getType() != "array") {
+    throw new IOException("Error deserializing map");
+  }
+  return new XmlIndex(vlist, vidx);
+}
+
+void hadoop::IXmlArchive::endMap(Index* idx, const char* tag)
+{
+  Value v = next();
+  if (v.getType() != "/array") {
+    throw new IOException("Error deserializing map");
+  }
+  delete idx;
+}
+
+void hadoop::OXmlArchive::serialize(int8_t t, const char* tag)
+{
+  printBeginEnvelope(tag);
+  p("<ex:i1>");
+  char sval[5];
+  sprintf(sval, "%d", t);
+  p(sval);
+  p("</ex:i1>");
+  printEndEnvelope(tag);
+}
+
+void hadoop::OXmlArchive::serialize(bool t, const char* tag)
+{
+  printBeginEnvelope(tag);
+  p("<boolean>");
+  p(t ? "1" : "0");
+  p("</boolean>");
+  printEndEnvelope(tag);
+}
+
+void hadoop::OXmlArchive::serialize(int32_t t, const char* tag)
+{
+  printBeginEnvelope(tag);
+  p("<i4>");
+  char sval[128];
+  sprintf(sval, "%d", t);
+  p(sval);
+  p("</i4>");
+  printEndEnvelope(tag);
+}
+
+void hadoop::OXmlArchive::serialize(int64_t t, const char* tag)
+{
+  printBeginEnvelope(tag);
+  p("<ex:i8>");
+  char sval[128];
+  sprintf(sval, "%lld", t);
+  p(sval);
+  p("</ex:i8>");
+  printEndEnvelope(tag);
+
+}
+
+void hadoop::OXmlArchive::serialize(float t, const char* tag)
+{
+  printBeginEnvelope(tag);
+  p("<ex:float>");
+  char sval[128];
+  sprintf(sval, "%f", t);
+  p(sval);
+  p("</ex:float>");
+  printEndEnvelope(tag);
+}
+
+void hadoop::OXmlArchive::serialize(double t, const char* tag)
+{
+  printBeginEnvelope(tag);
+  p("<double>");
+  char sval[128];
+  sprintf(sval, "%lf", t);
+  p(sval);
+  p("</double>");
+  printEndEnvelope(tag);
+}
+
+void hadoop::OXmlArchive::serialize(const std::string& t, const char* tag)
+{
+  printBeginEnvelope(tag);
+  p("<string>");
+  std::string s = toXMLString(t);
+  stream.write(s.data(), s.length());
+  p("</string>");
+  printEndEnvelope(tag);
+}
+
+void hadoop::OXmlArchive::serialize(const std::string& t, size_t len, const char* tag)
+{
+  printBeginEnvelope(tag);
+  p("<string>");
+  std::string s = toXMLBuffer(t, len);
+  stream.write(s.data(), s.length());
+  p("</string>");
+  printEndEnvelope(tag);
+}
+
+void hadoop::OXmlArchive::startRecord(const Record& s, const char* tag)
+{
+  insideRecord(tag);
+  p("<struct>\n");
+}
+
+void hadoop::OXmlArchive::endRecord(const Record& s, const char* tag)
+{
+  p("</struct>\n");
+  outsideRecord(tag);
+}
+
+void hadoop::OXmlArchive::startVector(size_t len, const char* tag)
+{
+  insideVector(tag);
+  p("<array>\n");
+}
+
+void hadoop::OXmlArchive::endVector(size_t len, const char* tag)
+{
+  p("</array>\n");
+  outsideVector(tag);
+}
+
+void hadoop::OXmlArchive::startMap(size_t len, const char* tag)
+{
+  insideMap(tag);
+  p("<array>\n");
+}
+
+void hadoop::OXmlArchive::endMap(size_t len, const char* tag)
+{
+  p("</array>\n");
+  outsideMap(tag);
+}
+
+hadoop::OXmlArchive::~OXmlArchive()
+{
+}