You are viewing a plain-text version of this content. The canonical HTML version is available from the mailing-list archive.
Posted to common-commits@hadoop.apache.org by cu...@apache.org on 2007/09/05 23:55:33 UTC

svn commit: r573081 - in /lucene/hadoop/trunk: CHANGES.txt src/c++/librecordio/csvarchive.cc

Author: cutting
Date: Wed Sep  5 14:55:32 2007
New Revision: 573081

URL: http://svn.apache.org/viewvc?rev=573081&view=rev
Log:
Fix escape processing in librecordio to not be quadratic.  Contributed by Vivek Ratan.

Modified:
    lucene/hadoop/trunk/CHANGES.txt
    lucene/hadoop/trunk/src/c++/librecordio/csvarchive.cc

Modified: lucene/hadoop/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/CHANGES.txt?rev=573081&r1=573080&r2=573081&view=diff
==============================================================================
--- lucene/hadoop/trunk/CHANGES.txt (original)
+++ lucene/hadoop/trunk/CHANGES.txt Wed Sep  5 14:55:32 2007
@@ -89,6 +89,9 @@
     HADOOP-1806.  Fix ant task to compile again, also fix default
     builds to compile ant tasks.  (Chris Douglas via cutting)
 
+    HADOOP-1758.  Fix escape processing in librecordio to not be
+    quadratic.  (Vivek Ratan via cutting)
+
   IMPROVEMENTS
 
     HADOOP-1779. Replace INodeDirectory.getINode() by a getExistingPathINodes()

Modified: lucene/hadoop/trunk/src/c++/librecordio/csvarchive.cc
URL: http://svn.apache.org/viewvc/lucene/hadoop/trunk/src/c%2B%2B/librecordio/csvarchive.cc?rev=573081&r1=573080&r2=573081&view=diff
==============================================================================
--- lucene/hadoop/trunk/src/c++/librecordio/csvarchive.cc (original)
+++ lucene/hadoop/trunk/src/c++/librecordio/csvarchive.cc Wed Sep  5 14:55:32 2007
@@ -76,31 +76,42 @@
   t = strtod(s.c_str(), NULL);
 }
 
-static void replaceAll(std::string s, const char *src, char c)
-{
-  std::string::size_type pos = 0;
-  while (pos != std::string::npos) {
-    pos = s.find(src);
-    if (pos != std::string::npos) {
-      s.replace(pos, strlen(src), 1, c);
-    }
-  }
-}
-
 void hadoop::ICsvArchive::deserialize(std::string& t, const char* tag)
 {
-  t = readUptoTerminator(stream);
-  if (t[0] != '\'') {
+  std::string temp = readUptoTerminator(stream);
+  if (temp[0] != '\'') {
     throw new IOException("Errror deserializing string.");
   }
-  t.erase(0, 1); /// erase first character
-  replaceAll(t, "%0D", 0x0D);
-  replaceAll(t, "%0A", 0x0A);
-  replaceAll(t, "%7D", 0x7D);
-  replaceAll(t, "%00", 0x00);
-  replaceAll(t, "%2C", 0x2C);
-  replaceAll(t, "%25", 0x25);
-
+  t.clear();
+  // skip first character, replace escaped characters 
+  int len = temp.length();
+  for (int i = 1; i < len; i++) {
+    char c = temp.at(i);
+    if (c == '%') {
+      // since we escape '%', there have to be at least two chars following a '%'
+      char ch1 = temp.at(i+1);
+      char ch2 = temp.at(i+2);
+      i += 2;
+	  if (ch1 == '0' && ch2 == '0') {
+	    t.append(1, '\0');
+	  } else if (ch1 == '0' && ch2 == 'A') {
+	    t.append(1, '\n');
+	  } else if (ch1 == '0' && ch2 == 'D') {
+	    t.append(1, '\r');
+	  } else if (ch1 == '2' && ch2 == 'C') {
+	    t.append(1, ',');
+	  } else if (ch1 == '7' && ch2 == 'D') {
+	    t.append(1, '}');
+	  } else if (ch1 == '2' && ch2 == '5') {
+	    t.append(1, '%');
+	  } else {
+	    throw new IOException("Error deserializing string.");
+	  }
+    } 
+    else {
+      t.append(1, c);
+    }
+  }
 }
 
 void hadoop::ICsvArchive::deserialize(std::string& t, size_t& len, const char* tag)