You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by ma...@apache.org on 2010/02/23 19:25:43 UTC

svn commit: r915468 - in /hadoop/avro/trunk: CHANGES.txt lang/c/src/io.c

Author: massie
Date: Tue Feb 23 18:25:43 2010
New Revision: 915468

URL: http://svn.apache.org/viewvc?rev=915468&view=rev
Log:
AVRO-428. Improve file read performance by buffering data

Modified:
    hadoop/avro/trunk/CHANGES.txt
    hadoop/avro/trunk/lang/c/src/io.c

Modified: hadoop/avro/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/CHANGES.txt?rev=915468&r1=915467&r2=915468&view=diff
==============================================================================
--- hadoop/avro/trunk/CHANGES.txt (original)
+++ hadoop/avro/trunk/CHANGES.txt Tue Feb 23 18:25:43 2010
@@ -336,6 +336,8 @@
 
     AVRO-417. Produce Java documentation archive.  (Scott Carey via cutting)
 
+    AVRO-428. Improve file read performance by buffering data (massie)
+
   OPTIMIZATIONS
 
     AVRO-172. More efficient schema processing (massie)

Modified: hadoop/avro/trunk/lang/c/src/io.c
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/io.c?rev=915468&r1=915467&r2=915468&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/io.c (original)
+++ hadoop/avro/trunk/lang/c/src/io.c Tue Feb 23 18:25:43 2010
@@ -42,6 +42,9 @@
 struct avro_file_reader_t {
 	struct avro_reader_t reader;
 	FILE *fp;
+	char *cur;
+	char *end;
+	char buffer[4096];
 };
 
 struct avro_file_writer_t {
@@ -91,6 +94,7 @@
 	if (!file_reader) {
 		return NULL;
 	}
+	memset(file_reader, 0, sizeof(struct avro_file_reader_t));
 	file_reader->fp = fp;
 	reader_init(&file_reader->reader, AVRO_FILE_IO);
 	return &file_reader->reader;
@@ -149,15 +153,58 @@
 	return 0;
 }
 
+#define bytes_available(reader) (reader->end - reader->cur)
+#define buffer_reset(reader) {reader->cur = reader->end = reader->buffer;}
+
 static int
 avro_read_file(struct avro_file_reader_t *reader, void *buf, int64_t len)
 {
-	int rval = fread(buf, len, 1, reader->fp);
+	int64_t needed = len;
+	void *p = buf;
+	int rval;
 
-	if (rval == 0) {
-		return ferror(reader->fp) || feof(reader->fp) ? -1 : 0;
+	if (len == 0) {
+		return 0;
 	}
-	return 0;
+
+	if (needed > sizeof(reader->buffer)) {
+		if (bytes_available(reader) > 0) {
+			memcpy(p, reader->cur, bytes_available(reader));
+			p += bytes_available(reader);
+			needed -= bytes_available(reader);
+			buffer_reset(reader);
+		}
+		rval = fread(p, 1, needed, reader->fp);
+		if (rval != needed) {
+			return -1;
+		}
+		return 0;
+	} else if (needed <= bytes_available(reader)) {
+		memcpy(p, reader->cur, needed);
+		reader->cur += needed;
+		return 0;
+	} else {
+		memcpy(p, reader->cur, bytes_available(reader));
+		p += bytes_available(reader);
+		needed -= bytes_available(reader);
+
+		rval =
+		    fread(reader->buffer, 1, sizeof(reader->buffer),
+			  reader->fp);
+		if (rval == 0) {
+			return -1;
+		}
+		reader->cur = reader->buffer;
+		reader->end = reader->cur + rval;
+
+		if (bytes_available(reader) < needed) {
+			return -1;
+		}
+		memcpy(p, reader->cur, needed);
+		reader->cur += needed;
+		return 0;
+	}
+	return -1;
 }
 
 int avro_read(avro_reader_t reader, void *buf, int64_t len)
@@ -188,8 +235,17 @@
 static int avro_skip_file(struct avro_file_reader_t *reader, int64_t len)
 {
 	int rval;
-	if (len > 0) {
-		rval = fseek(reader->fp, len, SEEK_CUR);
+	int64_t needed = len;
+
+	if (len == 0) {
+		return 0;
+	}
+	if (needed <= bytes_available(reader)) {
+		reader->cur += needed;
+	} else {
+		needed -= bytes_available(reader);
+		buffer_reset(reader);
+		rval = fseek(reader->fp, needed, SEEK_CUR);
 		if (rval < 0) {
 			return rval;
 		}