You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by ma...@apache.org on 2010/02/23 19:25:43 UTC
svn commit: r915468 - in /hadoop/avro/trunk: CHANGES.txt lang/c/src/io.c
Author: massie
Date: Tue Feb 23 18:25:43 2010
New Revision: 915468
URL: http://svn.apache.org/viewvc?rev=915468&view=rev
Log:
AVRO-428. Improve file read performance by buffering data
Modified:
hadoop/avro/trunk/CHANGES.txt
hadoop/avro/trunk/lang/c/src/io.c
Modified: hadoop/avro/trunk/CHANGES.txt
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/CHANGES.txt?rev=915468&r1=915467&r2=915468&view=diff
==============================================================================
--- hadoop/avro/trunk/CHANGES.txt (original)
+++ hadoop/avro/trunk/CHANGES.txt Tue Feb 23 18:25:43 2010
@@ -336,6 +336,8 @@
AVRO-417. Produce Java documentation archive. (Scott Carey via cutting)
+ AVRO-428. Improve file read performance by buffering data (massie)
+
OPTIMIZATIONS
AVRO-172. More efficient schema processing (massie)
Modified: hadoop/avro/trunk/lang/c/src/io.c
URL: http://svn.apache.org/viewvc/hadoop/avro/trunk/lang/c/src/io.c?rev=915468&r1=915467&r2=915468&view=diff
==============================================================================
--- hadoop/avro/trunk/lang/c/src/io.c (original)
+++ hadoop/avro/trunk/lang/c/src/io.c Tue Feb 23 18:25:43 2010
@@ -42,6 +42,9 @@
struct avro_file_reader_t {
struct avro_reader_t reader;
FILE *fp;
+ char *cur;
+ char *end;
+ char buffer[4096];
};
struct avro_file_writer_t {
@@ -91,6 +94,7 @@
if (!file_reader) {
return NULL;
}
+ memset(file_reader, 0, sizeof(struct avro_file_reader_t));
file_reader->fp = fp;
reader_init(&file_reader->reader, AVRO_FILE_IO);
return &file_reader->reader;
@@ -149,15 +153,58 @@
return 0;
}
+#define bytes_available(reader) (reader->end - reader->cur)
+#define buffer_reset(reader) {reader->cur = reader->end = reader->buffer;}
+
static int
avro_read_file(struct avro_file_reader_t *reader, void *buf, int64_t len)
{
- int rval = fread(buf, len, 1, reader->fp);
+ int64_t needed = len;
+ void *p = buf;
+ int rval;
- if (rval == 0) {
- return ferror(reader->fp) || feof(reader->fp) ? -1 : 0;
+ if (len == 0) {
+ return 0;
}
- return 0;
+
+ if (needed > sizeof(reader->buffer)) {
+ if (bytes_available(reader) > 0) {
+ memcpy(p, reader->cur, bytes_available(reader));
+ p += bytes_available(reader);
+ needed -= bytes_available(reader);
+ buffer_reset(reader);
+ }
+ rval = fread(p, 1, needed, reader->fp);
+ if (rval != needed) {
+ return -1;
+ }
+ return 0;
+ } else if (needed <= bytes_available(reader)) {
+ memcpy(p, reader->cur, needed);
+ reader->cur += needed;
+ return 0;
+ } else {
+ memcpy(p, reader->cur, bytes_available(reader));
+ p += bytes_available(reader);
+ needed -= bytes_available(reader);
+
+ rval =
+ fread(reader->buffer, 1, sizeof(reader->buffer),
+ reader->fp);
+ if (rval == 0) {
+ return -1;
+ }
+ reader->cur = reader->buffer;
+ reader->end = reader->cur + rval;
+
+ if (bytes_available(reader) < needed) {
+ return -1;
+ }
+ memcpy(p, reader->cur, needed);
+ reader->cur += needed;
+ return 0;
+ }
+ return -1;
}
int avro_read(avro_reader_t reader, void *buf, int64_t len)
@@ -188,8 +235,17 @@
static int avro_skip_file(struct avro_file_reader_t *reader, int64_t len)
{
int rval;
- if (len > 0) {
- rval = fseek(reader->fp, len, SEEK_CUR);
+ int64_t needed = len;
+
+ if (len == 0) {
+ return 0;
+ }
+ if (needed <= bytes_available(reader)) {
+ reader->cur += needed;
+ } else {
+ needed -= bytes_available(reader);
+ buffer_reset(reader);
+ rval = fseek(reader->fp, needed, SEEK_CUR);
if (rval < 0) {
return rval;
}