You are viewing a plain text version of this content. The canonical link for it is here.
Posted to user@avro.apache.org by felix gao <gr...@gmail.com> on 2010/12/23 01:59:46 UTC
Avro Python appending data
Hi all,
I am having trouble adding more data into a file.
Environment: Python 2.6.5, avro-1.3.3-py2.6
The program looks like this:
from avro import schema, datafile, io
# Path of the Avro data file that this script appends to and then reads back.
OUTFILE_NAME = 'sample.avro'
# JSON text of the record schema: string fields "name" and "address",
# an int field "age" and a long field "value".
SCHEMA_STR = """{
"type": "record",
"name": "bkSampleAvro",
"namespace": "bk_avro_example",
"fields": [
{ "name": "name" , "type": "string" },
{ "name": "age" , "type": "int" },
{ "name": "address", "type": "string" },
{ "name": "value" , "type": "long" }
]
}"""
# Schema object parsed from SCHEMA_STR; write_avro_file() passes it to its writers.
SCHEMA = schema.parse(SCHEMA_STR)
def write_avro_file():
    """Append one hard-coded sample record to OUTFILE_NAME.

    Each call opens the file in binary append mode, builds a new
    DataFileWriter with SCHEMA as the writer's schema and the 'deflate'
    codec, appends a single record, and closes the writer.
    """
    # The sample record; its keys match the fields declared in SCHEMA_STR.
    sample = {
        'name': 'Foo',
        'age': 19,
        'address': '10, Bar Eggs Spam',
        'value': 800,
    }

    datum_writer = io.DatumWriter(SCHEMA)
    sink = open(OUTFILE_NAME, 'ab')
    # NOTE(review): the schema is handed to the writer on every call, even
    # when the file already holds data. The replies in this thread advise
    # against doing so when appending to an existing data file.
    out = datafile.DataFileWriter(
        sink,
        datum_writer,
        writers_schema=SCHEMA,
        codec='deflate',
    )
    out.append(sample)
    out.close()
def read_avro_file():
rec_reader = io.DatumReader()
df_reader = datafile.DataFileReader(
open(OUTFILE_NAME, "rb"),
rec_reader
)
for record in df_reader:
print record['name'], record['age']
print record['address'], record['value']
if __name__ == '__main__':
    # Write to the Avro file twice; each call appends one sample record.
    write_avro_file()
    write_avro_file()
    # Then read the file back and print the records it contains.
    read_avro_file()
The result looks like this:
Foo 19
10, Bar Eggs Spam 800
Traceback (most recent call last):
File "/Users/felixgao/Desktop/workspace/Python/avro/avroExample1.py", line
124, in <module>
read_avro_file()
File "/Users/felixgao/Desktop/workspace/Python/avro/avroExample1.py", line
112, in read_avro_file
for record in df_reader:
File
"/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-1.3.3-py2.6.egg/avro/datafile.py",
line 318, in next
datum = self.datum_reader.read(self.datum_decoder)
File
"/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-1.3.3-py2.6.egg/avro/io.py",
line 411, in read
return self.read_data(self.writers_schema, self.readers_schema, decoder)
File
"/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-1.3.3-py2.6.egg/avro/io.py",
line 456, in read_data
return self.read_record(writers_schema, readers_schema, decoder)
File
"/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-1.3.3-py2.6.egg/avro/io.py",
line 648, in read_record
field_val = self.read_data(field.type, readers_field.type, decoder)
File
"/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-1.3.3-py2.6.egg/avro/io.py",
line 434, in read_data
return decoder.read_utf8()
File
"/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-1.3.3-py2.6.egg/avro/io.py",
line 210, in read_utf8
return unicode(self.read_bytes(), "utf-8")
UnicodeDecodeError: 'utf8' codec can't decode bytes in position 14-15:
invalid data
If I remove the second write_avro_file() call then everything is fine. How
do I properly append more data to the file?
Thanks,
Felix
Re: Avro Python appending data
Posted by Jeff Hammerbacher <ha...@cloudera.com>.
Hey Felix,
See the test_append() function at
http://svn.apache.org/viewvc/avro/trunk/lang/py/test/test_datafile.py?view=markup
.
Regards,
Jeff
On Wed, Dec 22, 2010 at 4:59 PM, felix gao <gr...@gmail.com> wrote:
> Hi all,
>
> I am having trouble adding more data into a file.
>
> Environment: Python 2.6.5, avro-1.3.3-py2.6
>
> Program looks like this
>
> from avro import schema, datafile, io
>
> OUTFILE_NAME = 'sample.avro'
>
> SCHEMA_STR = """{
> "type": "record",
> "name": "bkSampleAvro",
> "namespace": "bk_avro_example",
> "fields": [
> { "name": "name" , "type": "string" },
> { "name": "age" , "type": "int" },
> { "name": "address", "type": "string" },
> { "name": "value" , "type": "long" }
> ]
> }"""
>
> SCHEMA = schema.parse(SCHEMA_STR)
> def write_avro_file():
> # Lets generate our data
> data = {}
> data['name'] = 'Foo'
> data['age'] = 19
> data['address'] = '10, Bar Eggs Spam'
> data['value'] = 800
>
> rec_writer = io.DatumWriter(SCHEMA)
>
> df_writer = datafile.DataFileWriter(
> open(OUTFILE_NAME, 'ab'),
> rec_writer,
> writers_schema = SCHEMA,
> codec = 'deflate'
> )
>
> df_writer.append(data)
>
> df_writer.close()
>
> def read_avro_file():
> rec_reader = io.DatumReader()
>
> df_reader = datafile.DataFileReader(
> open(OUTFILE_NAME, "rb"),
> rec_reader
> )
>
> for record in df_reader:
> print record['name'], record['age']
> print record['address'], record['value']
>
>
> if __name__ == '__main__':
> # Write an AVRO file first
> write_avro_file()
> write_avro_file()
>
> # Now, read it
> read_avro_file()
>
>
> The result looks like
>
> Foo 19
> 10, Bar Eggs Spam 800
> Traceback (most recent call last):
> File "/Users/felixgao/Desktop/workspace/Python/avro/avroExample1.py",
> line 124, in <module>
> read_avro_file()
> File "/Users/felixgao/Desktop/workspace/Python/avro/avroExample1.py",
> line 112, in read_avro_file
> for record in df_reader:
> File
> "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-1.3.3-py2.6.egg/avro/datafile.py",
> line 318, in next
> datum = self.datum_reader.read(self.datum_decoder)
> File
> "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-1.3.3-py2.6.egg/avro/io.py",
> line 411, in read
> return self.read_data(self.writers_schema, self.readers_schema,
> decoder)
> File
> "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-1.3.3-py2.6.egg/avro/io.py",
> line 456, in read_data
> return self.read_record(writers_schema, readers_schema, decoder)
> File
> "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-1.3.3-py2.6.egg/avro/io.py",
> line 648, in read_record
> field_val = self.read_data(field.type, readers_field.type, decoder)
> File
> "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-1.3.3-py2.6.egg/avro/io.py",
> line 434, in read_data
> return decoder.read_utf8()
> File
> "/opt/local/Library/Frameworks/Python.framework/Versions/2.6/lib/python2.6/site-packages/avro-1.3.3-py2.6.egg/avro/io.py",
> line 210, in read_utf8
> return unicode(self.read_bytes(), "utf-8")
> UnicodeDecodeError: 'utf8' codec can't decode bytes in position 14-15:
> invalid data
>
>
>
> if I remove the second write_avro_file() call then everything is fine. How
> to properly append more data into the file?
>
> Thanks,
>
> Felix
>
Re: Avro Python appending data
Posted by felix gao <gr...@gmail.com>.
Thanks guys, I will test it out.
On Wed, Dec 22, 2010 at 8:17 PM, Harsh J <qw...@gmail.com> wrote:
> Sorry, minor error, not 'wb', but 'ab+'
> >
> > df_writer = datafile.DataFileWriter(
> > open(OUTFILE_NAME, 'ab+'),
> > io.DatumWriter(),
> > )
>
> --
> Harsh J
> www.harshj.com
>
Re: Avro Python appending data
Posted by Harsh J <qw...@gmail.com>.
Sorry, minor error, not 'wb', but 'ab+'
>
> df_writer = datafile.DataFileWriter(
> open(OUTFILE_NAME, 'ab+'),
> io.DatumWriter(),
> )
--
Harsh J
www.harshj.com
Re: Avro Python appending data
Posted by Harsh J <qw...@gmail.com>.
Hi,
On Thu, Dec 23, 2010 at 6:29 AM, felix gao <gr...@gmail.com> wrote:
> Hi all,
>
> I am having trouble adding more data into a file.
>
> Environment: Python 2.6.5, avro-1.3.3-py2.6
>
> Program looks like this
I see you've read my blog post on Avro+Python :P
http://www.harshj.com/2010/04/25/writing-and-reading-avro-data-files-using-python/
> if I remove the second write_avro_file() call then everything is fine. How
> to properly append more data into the file?
To append to an existing datafile, do not initialize the writer object
with a writers_schema again. Just create it using:
df_writer = datafile.DataFileWriter(
open(OUTFILE_NAME, 'wb'),
io.DatumWriter(),
)
--
Harsh J
www.harshj.com