You are viewing a plain text version of this content. The canonical link for it is here.
Posted to dev@avro.apache.org by "Sean Busbey (JIRA)" <ji...@apache.org> on 2014/03/31 20:40:15 UTC

[jira] [Updated] (AVRO-1490) GenericDatumWriter throws exception for nullable schema fields

     [ https://issues.apache.org/jira/browse/AVRO-1490?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]

Sean Busbey updated AVRO-1490:
------------------------------

    Description: 
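GenericDatumWriter throws an exception when a GenericRecord leaves a nullable field (a union that includes "null") unset. Sample code that reproduces the problem and a proposed fix follow.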

Sample Schema:
{code}
{"namespace": "AvroSample",
 "type": "record",
 "name": "User",
 "fields": [
     {"name": "name", "type": "string"},
     {"name": "favorite_number",  "type": ["int", "null"]},
     {"name": "favorite_color", "type": ["string", "null"]}
 ]
}
{code}

Sample code to reproduce the issue:
{code}
namespace AvroSample
{
    /// <summary>
    /// Minimal reproduction: writes two generic records to an Avro data file,
    /// one of which never sets its nullable "favorite_color" field, and then
    /// reads the file back.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Builds two records against the schema loaded from "user.avpr",
        /// appends them to an Avro data file and prints every record read back.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        static void Main(string[] args)
        {
            Schema schema = LoadSchemaFromFile(@"user.avpr");

            GenericRecord user1 = new GenericRecord((RecordSchema)schema);
            user1.Add("name", "Alyssa");
            user1.Add("favorite_number", 256);
            // favorite_color is deliberately never added to user1: the omitted
            // nullable field is the case this sample exists to demonstrate.

            GenericRecord user2 = new GenericRecord((RecordSchema)schema);
            user2.Add("name", "Ben");
            user2.Add("favorite_number", 7);
            user2.Add("favorite_color", "red");

            // Serialize user1 and user2 to disk.
            // Verbatim literal, so the backslash must not be doubled.
            String outPath = @"C:\users.avro";

            GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schema);

            // The using block disposes the writer even when Append throws,
            // which a bare Dispose() call after the appends would not.
            using (IFileWriter<GenericRecord> dataFileWriter = DataFileWriter<GenericRecord>.OpenWriter(datumWriter, outPath))
            {
                dataFileWriter.Append(user1);
                dataFileWriter.Append(user2);
            }

            // Deserialize users from disk; the reader is disposed when the
            // block exits so the file handle is released.
            using (IFileReader<User> dataFileReader = DataFileReader<User>.OpenReader(outPath))
            {
                while (dataFileReader.HasNext())
                {
                    // Next() is called without a reuse argument, so each
                    // iteration works with the instance the reader returns.
                    User user = dataFileReader.Next();
                    Console.WriteLine("Name={0}", user.name);
                    Console.WriteLine(user.ToString());
                }
            }

            Console.WriteLine("Press any key to continue...");
            Console.ReadKey();
        }

        /// <summary>Parses an Avro schema from its JSON text.</summary>
        /// <param name="json">The schema definition as JSON.</param>
        /// <returns>The parsed schema.</returns>
        static Schema LoadSchema(string json)
        {
            return Schema.Parse(json);
        }

        /// <summary>Reads a UTF-8 schema file and parses its contents.</summary>
        /// <param name="filePath">Path of the file holding the schema JSON.</param>
        /// <returns>The parsed schema.</returns>
        static Schema LoadSchemaFromFile(string filePath)
        {
            String json = File.ReadAllText(filePath, Encoding.UTF8);
            return LoadSchema(json);
        }
    }
}

{code}

Code that fixes issue:
{code}
/// <summary>
/// Writes each field of a generic record with its corresponding field writer.
/// </summary>
/// <param name="recordObj">The record to write; cast to GenericRecord.</param>
/// <param name="writers">One writer per record field, applied in array order.</param>
/// <param name="encoder">The encoder handed to every field writer.</param>
protected override void WriteRecordFields(object recordObj, RecordFieldWriter[] writers, Encoder encoder)
        {
            GenericRecord genericRecord = (GenericRecord) recordObj;
            for (int index = 0; index < writers.Length; index++)
            {
                RecordFieldWriter fieldWriter = writers[index];

                // Changed from the indexer lookup
                //   writer.WriteField(record[writer.Field.Name], encoder);
                // to TryGetValue. The boolean result is intentionally ignored:
                // a field that was never added to the record is forwarded as
                // whatever TryGetValue leaves in the out parameter
                // (presumably null -- confirm against GenericRecord).
                object fieldValue = null;
                genericRecord.TryGetValue(fieldWriter.Field.Name, out fieldValue);
                fieldWriter.WriteField(fieldValue, encoder);
            }
        }
}
{code}

  was:
providing sample code and fix

{"namespace": "AvroSample",
 "type": "record",
 "name": "User",
 "fields": [
     {"name": "name", "type": "string"},
     {"name": "favorite_number",  "type": ["int", "null"]},
     {"name": "favorite_color", "type": ["string", "null"]}
 ]
}

Sample code
namespace AvroSample
{
    class Program
    {
        static void Main(string[] args)
        {
            Schema schema = LoadSchemaFromFile(@"user.avpr");

            GenericRecord user1 = new GenericRecord((RecordSchema)schema);
            user1.Add("name", "Alyssa");
            user1.Add("favorite_number", 256);
            // Leave favorite color null

            GenericRecord user2 = new GenericRecord((RecordSchema)schema);
            user2.Add("name", "Ben");
            user2.Add("favorite_number", 7);
            user2.Add("favorite_color", "red");

            // Serialize user1 and user2 to disk
            String outPath = @"C:\\users.avro";

            GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schema);
            IFileWriter<GenericRecord> dataFileWriter = DataFileWriter<GenericRecord>.OpenWriter(datumWriter, outPath);

            dataFileWriter.Append(user1);
            dataFileWriter.Append(user2);
            dataFileWriter.Dispose();

            // Deserialize users from disk
            
            IFileReader<User> dataFileReader = DataFileReader<User>.OpenReader(outPath);//, schema);
            User user = null;
            while (dataFileReader.HasNext())
            {
                // Reuse user object by passing it to next(). This saves us from
                // allocating and garbage collecting many objects for files with
                // many items.
                user = dataFileReader.Next();
                Console.WriteLine("Name={0}", user.name);  
                Console.WriteLine(user.ToString());
            }

            Console.WriteLine("Press any key to continue...");
            Console.ReadKey();
        }

        static Schema LoadSchema(string json)
        {
            Schema schema = Schema.Parse(json);
            return schema;
        }

        static Schema LoadSchemaFromFile(string filePath)
        {
            String json = File.ReadAllText(filePath, Encoding.UTF8);
            return LoadSchema(json);
        }
    }
}

Code that fixes issue:

protected override void WriteRecordFields(object recordObj, RecordFieldWriter[] writers, Encoder encoder)
        {
            var record = (GenericRecord) recordObj;
            foreach (var writer in writers)
            {
                  // Change from 
                  // writer.WriteField(record[writer.Field.Name], encoder);
                  // to
                object result = null;
                record.TryGetValue(writer.Field.Name, out result);
                writer.WriteField(result, encoder);
            }
        }
}


> GenericDatumWriter throws exception for nullable schema fields
> --------------------------------------------------------------
>
>                 Key: AVRO-1490
>                 URL: https://issues.apache.org/jira/browse/AVRO-1490
>             Project: Avro
>          Issue Type: Bug
>          Components: csharp
>    Affects Versions: 1.7.6
>         Environment: Windows, C#, .Net 4.5
>            Reporter: Johan Sundstrom
>
> providing sample code and fix
> Sample Schema:
> {code}
> {"namespace": "AvroSample",
>  "type": "record",
>  "name": "User",
>  "fields": [
>      {"name": "name", "type": "string"},
>      {"name": "favorite_number",  "type": ["int", "null"]},
>      {"name": "favorite_color", "type": ["string", "null"]}
>  ]
> }
> {code}
> Sample code to recreate
> {code}
> namespace AvroSample
> {
>     class Program
>     {
>         static void Main(string[] args)
>         {
>             Schema schema = LoadSchemaFromFile(@"user.avpr");
>             GenericRecord user1 = new GenericRecord((RecordSchema)schema);
>             user1.Add("name", "Alyssa");
>             user1.Add("favorite_number", 256);
>             // Leave favorite color null
>             GenericRecord user2 = new GenericRecord((RecordSchema)schema);
>             user2.Add("name", "Ben");
>             user2.Add("favorite_number", 7);
>             user2.Add("favorite_color", "red");
>             // Serialize user1 and user2 to disk
>             String outPath = @"C:\\users.avro";
>             GenericDatumWriter<GenericRecord> datumWriter = new GenericDatumWriter<GenericRecord>(schema);
>             IFileWriter<GenericRecord> dataFileWriter = DataFileWriter<GenericRecord>.OpenWriter(datumWriter, outPath);
>             dataFileWriter.Append(user1);
>             dataFileWriter.Append(user2);
>             dataFileWriter.Dispose();
>             // Deserialize users from disk
>             
>             IFileReader<User> dataFileReader = DataFileReader<User>.OpenReader(outPath);//, schema);
>             User user = null;
>             while (dataFileReader.HasNext())
>             {
>                 // Reuse user object by passing it to next(). This saves us from
>                 // allocating and garbage collecting many objects for files with
>                 // many items.
>                 user = dataFileReader.Next();
>                 Console.WriteLine("Name={0}", user.name);  
>                 Console.WriteLine(user.ToString());
>             }
>             Console.WriteLine("Press any key to continue...");
>             Console.ReadKey();
>         }
>         static Schema LoadSchema(string json)
>         {
>             Schema schema = Schema.Parse(json);
>             return schema;
>         }
>         static Schema LoadSchemaFromFile(string filePath)
>         {
>             String json = File.ReadAllText(filePath, Encoding.UTF8);
>             return LoadSchema(json);
>         }
>     }
> }
> {code}
> Code that fixes issue:
> {code}
> protected override void WriteRecordFields(object recordObj, RecordFieldWriter[] writers, Encoder encoder)
>         {
>             var record = (GenericRecord) recordObj;
>             foreach (var writer in writers)
>             {
>                   // Change from 
>                   // writer.WriteField(record[writer.Field.Name], encoder);
>                   // to
>                 object result = null;
>                 record.TryGetValue(writer.Field.Name, out result);
>                 writer.WriteField(result, encoder);
>             }
>         }
> }
> {code}



--
This message was sent by Atlassian JIRA
(v6.2#6252)