You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@avro.apache.org by mg...@apache.org on 2022/09/27 12:53:18 UTC

[avro] branch avro-3630-append-to-preexisting-bytes updated (fabb82b96 -> 74f1d89ea)

This is an automated email from the ASF dual-hosted git repository.

mgrigorov pushed a change to branch avro-3630-append-to-preexisting-bytes
in repository https://gitbox.apache.org/repos/asf/avro.git


 discard fabb82b96 AVRO-3630: [Rust] Make it possible to extend pre-existing Avro bytes
     new 74f1d89ea AVRO-3630: [Rust] Make it possible to extend pre-existing Avro bytes

This update added new revisions after undoing existing revisions.
That is to say, some revisions that were in the old version of the
branch are not in the new version.  This situation occurs
when a user --force pushes a change and generates a repository
containing something like this:

 * -- * -- B -- O -- O -- O   (fabb82b96)
            \
             N -- N -- N   refs/heads/avro-3630-append-to-preexisting-bytes (74f1d89ea)

You should already have received notification emails for all of the O
revisions, and so the following emails describe only the N revisions
from the common base, B.

Any revisions marked "omit" are not gone; other references still
refer to them.  Any revisions marked "discard" are gone forever.

The 1 revision listed above as "new" is entirely new to this
repository and will be described in a separate email.  The revisions
listed as "add" were already present in the repository and have only
been added to this reference.


Summary of changes:
 lang/rust/avro/tests/append_to_existing.rs | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)


[avro] 01/01: AVRO-3630: [Rust] Make it possible to extend pre-existing Avro bytes

Posted by mg...@apache.org.
This is an automated email from the ASF dual-hosted git repository.

mgrigorov pushed a commit to branch avro-3630-append-to-preexisting-bytes
in repository https://gitbox.apache.org/repos/asf/avro.git

commit 74f1d89ea93e171eb92aabc8ef7a866575aea116
Author: Martin Tzvetanov Grigorov <mg...@apache.org>
AuthorDate: Tue Sep 27 15:43:36 2022 +0300

    AVRO-3630: [Rust] Make it possible to extend pre-existing Avro bytes
    
    Make it possible to pass a block marker to Writer, so that it could
    append to pre-existing bytes (i.e. bytes created by another Writer)
    
    Signed-off-by: Martin Tzvetanov Grigorov <mg...@apache.org>
---
 lang/rust/avro/src/lib.rs                  |  4 +-
 lang/rust/avro/src/writer.rs               | 42 +++++++++++++--
 lang/rust/avro/tests/append_to_existing.rs | 86 ++++++++++++++++++++++++++++++
 3 files changed, 127 insertions(+), 5 deletions(-)

diff --git a/lang/rust/avro/src/lib.rs b/lang/rust/avro/src/lib.rs
index 679eb1cf5..9edb3cb31 100644
--- a/lang/rust/avro/src/lib.rs
+++ b/lang/rust/avro/src/lib.rs
@@ -746,7 +746,9 @@ pub use reader::{from_avro_datum, GenericSingleObjectReader, Reader, SpecificSin
 pub use schema::{AvroSchema, Schema};
 pub use ser::to_value;
 pub use util::max_allocation_bytes;
-pub use writer::{to_avro_datum, GenericSingleObjectWriter, SpecificSingleObjectWriter, Writer};
+pub use writer::{
+    read_marker, to_avro_datum, GenericSingleObjectWriter, SpecificSingleObjectWriter, Writer,
+};
 
 #[cfg(feature = "derive")]
 pub use apache_avro_derive::*;
diff --git a/lang/rust/avro/src/writer.rs b/lang/rust/avro/src/writer.rs
index 98ceafe46..83b58c927 100644
--- a/lang/rust/avro/src/writer.rs
+++ b/lang/rust/avro/src/writer.rs
@@ -47,7 +47,7 @@ pub struct Writer<'a, W> {
     serializer: Serializer,
     #[builder(default = 0, setter(skip))]
     num_values: usize,
-    #[builder(default = generate_sync_marker(), setter(skip))]
+    #[builder(default = generate_sync_marker())]
     marker: Vec<u8>,
     #[builder(default = false, setter(skip))]
     has_header: bool,
@@ -60,9 +60,7 @@ impl<'a, W: Write> Writer<'a, W> {
     /// to.
     /// No compression `Codec` will be used.
     pub fn new(schema: &'a Schema, writer: W) -> Self {
-        let mut w = Self::builder().schema(schema).writer(writer).build();
-        w.resolved_schema = ResolvedSchema::try_from(schema).ok();
-        w
+        Writer::with_codec(schema, writer, Codec::Null)
     }
 
     /// Creates a `Writer` with a specific `Codec` given a `Schema` and something implementing the
@@ -77,6 +75,33 @@ impl<'a, W: Write> Writer<'a, W> {
         w
     }
 
+    /// Creates a `Writer` that appends values to an already populated
+    /// `std::io::Write`, reusing the provided `marker`.
+    /// Values are written with `Codec::Null`, i.e. without compression.
+    pub fn extend_from(schema: &'a Schema, writer: W, marker: Vec<u8>) -> Self {
+        Self::extend_from_with_codec(schema, writer, Codec::Null, marker)
+    }
+
+    /// Creates a `Writer` that appends values, using the given `codec`, to an already
+    /// populated `std::io::Write`; panics unless `marker` is exactly 16 bytes long.
+    pub fn extend_from_with_codec(
+        schema: &'a Schema,
+        writer: W,
+        codec: Codec,
+        marker: Vec<u8>,
+    ) -> Self {
+        assert_eq!(marker.len(), 16);
+        let mut appender = Self::builder()
+            .marker(marker)
+            .codec(codec)
+            .writer(writer)
+            .schema(schema)
+            .build();
+        appender.resolved_schema = ResolvedSchema::try_from(schema).ok();
+        appender.has_header = true;
+        appender
+    }
+
     /// Get a reference to the `Schema` associated to a `Writer`.
     pub fn schema(&self) -> &'a Schema {
         self.schema
@@ -513,6 +538,15 @@ pub fn to_avro_datum<T: Into<Value>>(schema: &Schema, value: T) -> AvroResult<Ve
     Ok(buffer)
 }
 
+/// Returns the marker (the last 16 bytes) of Avro bytes generated earlier by a `Writer`; panics unless `bytes.len() > 16`
+pub fn read_marker(bytes: &[u8]) -> Vec<u8> {
+    assert!(
+        bytes.len() > 16,
+        "The bytes are too short to read a marker from them"
+    );
+    bytes[(bytes.len() - 16)..].to_vec()
+}
+
 #[cfg(not(target_arch = "wasm32"))]
 fn generate_sync_marker() -> Vec<u8> {
     std::iter::repeat_with(rand::random).take(16).collect()
diff --git a/lang/rust/avro/tests/append_to_existing.rs b/lang/rust/avro/tests/append_to_existing.rs
new file mode 100644
index 000000000..862aa0f49
--- /dev/null
+++ b/lang/rust/avro/tests/append_to_existing.rs
@@ -0,0 +1,86 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+use apache_avro::{
+    read_marker,
+    types::{Record, Value},
+    AvroResult, Reader, Schema, Writer,
+};
+
+/// Verifies that data appended via `Writer::extend_from` is readable together with the pre-existing data
+#[test]
+fn append_to_an_existing_file() {
+    let schema_str = r#"
+            {
+                "type": "record",
+                "name": "append_to_existing_file",
+                "fields": [
+                    {"name": "a", "type": "int"}
+                ]
+            }
+        "#;
+
+    let schema = Schema::parse_str(schema_str).expect("Cannot parse the schema");
+
+    let bytes = get_avro_bytes(&schema);
+    let marker = read_marker(&bytes);
+    let mut writer = Writer::extend_from(&schema, bytes, marker);
+
+    writer
+        .append(create_datum(&schema, 2))
+        .expect("An error occurred while appending more data");
+
+    let new_bytes = writer.into_inner().expect("Cannot get the new bytes");
+
+    let reader = Reader::new(&*new_bytes).expect("Cannot read the new bytes");
+    let values: Vec<_> = reader.collect();
+    // Guard against a vacuous pass: the pre-existing datum and the appended one must both be read
+    assert_eq!(values.len(), 2);
+    for (value, expected) in values.into_iter().zip(1..) {
+        check(value, expected);
+    }
+}
+
+/// Stands in for the content of a pre-existing .avro file: returns the bytes
+/// produced by a fresh `Writer` after appending a single datum with value 1
+fn get_avro_bytes(schema: &Schema) -> Vec<u8> {
+    let mut seed_writer = Writer::new(schema, Vec::new());
+    let appended = seed_writer.append(create_datum(schema, 1));
+    appended.expect("An error while appending data");
+    seed_writer.into_inner().expect("Cannot get the Avro bytes")
+}
+
+/// Builds a `Record` for `schema` with its "a" field set to `value`
+fn create_datum(schema: &Schema, value: i32) -> Record {
+    let mut record = Record::new(schema).unwrap();
+    record.put("a", value);
+    record
+}
+
+/// Asserts that `v` is a successfully read `Value::Record` whose first field
+/// holds `Value::Int(expected)`; panics on a read error or any other shape
+fn check(v: AvroResult<Value>, expected: i32) {
+    match v {
+        Ok(Value::Record(fields)) => match &fields[0] {
+            (_, Value::Int(actual)) => assert_eq!(&expected, actual),
+            _ => unreachable!("The field value type must be an Int!"),
+        },
+        Ok(_) => unreachable!("The value type must be a Record!"),
+        // A read failure must fail the test instead of only being logged to stderr
+        Err(e) => panic!("Error while reading the data: {:?}", e),
+    }
+}