You are viewing a plain text version of this content. The canonical link for it is here.
Posted to commits@arrow.apache.org by uw...@apache.org on 2018/05/01 17:03:32 UTC

[arrow] branch master updated: ARROW-2417: [Rust] Fix API safety issues

This is an automated email from the ASF dual-hosted git repository.

uwe pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/arrow.git


The following commit(s) were added to refs/heads/master by this push:
     new 0562d3b  ARROW-2417: [Rust] Fix API safety issues
0562d3b is described below

commit 0562d3b1eaa2ab21add20a08c1bbc43c1eab1481
Author: Andy Grove <an...@gmail.com>
AuthorDate: Tue May 1 19:03:25 2018 +0200

    ARROW-2417: [Rust] Fix API safety issues
    
    I reviewed all uses of unsafe in the API implementation and added appropriate assertions where needed to guarantee that the API we expose is safe. Also added tests to verify in some cases.
    
    Author: Andy Grove <an...@gmail.com>
    
    Closes #1957 from andygrove/api_safety and squashes the following commits:
    
    4d99cdfe <Andy Grove> changes based on PR feedback
    36927881 <Andy Grove> rust fmt
    5983b6df <Andy Grove> review builder api for safety, add tests
    ce143025 <Andy Grove> review buffer api for safety, add tests
    0e2606b2 <Andy Grove> Merge remote-tracking branch 'upstream/master'
    d883da2f <Andy Grove> Merge remote-tracking branch 'upstream/master'
    589ef71d <Andy Grove> Merge remote-tracking branch 'upstream/master'
    bd4fbb55 <Andy Grove> Merge remote-tracking branch 'upstream/master'
    9c8a10a4 <Andy Grove> Merge remote-tracking branch 'upstream/master'
    05592f8c <Andy Grove> Merge remote-tracking branch 'upstream/master'
    8c0e6982 <Andy Grove> Merge remote-tracking branch 'upstream/master'
    31ef90ba <Andy Grove> Merge remote-tracking branch 'upstream/master'
    2f87c703 <Andy Grove> Fix build - add missing import
---
 rust/src/buffer.rs  | 78 ++++++++++++++++++++++++++++++++++++-----------------
 rust/src/builder.rs | 59 +++++++++++++++++++++++++++-------------
 2 files changed, 95 insertions(+), 42 deletions(-)

diff --git a/rust/src/buffer.rs b/rust/src/buffer.rs
index cdfbfc9..e8168f2 100644
--- a/rust/src/buffer.rs
+++ b/rust/src/buffer.rs
@@ -32,7 +32,8 @@ pub struct Buffer<T> {
 }
 
 impl<T> Buffer<T> {
-    pub fn from_raw_parts(data: *const T, len: i32) -> Self {
+    /// create a buffer from an existing region of memory (must already be byte-aligned)
+    pub unsafe fn from_raw_parts(data: *const T, len: i32) -> Self {
         Buffer { data, len }
     }
 
@@ -41,26 +42,28 @@ impl<T> Buffer<T> {
         self.len
     }
 
+    /// Get a pointer to the data contained by the buffer
     pub fn data(&self) -> *const T {
         self.data
     }
 
     pub fn slice(&self, start: usize, end: usize) -> &[T] {
-        assert!(start <= end);
-        assert!(start <= self.len as usize);
         assert!(end <= self.len as usize);
+        assert!(start <= end);
         unsafe { slice::from_raw_parts(self.data.offset(start as isize), (end - start) as usize) }
     }
 
     /// Get a reference to the value at the specified offset
     pub fn get(&self, i: usize) -> &T {
+        assert!(i < self.len as usize);
         unsafe { &(*self.data.offset(i as isize)) }
     }
 
-    /// Deprecated method (used by Bitmap)
+    /// Write to a slot in the buffer
     pub fn set(&mut self, i: usize, v: T) {
+        assert!(i < self.len as usize);
+        let p = self.data as *mut T;
         unsafe {
-            let p = mem::transmute::<*const T, *mut T>(self.data);
             *p.offset(i as isize) = v;
         }
     }
@@ -75,12 +78,10 @@ impl<T> Buffer<T> {
     }
 }
 
+/// Release the underlying memory when the Buffer goes out of scope
 impl<T> Drop for Buffer<T> {
     fn drop(&mut self) {
-        unsafe {
-            let p = mem::transmute::<*const T, *const u8>(self.data);
-            free_aligned(p);
-        }
+        free_aligned(self.data as *const u8);
     }
 }
 
@@ -99,14 +100,16 @@ where
 
     fn next(&mut self) -> Option<Self::Item> {
         if self.index < self.len as isize {
+            let value = unsafe { *self.data.offset(self.index) };
             self.index += 1;
-            Some(unsafe { *self.data.offset(self.index - 1) })
+            Some(value)
         } else {
             None
         }
     }
 }
 
+/// Copy the memory from a Vec<T> into a newly allocated Buffer<T>
 macro_rules! array_from_primitive {
     ($DT:ty) => {
         impl From<Vec<$DT>> for Buffer<$DT> {
@@ -118,13 +121,9 @@ macro_rules! array_from_primitive {
                 Buffer {
                     len: len as i32,
                     data: unsafe {
-                        let dst = mem::transmute::<*const u8, *mut libc::c_void>(buffer);
-                        libc::memcpy(
-                            dst,
-                            mem::transmute::<*const $DT, *const libc::c_void>(v.as_ptr()),
-                            len * sz,
-                        );
-                        mem::transmute::<*mut libc::c_void, *const $DT>(dst)
+                        let dst = buffer as *mut libc::c_void;
+                        libc::memcpy(dst, v.as_ptr() as *const libc::c_void, len * sz);
+                        dst as *const $DT
                     },
                 }
             }
@@ -150,16 +149,12 @@ impl From<Bytes> for Buffer<u8> {
         let len = bytes.len();
         let sz = mem::size_of::<u8>();
         let buf_mem = allocate_aligned((len * sz) as i64).unwrap();
+        let dst = buf_mem as *mut libc::c_void;
         Buffer {
             len: len as i32,
             data: unsafe {
-                let dst = mem::transmute::<*const u8, *mut libc::c_void>(buf_mem);
-                libc::memcpy(
-                    dst,
-                    mem::transmute::<*const u8, *const libc::c_void>(bytes.as_ptr()),
-                    len * sz,
-                );
-                mem::transmute::<*mut libc::c_void, *const u8>(dst)
+                libc::memcpy(dst, bytes.as_ptr() as *const libc::c_void, len * sz);
+                dst as *mut u8
             },
         }
     }
@@ -237,4 +232,39 @@ mod tests {
             .collect::<Vec<i32>>();
         assert_eq!(c, vec![5, 8, 9, 8, 5]);
     }
+
+    #[test]
+    #[should_panic]
+    fn test_get_out_of_bounds() {
+        let a = Buffer::from(vec![1, 2, 3, 4, 5]);
+        a.get(123); // should panic
+    }
+
+    #[test]
+    fn slice_empty_at_end() {
+        let a = Buffer::from(vec![1, 2, 3, 4, 5]);
+        let s = a.slice(5, 5);
+        assert_eq!(0, s.len());
+    }
+
+    #[test]
+    #[should_panic]
+    fn slice_start_out_of_bounds() {
+        let a = Buffer::from(vec![1, 2, 3, 4, 5]);
+        a.slice(6, 6); // should panic
+    }
+
+    #[test]
+    #[should_panic]
+    fn slice_end_out_of_bounds() {
+        let a = Buffer::from(vec![1, 2, 3, 4, 5]);
+        a.slice(0, 6); // should panic
+    }
+
+    #[test]
+    #[should_panic]
+    fn slice_end_before_start() {
+        let a = Buffer::from(vec![1, 2, 3, 4, 5]);
+        a.slice(3, 2); // should panic
+    }
 }
diff --git a/rust/src/builder.rs b/rust/src/builder.rs
index ad0caec..2d5e321 100644
--- a/rust/src/builder.rs
+++ b/rust/src/builder.rs
@@ -44,7 +44,7 @@ impl<T> Builder<T> {
         Builder {
             len: 0,
             capacity,
-            data: unsafe { mem::transmute::<*const u8, *mut T>(buffer) },
+            data: buffer as *mut T,
         }
     }
 
@@ -60,9 +60,8 @@ impl<T> Builder<T> {
 
     /// Get the internal byte-aligned memory buffer as a mutable slice
     pub fn slice_mut(&mut self, start: usize, end: usize) -> &mut [T] {
-        assert!(start <= end);
-        assert!(start < self.capacity as usize);
         assert!(end <= self.capacity as usize);
+        assert!(start <= end);
         unsafe {
             slice::from_raw_parts_mut(self.data.offset(start as isize), (end - start) as usize)
         }
@@ -94,8 +93,8 @@ impl<T> Builder<T> {
         let sz = mem::size_of::<T>();
         unsafe {
             libc::memcpy(
-                mem::transmute::<*mut T, *mut libc::c_void>(self.data.offset(self.len() as isize)),
-                mem::transmute::<*const T, *const libc::c_void>(slice.as_ptr()),
+                self.data.offset(self.len() as isize) as *mut libc::c_void,
+                slice.as_ptr() as *const libc::c_void,
                 slice.len() * sz,
             );
         }
@@ -113,36 +112,33 @@ impl<T> Builder<T> {
     /// Grow the buffer to the new size n (number of elements of type T)
     fn grow(&mut self, new_capacity: usize) {
         let sz = mem::size_of::<T>();
+        let old_buffer = self.data;
+        let new_buffer = allocate_aligned((new_capacity * sz) as i64).unwrap();
         unsafe {
-            let old_buffer = self.data;
-            let new_buffer = allocate_aligned((new_capacity * sz) as i64).unwrap();
             libc::memcpy(
-                mem::transmute::<*const u8, *mut libc::c_void>(new_buffer),
-                mem::transmute::<*const T, *const libc::c_void>(old_buffer),
+                new_buffer as *mut libc::c_void,
+                old_buffer as *const libc::c_void,
                 self.len * sz,
             );
-            self.capacity = new_capacity;
-            self.data = mem::transmute::<*const u8, *mut T>(new_buffer);
-            free_aligned(mem::transmute::<*mut T, *const u8>(old_buffer));
         }
+        self.capacity = new_capacity;
+        self.data = new_buffer as *mut T;
+        free_aligned(old_buffer as *const u8);
     }
 
     /// Build a Buffer from the existing memory
     pub fn finish(&mut self) -> Buffer<T> {
         assert!(!self.data.is_null());
-        let p = unsafe { mem::transmute::<*mut T, *const T>(self.data) };
+        let p = self.data as *const T;
         self.data = ptr::null_mut(); // ensure builder cannot be re-used
-        Buffer::from_raw_parts(p, self.len as i32)
+        unsafe { Buffer::from_raw_parts(p, self.len as i32) }
     }
 }
 
 impl<T> Drop for Builder<T> {
     fn drop(&mut self) {
         if !self.data.is_null() {
-            unsafe {
-                let p = mem::transmute::<*const T, *const u8>(self.data);
-                free_aligned(p);
-            }
+            free_aligned(self.data as *const u8);
         }
     }
 }
@@ -214,4 +210,31 @@ mod tests {
         assert_eq!("Hello, World!", s);
     }
 
+    #[test]
+    fn test_slice_empty_at_end() {
+        let mut b: Builder<u8> = Builder::with_capacity(2);
+        let s = b.slice_mut(2, 2);
+        assert_eq!(0, s.len());
+    }
+
+    #[test]
+    #[should_panic]
+    fn test_slice_start_out_of_bounds() {
+        let mut b: Builder<u8> = Builder::with_capacity(2);
+        b.slice_mut(3, 3); // should panic
+    }
+
+    #[test]
+    #[should_panic]
+    fn test_slice_end_out_of_bounds() {
+        let mut b: Builder<u8> = Builder::with_capacity(2);
+        b.slice_mut(0, 3); // should panic
+    }
+
+    #[test]
+    #[should_panic]
+    fn test_slice_end_before_start() {
+        let mut b: Builder<u8> = Builder::with_capacity(2);
+        b.slice_mut(1, 0); // should panic
+    }
 }

-- 
To stop receiving notification emails like this one, please contact
uwe@apache.org.