diff --git a/rust/arrow/src/array/array_list.rs b/rust/arrow/src/array/array_list.rs index 75b8a4827e5d..08fb8c2bca6c 100644 --- a/rust/arrow/src/array/array_list.rs +++ b/rust/arrow/src/array/array_list.rs @@ -580,11 +580,11 @@ mod tests { let sliced_array = list_array.slice(1, 6); assert_eq!(6, sliced_array.len()); - assert_eq!(1, sliced_array.offset()); + assert_eq!(0, sliced_array.offset()); assert_eq!(3, sliced_array.null_count()); for i in 0..sliced_array.len() { - if bit_util::get_bit(&null_bits, sliced_array.offset() + i) { + if bit_util::get_bit(&null_bits, 1 + i) { assert!(sliced_array.is_valid(i)); } else { assert!(sliced_array.is_null(i)); @@ -594,11 +594,11 @@ mod tests { // Check offset and length for each non-null value. let sliced_list_array = sliced_array.as_any().downcast_ref::().unwrap(); - assert_eq!(2, sliced_list_array.value_offset(2)); + assert_eq!(0, sliced_list_array.value_offset(2)); assert_eq!(2, sliced_list_array.value_length(2)); - assert_eq!(4, sliced_list_array.value_offset(3)); + assert_eq!(2, sliced_list_array.value_offset(3)); assert_eq!(2, sliced_list_array.value_length(3)); - assert_eq!(6, sliced_list_array.value_offset(5)); + assert_eq!(4, sliced_list_array.value_offset(5)); assert_eq!(3, sliced_list_array.value_length(5)); } @@ -645,11 +645,11 @@ mod tests { let sliced_array = list_array.slice(1, 6); assert_eq!(6, sliced_array.len()); - assert_eq!(1, sliced_array.offset()); + assert_eq!(0, sliced_array.offset()); assert_eq!(3, sliced_array.null_count()); for i in 0..sliced_array.len() { - if bit_util::get_bit(&null_bits, sliced_array.offset() + i) { + if bit_util::get_bit(&null_bits, 1 + i) { assert!(sliced_array.is_valid(i)); } else { assert!(sliced_array.is_null(i)); @@ -661,11 +661,11 @@ mod tests { .as_any() .downcast_ref::() .unwrap(); - assert_eq!(2, sliced_list_array.value_offset(2)); + assert_eq!(0, sliced_list_array.value_offset(2)); assert_eq!(2, sliced_list_array.value_length(2)); - assert_eq!(4, sliced_list_array.value_offset(3)); + assert_eq!(2, sliced_list_array.value_offset(3)); assert_eq!(2, sliced_list_array.value_length(3)); - assert_eq!(6, sliced_list_array.value_offset(5)); + assert_eq!(4, sliced_list_array.value_offset(5)); assert_eq!(3, sliced_list_array.value_length(5)); } diff --git a/rust/arrow/src/array/array_primitive.rs b/rust/arrow/src/array/array_primitive.rs index 0bdc3e51d991..018dd7650ce8 100644 --- a/rust/arrow/src/array/array_primitive.rs +++ b/rust/arrow/src/array/array_primitive.rs @@ -673,7 +673,7 @@ mod tests { let arr2 = arr.slice(2, 5); assert_eq!(5, arr2.len()); - assert_eq!(2, arr2.offset()); + assert_eq!(0, arr2.offset()); assert_eq!(1, arr2.null_count()); for i in 0..arr2.len() { @@ -683,7 +683,7 @@ mod tests { let arr3 = arr2.slice(2, 3); assert_eq!(3, arr3.len()); - assert_eq!(4, arr3.offset()); + assert_eq!(0, arr3.offset()); assert_eq!(0, arr3.null_count()); let int_arr = arr3.as_any().downcast_ref::().unwrap(); @@ -713,7 +713,7 @@ mod tests { let arr2 = arr.slice(3, 5); assert_eq!(5, arr2.len()); - assert_eq!(3, arr2.offset()); + assert_eq!(0, arr2.offset()); assert_eq!(1, arr2.null_count()); let bool_arr = arr2.as_any().downcast_ref::().unwrap(); diff --git a/rust/arrow/src/array/array_struct.rs b/rust/arrow/src/array/array_struct.rs index 50e7eea3db6b..8ce6f81f942a 100644 --- a/rust/arrow/src/array/array_struct.rs +++ b/rust/arrow/src/array/array_struct.rs @@ -123,11 +123,11 @@ impl TryFrom> for StructArray { len = Some(child_datum_len) } child_data.push(child_datum.clone()); - fields.push(Field::new( - field_name, - array.data_type().clone(), - child_datum.null_buffer().is_some(), - )); + let null_buffer = child_datum.null_buffer(); + // A field is nullable if the null buffer is Some and not all bits are set. + let nullable = null_buffer.is_some() + && null_buffer.unwrap().count_set_bits() < child_datum.len(); + fields.push(Field::new(field_name, array.data_type().clone(), nullable)); if let Some(child_null_buffer) = child_datum.null_buffer() { let child_datum_offset = child_datum.offset(); @@ -485,7 +485,7 @@ mod tests { let sliced_array = struct_array.slice(2, 3); let sliced_array = sliced_array.as_any().downcast_ref::().unwrap(); assert_eq!(3, sliced_array.len()); - assert_eq!(2, sliced_array.offset()); + assert_eq!(0, sliced_array.offset()); assert_eq!(1, sliced_array.null_count()); assert!(sliced_array.is_valid(0)); assert!(sliced_array.is_null(1)); @@ -494,7 +494,7 @@ mod tests { let sliced_c0 = sliced_array.column(0); let sliced_c0 = sliced_c0.as_any().downcast_ref::().unwrap(); assert_eq!(3, sliced_c0.len()); - assert_eq!(2, sliced_c0.offset()); + assert_eq!(0, sliced_c0.offset()); assert!(sliced_c0.is_null(0)); assert!(sliced_c0.is_null(1)); assert!(sliced_c0.is_valid(2)); @@ -503,7 +503,7 @@ mod tests { let sliced_c1 = sliced_array.column(1); let sliced_c1 = sliced_c1.as_any().downcast_ref::().unwrap(); assert_eq!(3, sliced_c1.len()); - assert_eq!(2, sliced_c1.offset()); + assert_eq!(0, sliced_c1.offset()); assert!(sliced_c1.is_valid(0)); assert_eq!(42, sliced_c1.value(0)); assert!(sliced_c1.is_null(1)); diff --git a/rust/arrow/src/array/data.rs b/rust/arrow/src/array/data.rs index ecfda3d5d5b3..cc2f2654b418 100644 --- a/rust/arrow/src/array/data.rs +++ b/rust/arrow/src/array/data.rs @@ -25,7 +25,7 @@ use crate::buffer::Buffer; use crate::datatypes::DataType; use crate::{bitmap::Bitmap, datatypes::ArrowNativeType}; -use super::equal::equal; +use super::{equal::equal, MutableArrayData}; #[inline] pub(crate) fn count_nulls( @@ -219,17 +219,15 @@ impl ArrayData { /// /// Panics if `offset + length > self.len()`. pub fn slice(&self, offset: usize, length: usize) -> ArrayData { - assert!((offset + length) <= self.len()); - - let mut new_data = self.clone(); - - new_data.len = length; - new_data.offset = offset + self.offset; - - new_data.null_count = - count_nulls(new_data.null_buffer(), new_data.offset, new_data.len); + self.copy_range(offset, length) + } - new_data + fn copy_range(&self, offset: usize, length: usize) -> ArrayData { + assert!((offset + length) <= self.len()); + let arrays = vec![self]; + let mut mutable = MutableArrayData::new(arrays, false, length); + mutable.extend(0, offset, offset + length); + mutable.freeze() } /// Returns the `buffer` as a slice of type `T` starting at self.offset @@ -428,28 +426,45 @@ mod tests { bit_util::set_bit(&mut bit_v, 0); bit_util::set_bit(&mut bit_v, 3); bit_util::set_bit(&mut bit_v, 10); + let data_vec = vec![0_u8; 64]; // align to 64 bytes let data = ArrayData::builder(DataType::Int32) .len(16) .null_bit_buffer(Buffer::from(bit_v)) + .add_buffer(Buffer::from(data_vec)) .build(); let data = data.as_ref(); let new_data = data.slice(1, 15); assert_eq!(data.len() - 1, new_data.len()); - assert_eq!(1, new_data.offset()); + assert_eq!(0, new_data.offset()); assert_eq!(data.null_count(), new_data.null_count()); // slice of a slice (removes one null) let new_data = new_data.slice(1, 14); assert_eq!(data.len() - 2, new_data.len()); - assert_eq!(2, new_data.offset()); + assert_eq!(0, new_data.offset()); assert_eq!(data.null_count() - 1, new_data.null_count()); } #[test] - fn test_equality() { - let int_data = ArrayData::builder(DataType::Int32).build(); - let float_data = ArrayData::builder(DataType::Float32).build(); - assert_ne!(int_data, float_data); + #[should_panic(expected = "index out of bounds: the len is 0 but the index is 0")] + fn test_slice_equality() { + let mut bit_v: [u8; 1] = [0; 1]; + bit_util::set_bit(&mut bit_v, 1); + let data = ArrayData::builder(DataType::Int32) + .len(2) + .null_bit_buffer(Buffer::from(bit_v)) + .build(); + let data_slice = data.slice(1, 1); + + let mut bit_v: [u8; 1] = [0; 1]; + bit_util::set_bit(&mut bit_v, 0); + let arc_data = ArrayData::builder(DataType::Int32) + .len(1) + .null_bit_buffer(Buffer::from(bit_v)) + .build(); + let expected = arc_data.as_ref(); + + assert_eq!(&data_slice, expected); } #[test] diff --git a/rust/arrow/src/array/equal/mod.rs b/rust/arrow/src/array/equal/mod.rs index 33977b496943..bb7fe6e920f3 100644 --- a/rust/arrow/src/array/equal/mod.rs +++ b/rust/arrow/src/array/equal/mod.rs @@ -562,6 +562,28 @@ mod tests { test_equal(a.as_ref(), b.as_ref(), false); } + #[test] + // Code created by @alamb, copied from https://github.com/apache/arrow/pull/9211 + fn test_string_offset_larger() { + let a = + StringArray::from(vec![Some("a"), None, Some("b"), None, Some("c")]).data(); + let b = StringArray::from(vec![None, Some("b"), None, Some("c")]).data(); + + test_equal(&a.slice(2, 2), &b.slice(0, 2), false); + test_equal(&a.slice(2, 2), &b.slice(1, 2), true); + test_equal(&a.slice(2, 2), &b.slice(2, 2), false); + } + + #[test] + // Code created by @jhorstman copied from https://issues.apache.org/jira/browse/ARROW-11267 + fn test_list_different_offsets() { + let a = create_list_array(&[Some(&[0, 0]), Some(&[1, 2]), Some(&[3, 4])]); + let b = create_list_array(&[Some(&[1, 2]), Some(&[3, 4]), Some(&[5, 6])]); + let a_slice = a.slice(1, 2); + let b_slice = b.slice(0, 2); + test_equal(&a_slice, &b_slice, true); + } + // Test the case where null_count > 0 #[test] fn test_list_null() { diff --git a/rust/arrow/src/array/null.rs b/rust/arrow/src/array/null.rs index 08c7cf1f21ef..1fa68314f09a 100644 --- a/rust/arrow/src/array/null.rs +++ b/rust/arrow/src/array/null.rs @@ -144,7 +144,7 @@ mod tests { let array2 = array1.slice(8, 16); assert_eq!(array2.len(), 16); assert_eq!(array2.null_count(), 16); - assert_eq!(array2.offset(), 8); + assert_eq!(array2.offset(), 0); } #[test] diff --git a/rust/arrow/src/array/transform/mod.rs b/rust/arrow/src/array/transform/mod.rs index 4e428f26697a..80406f9d8e87 100644 --- a/rust/arrow/src/array/transform/mod.rs +++ b/rust/arrow/src/array/transform/mod.rs @@ -814,7 +814,65 @@ mod tests { } #[test] - fn test_struct() { + // should fail with assert error: not equal. + fn array_bug_1() { + let strings: ArrayRef = Arc::new(StringArray::from(vec![None, Some("b")])); + // ArrayData::slice() is not bug-free + let strings_slice = strings.slice(1, 1); + let strings2: ArrayRef = Arc::new(StringArray::from(vec![Some("b")])); + + assert_eq!( + &strings2, + &strings_slice, + "\n##1: {:?}\n##2: {:?}", + strings2.data(), + strings_slice.data() + ); + } + + #[test] + // should fail with panic: index out of range. + fn array_bug_2() { + let strings: ArrayRef = + Arc::new(StringArray::from(vec![Some("a"), None, Some("b")])); + // ArrayData::slice() is not bug-free + let strings_slice = strings.slice(2, 1); + let strings2: ArrayRef = Arc::new(StringArray::from(vec![Some("b")])); + + assert_eq!( + &strings2, + &strings_slice, + "\n##1: {:?}\n##2: {:?}", + strings2.data(), + strings_slice.data() + ); + } + + // should pass. + #[test] + fn array_bug_3() { + let strings: ArrayRef = + Arc::new(StringArray::from(vec![Some("a"), None, Some("b")])); + + // use copy_range(), see bellow. + let strings_slice = copy_range(&strings, 2, 1); + let strings2: ArrayRef = Arc::new(StringArray::from(vec![Some("b")])); + + // failed: index out of range + assert_eq!(&strings2, &strings_slice); + } + + fn copy_range(array_ref: &ArrayRef, offset: usize, length: usize) -> ArrayRef { + let data_ref = array_ref.data(); + let arrays = vec![&*data_ref]; + let mut mutable = MutableArrayData::new(arrays, false, length); + mutable.extend(0, offset, offset + length); + let new_array = mutable.freeze(); + crate::array::make_array(Arc::new(new_array)) + } + + #[test] + fn struct_array_bug() { let strings: ArrayRef = Arc::new(StringArray::from(vec![ Some("joe"), None, @@ -822,6 +880,50 @@ mod tests { Some("mark"), Some("doe"), ])); + + let array = StructArray::try_from(vec![("f1", strings.clone())]) + .unwrap() + .data(); + let arrays = vec![array.as_ref()]; + let mut mutable = MutableArrayData::new(arrays, false, 0); + + mutable.extend(0, 1, 3); + let data = mutable.freeze(); + let array = StructArray::from(Arc::new(data)); + + let expected = StructArray::try_from(vec![("f1", strings.slice(1, 2))]).unwrap(); + assert_eq!(array, expected) + } + + #[test] + fn test_struct_bug_2() { + let ints: ArrayRef = Arc::new(Int32Array::from(vec![Some(1), Some(2), Some(3)])); + + let array = StructArray::try_from(vec![("f1", ints.clone())]) + .unwrap() + .data(); + + let arrays = vec![array.as_ref()]; + let mut mutable = MutableArrayData::new(arrays, false, 0); + + mutable.extend(0, 1, 3); + let data = mutable.freeze(); + let array = StructArray::from(Arc::new(data)); + + let expected = StructArray::try_from(vec![("f1", ints.slice(1, 2))]).unwrap(); + + assert_eq!(array, expected); + } + + #[test] + fn test_struct() { + let strings: ArrayRef = Arc::new(StringArray::from(vec![ + None, + Some("joe"), + None, + Some("mark"), + Some("doe"), + ])); let ints: ArrayRef = Arc::new(Int32Array::from(vec![ Some(1), Some(2), diff --git a/rust/arrow/src/compute/kernels/cast.rs b/rust/arrow/src/compute/kernels/cast.rs index 40b33fc649de..fa91194cba0a 100644 --- a/rust/arrow/src/compute/kernels/cast.rs +++ b/rust/arrow/src/compute/kernels/cast.rs @@ -1225,7 +1225,7 @@ mod tests { let array = Arc::new(a) as ArrayRef; assert_eq!(0, array.offset()); let array = array.slice(2, 3); - assert_eq!(2, array.offset()); + assert_eq!(0, array.offset()); let b = cast(&array, &DataType::UInt8).unwrap(); assert_eq!(3, b.len()); assert_eq!(0, b.offset());