Skip to content

Commit ddd0b57

Browse files
committed
fix
1 parent b251419 commit ddd0b57

1 file changed

Lines changed: 34 additions & 2 deletions

File tree

datafusion/spark/src/function/string/concat_ws.rs

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,7 +25,8 @@
2525
use std::sync::Arc;
2626

2727
use arrow::array::{
28-
Array, ArrayRef, AsArray, GenericListArray, OffsetSizeTrait, StringBuilder,
28+
Array, ArrayRef, AsArray, BinaryArray, GenericListArray, LargeBinaryArray,
29+
OffsetSizeTrait, StringBuilder,
2930
};
3031
use arrow::datatypes::DataType;
3132
use datafusion_common::cast::as_generic_string_array;
@@ -120,7 +121,11 @@ impl ScalarUDFImpl for SparkConcatWs {
120121

121122
for dt in &arg_types[1..] {
122123
match dt {
123-
DataType::Utf8 | DataType::LargeUtf8 | DataType::Utf8View => {
124+
DataType::Utf8
125+
| DataType::LargeUtf8
126+
| DataType::Utf8View
127+
| DataType::Binary
128+
| DataType::LargeBinary => {
124129
coerced.push(dt.clone());
125130
}
126131
DataType::List(_) | DataType::LargeList(_) => {
@@ -225,6 +230,22 @@ fn collect_parts(arr: &ArrayRef, row_idx: usize, parts: &mut Vec<String>) -> Res
225230
let str_arr = arr.as_string_view();
226231
parts.push(str_arr.value(row_idx).to_string());
227232
}
233+
DataType::Binary => {
234+
let bin_arr = arr.as_any().downcast_ref::<BinaryArray>().ok_or_else(
235+
|| datafusion_common::DataFusionError::Execution(
236+
"failed to downcast to BinaryArray".to_string(),
237+
),
238+
)?;
239+
parts.push(binary_to_utf8(bin_arr.value(row_idx))?);
240+
}
241+
DataType::LargeBinary => {
242+
let bin_arr = arr.as_any().downcast_ref::<LargeBinaryArray>().ok_or_else(
243+
|| datafusion_common::DataFusionError::Execution(
244+
"failed to downcast to LargeBinaryArray".to_string(),
245+
),
246+
)?;
247+
parts.push(binary_to_utf8(bin_arr.value(row_idx))?);
248+
}
228249
DataType::List(_) => {
229250
collect_parts_from_list::<i32>(arr.as_list::<i32>(), row_idx, parts)?;
230251
}
@@ -238,6 +259,17 @@ fn collect_parts(arr: &ArrayRef, row_idx: usize, parts: &mut Vec<String>) -> Res
238259
Ok(())
239260
}
240261

262+
/// Convert binary bytes to UTF-8 string, matching core `concat`/`concat_ws` behavior.
263+
fn binary_to_utf8(bytes: &[u8]) -> Result<String> {
264+
std::str::from_utf8(bytes)
265+
.map(|s| s.to_string())
266+
.map_err(|_| {
267+
datafusion_common::DataFusionError::Execution(
268+
"invalid UTF-8 in binary literal".to_string(),
269+
)
270+
})
271+
}
272+
241273
/// Collect string parts from a list array at a given row index.
242274
fn collect_parts_from_list<O: OffsetSizeTrait>(
243275
arr: &GenericListArray<O>,

0 commit comments

Comments
 (0)