2525use std:: sync:: Arc ;
2626
2727use arrow:: array:: {
28- Array , ArrayRef , AsArray , GenericListArray , OffsetSizeTrait , StringBuilder ,
28+ Array , ArrayRef , AsArray , BinaryArray , GenericListArray , LargeBinaryArray ,
29+ OffsetSizeTrait , StringBuilder ,
2930} ;
3031use arrow:: datatypes:: DataType ;
3132use datafusion_common:: cast:: as_generic_string_array;
@@ -120,7 +121,11 @@ impl ScalarUDFImpl for SparkConcatWs {
120121
121122 for dt in & arg_types[ 1 ..] {
122123 match dt {
123- DataType :: Utf8 | DataType :: LargeUtf8 | DataType :: Utf8View => {
124+ DataType :: Utf8
125+ | DataType :: LargeUtf8
126+ | DataType :: Utf8View
127+ | DataType :: Binary
128+ | DataType :: LargeBinary => {
124129 coerced. push ( dt. clone ( ) ) ;
125130 }
126131 DataType :: List ( _) | DataType :: LargeList ( _) => {
@@ -225,6 +230,22 @@ fn collect_parts(arr: &ArrayRef, row_idx: usize, parts: &mut Vec<String>) -> Res
225230 let str_arr = arr. as_string_view ( ) ;
226231 parts. push ( str_arr. value ( row_idx) . to_string ( ) ) ;
227232 }
233+ DataType :: Binary => {
234+ let bin_arr = arr. as_any ( ) . downcast_ref :: < BinaryArray > ( ) . ok_or_else (
235+ || datafusion_common:: DataFusionError :: Execution (
236+ "failed to downcast to BinaryArray" . to_string ( ) ,
237+ ) ,
238+ ) ?;
239+ parts. push ( binary_to_utf8 ( bin_arr. value ( row_idx) ) ?) ;
240+ }
241+ DataType :: LargeBinary => {
242+ let bin_arr = arr. as_any ( ) . downcast_ref :: < LargeBinaryArray > ( ) . ok_or_else (
243+ || datafusion_common:: DataFusionError :: Execution (
244+ "failed to downcast to LargeBinaryArray" . to_string ( ) ,
245+ ) ,
246+ ) ?;
247+ parts. push ( binary_to_utf8 ( bin_arr. value ( row_idx) ) ?) ;
248+ }
228249 DataType :: List ( _) => {
229250 collect_parts_from_list :: < i32 > ( arr. as_list :: < i32 > ( ) , row_idx, parts) ?;
230251 }
@@ -238,6 +259,17 @@ fn collect_parts(arr: &ArrayRef, row_idx: usize, parts: &mut Vec<String>) -> Res
238259 Ok ( ( ) )
239260}
240261
262+ /// Convert binary bytes to UTF-8 string, matching core `concat`/`concat_ws` behavior.
263+ fn binary_to_utf8 ( bytes : & [ u8 ] ) -> Result < String > {
264+ std:: str:: from_utf8 ( bytes)
265+ . map ( |s| s. to_string ( ) )
266+ . map_err ( |_| {
267+ datafusion_common:: DataFusionError :: Execution (
268+ "invalid UTF-8 in binary literal" . to_string ( ) ,
269+ )
270+ } )
271+ }
272+
241273/// Collect string parts from a list array at a given row index.
242274fn collect_parts_from_list < O : OffsetSizeTrait > (
243275 arr : & GenericListArray < O > ,
0 commit comments