4040#if defined(SAF_USE_APPLE_ACCELERATE_LP64 ) || defined(SAF_USE_APPLE_ACCELERATE_ILP64 )
4141# include "AvailabilityVersions.h"
4242# ifdef __MAC_12_0
43- // # define SAF_USE_INTERLEAVED_VDSP /**< New interleaved implementation as of macOS 12.0+. UNFINISHED+UNTESTED! */
43+ # define SAF_USE_INTERLEAVED_VDSP /**< New interleaved implementation as of macOS 12.0+ */
4444# endif
4545#endif
4646
@@ -81,6 +81,7 @@ typedef struct _saf_rfft_data {
8181# ifdef SAF_USE_INTERLEAVED_VDSP
8282 vDSP_DFT_Interleaved_Setup DFT_fwd ;
8383 vDSP_DFT_Interleaved_Setup DFT_bwd ;
84+ float * tempBuffer ;
8485# else
8586 vDSP_DFT_Setup DFT_fwd ;
8687 vDSP_DFT_Setup DFT_bwd ;
@@ -572,8 +573,8 @@ void saf_rfft_create
572573 ippFree (h -> memInit );
573574#elif defined(SAF_USE_APPLE_ACCELERATE_LP64 ) || defined(SAF_USE_APPLE_ACCELERATE_ILP64 )
574575# ifdef SAF_USE_INTERLEAVED_VDSP
575- h -> DFT_fwd = vDSP_DFT_Interleaved_CreateSetup (0 , N , vDSP_DFT_FORWARD , vDSP_DFT_Interleaved_RealtoComplex );
576- h -> DFT_bwd = vDSP_DFT_Interleaved_CreateSetup (0 , N , vDSP_DFT_INVERSE , vDSP_DFT_Interleaved_RealtoComplex );
576+ h -> DFT_fwd = vDSP_DFT_Interleaved_CreateSetup (0 , N / 2 , vDSP_DFT_FORWARD , vDSP_DFT_Interleaved_RealtoComplex );
577+ h -> DFT_bwd = vDSP_DFT_Interleaved_CreateSetup (0 , N / 2 , vDSP_DFT_INVERSE , vDSP_DFT_Interleaved_RealtoComplex );
577578# else
578579 h -> DFT_fwd = vDSP_DFT_zrop_CreateSetup (0 , N , vDSP_DFT_FORWARD );
579580 h -> DFT_bwd = vDSP_DFT_zrop_CreateSetup (0 , N , vDSP_DFT_INVERSE );
@@ -583,7 +584,9 @@ void saf_rfft_create
583584 else {
584585 /* Note that DFT lengths must satisfy: f * 2^g, where f is 1, 3, 5, or 15, and g >= 4 */
585586 saf_assert (h -> DFT_fwd != 0 && h -> DFT_bwd != 0 , "Failed to create vDSP DFT" );
586- # ifndef SAF_USE_INTERLEAVED_VDSP
587+ # ifdef SAF_USE_INTERLEAVED_VDSP
588+ h -> tempBuffer = malloc1d (2 * (h -> N /2 + 1 )* sizeof (float ));
589+ # else
587590 h -> VDSP_split_tmp .realp = malloc1d ((h -> N /2 )* sizeof (float ));
588591 h -> VDSP_split_tmp .imagp = malloc1d ((h -> N /2 )* sizeof (float ));
589592 h -> VDSP_split .realp = malloc1d ((h -> N /2 )* sizeof (float ));
@@ -643,6 +646,7 @@ void saf_rfft_destroy
643646# ifdef SAF_USE_INTERLEAVED_VDSP
644647 vDSP_DFT_Interleaved_DestroySetup (h -> DFT_fwd );
645648 vDSP_DFT_Interleaved_DestroySetup (h -> DFT_bwd );
649+ free (h -> tempBuffer );
646650# else
647651 vDSP_DFT_DestroySetup (h -> DFT_fwd );
648652 vDSP_DFT_DestroySetup (h -> DFT_bwd );
@@ -687,7 +691,9 @@ void saf_rfft_forward
687691#elif defined(SAF_USE_APPLE_ACCELERATE_LP64 ) || defined(SAF_USE_APPLE_ACCELERATE_ILP64 )
688692 if (!h -> useKissFFT_FLAG ){
689693# ifdef SAF_USE_INTERLEAVED_VDSP
690- saf_print_error ("Not implemented yet" );
694+ vDSP_DFT_Interleaved_Execute (h -> DFT_fwd , (DSPComplex * )inputTD , (DSPComplex * )outputFD );
695+ outputFD [h -> N /2 ] = cmplxf (((float * )(& outputFD [0 ]))[1 ], 0.0f );
696+ outputFD [0 ] = cmplxf (((float * )(& outputFD [0 ]))[0 ], 0.0f );
691697# else
692698 vDSP_ctoz ((DSPComplex * )inputTD , 2 , & (h -> VDSP_split_tmp ), 1 , (h -> N )/2 );
693699 vDSP_DFT_Execute (h -> DFT_fwd , h -> VDSP_split_tmp .realp , h -> VDSP_split_tmp .imagp , h -> VDSP_split .realp , h -> VDSP_split .imagp );
@@ -733,16 +739,19 @@ void saf_rfft_backward
733739#elif defined(SAF_USE_APPLE_ACCELERATE_LP64 ) || defined(SAF_USE_APPLE_ACCELERATE_ILP64 )
734740 if (!h -> useKissFFT_FLAG ){
735741# ifdef SAF_USE_INTERLEAVED_VDSP
736- saf_print_error ("Not implemented yet" );
742+ memcpy (h -> tempBuffer , inputFD , (h -> N /2 + 1 )* sizeof (float_complex ));
743+ h -> tempBuffer [1 /*imag*/ ] = crealf (inputFD [h -> N /2 ]);
744+ vDSP_DFT_Interleaved_Execute (h -> DFT_bwd , (DSPComplex * )h -> tempBuffer , (DSPComplex * )outputTD );
737745# else
738746 h -> VDSP_split_tmp .realp [0 ] = crealf (inputFD [0 ]);
739747 h -> VDSP_split_tmp .imagp [0 ] = crealf (inputFD [h -> N /2 ]);
740748 cblas_scopy (h -> N /2 - 1 , & ((float * )(inputFD ))[2 ], 2 , & h -> VDSP_split_tmp .realp [1 ], 1 );
741749 cblas_scopy (h -> N /2 - 1 , & ((float * )(inputFD ))[3 ], 2 , & h -> VDSP_split_tmp .imagp [1 ], 1 );
742750 vDSP_DFT_Execute (h -> DFT_bwd , h -> VDSP_split_tmp .realp , h -> VDSP_split_tmp .imagp , h -> VDSP_split .realp , h -> VDSP_split .imagp );
743751 vDSP_ztoc (& (h -> VDSP_split ), 1 , (DSPComplex * )outputTD , 2 , (h -> N )/2 );
744- vDSP_vsmul ( outputTD , 1 , & ( h -> Scale ), outputTD , 1 , h -> N );
752+
745753# endif
754+ vDSP_vsmul (outputTD , 1 , & (h -> Scale ), outputTD , 1 , h -> N );
746755 }
747756#elif defined(SAF_USE_INTEL_MKL_LP64 ) || defined(SAF_USE_INTEL_MKL_ILP64 )
748757 h -> Status = DftiComputeBackward (h -> MKL_FFT_Handle , inputFD , outputTD );
@@ -802,8 +811,8 @@ void saf_fft_create
802811 ippFree (h -> memInit );
803812#elif defined(SAF_USE_APPLE_ACCELERATE_LP64 ) || defined(SAF_USE_APPLE_ACCELERATE_ILP64 )
804813# ifdef SAF_USE_INTERLEAVED_VDSP
805- h -> DFT_fwd = vDSP_DFT_Interleaved_CreateSetup (0 , N , vDSP_DFT_FORWARD , vDSP_DFT_Interleaved_RealtoComplex );
806- h -> DFT_bwd = vDSP_DFT_Interleaved_CreateSetup (0 , N , vDSP_DFT_INVERSE , vDSP_DFT_Interleaved_RealtoComplex );
814+ h -> DFT_fwd = vDSP_DFT_Interleaved_CreateSetup (0 , N , vDSP_DFT_FORWARD , vDSP_DFT_Interleaved_ComplextoComplex );
815+ h -> DFT_bwd = vDSP_DFT_Interleaved_CreateSetup (0 , N , vDSP_DFT_INVERSE , vDSP_DFT_Interleaved_ComplextoComplex );
807816# else
808817 h -> DFT_fwd = vDSP_DFT_zop_CreateSetup (0 , N , vDSP_DFT_FORWARD );
809818 h -> DFT_bwd = vDSP_DFT_zop_CreateSetup (0 , N , vDSP_DFT_INVERSE );
@@ -915,7 +924,7 @@ void saf_fft_forward
915924#elif defined(SAF_USE_APPLE_ACCELERATE_LP64 ) || defined(SAF_USE_APPLE_ACCELERATE_ILP64 )
916925 if (!h -> useKissFFT_FLAG ){
917926# ifdef SAF_USE_INTERLEAVED_VDSP
918- saf_print_error ( "Not implemented yet" );
927+ vDSP_DFT_Interleaved_Execute ( h -> DFT_fwd , ( DSPComplex * ) inputTD , ( DSPComplex * ) outputFD );
919928# else
920929 cblas_scopy (h -> N , & ((float * )(inputTD ))[0 ], 2 , h -> VDSP_split_tmp .realp , 1 );
921930 cblas_scopy (h -> N , & ((float * )(inputTD ))[1 ], 2 , h -> VDSP_split_tmp .imagp , 1 );
@@ -953,15 +962,15 @@ void saf_fft_backward
953962#elif defined(SAF_USE_APPLE_ACCELERATE_LP64 ) || defined(SAF_USE_APPLE_ACCELERATE_ILP64 )
954963 if (!h -> useKissFFT_FLAG ){
955964# ifdef SAF_USE_INTERLEAVED_VDSP
956- saf_print_error ( "Not implemented yet" );
965+ vDSP_DFT_Interleaved_Execute ( h -> DFT_bwd , ( DSPComplex * ) inputFD , ( DSPComplex * ) outputTD );
957966# else
958967 cblas_scopy (h -> N , & ((float * )(inputFD ))[0 ], 2 , h -> VDSP_split_tmp .realp , 1 );
959968 cblas_scopy (h -> N , & ((float * )(inputFD ))[1 ], 2 , h -> VDSP_split_tmp .imagp , 1 );
960969 vDSP_DFT_Execute (h -> DFT_bwd , h -> VDSP_split_tmp .realp , h -> VDSP_split_tmp .imagp , h -> VDSP_split .realp , h -> VDSP_split .imagp );
961970 cblas_scopy (h -> N , h -> VDSP_split .realp , 1 , & ((float * )(outputTD ))[0 ], 2 );
962971 cblas_scopy (h -> N , h -> VDSP_split .imagp , 1 , & ((float * )(outputTD ))[1 ], 2 );
963- cblas_sscal (/*re+im*/ 2 * (h -> N ), 1.0f /(float )(h -> N ), (float * )outputTD , 1 );
964972# endif
973+ cblas_sscal (/*re+im*/ 2 * (h -> N ), 1.0f /(float )(h -> N ), (float * )outputTD , 1 );
965974 }
966975#elif defined(SAF_USE_INTEL_MKL_LP64 ) || defined(SAF_USE_INTEL_MKL_ILP64 )
967976 h -> Status = DftiComputeBackward (h -> MKL_FFT_Handle , inputFD , outputTD );
0 commit comments