Skip to content

Commit d334bf6

Browse files
Extend sqllogictest framework to uptake custom datafusion.format.* settings
1 parent d68373e commit d334bf6

5 files changed

Lines changed: 138 additions & 33 deletions

File tree

datafusion/sqllogictest/src/engines/datafusion_engine/normalize.rs

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -20,10 +20,9 @@ use super::error::{DFSqlLogicTestError, Result};
2020
use crate::engines::output::DFColumnType;
2121
use arrow::array::{Array, AsArray};
2222
use arrow::datatypes::{Fields, Schema};
23-
use arrow::util::display::ArrayFormatter;
23+
use arrow::util::display::{ArrayFormatter, FormatOptions};
2424
use arrow::{array, array::ArrayRef, datatypes::DataType, record_batch::RecordBatch};
2525
use datafusion::common::internal_datafusion_err;
26-
use datafusion::config::ConfigField;
2726
use std::path::PathBuf;
2827
use std::sync::LazyLock;
2928

@@ -32,6 +31,7 @@ pub fn convert_batches(
3231
schema: &Schema,
3332
batches: Vec<RecordBatch>,
3433
is_spark_path: bool,
34+
format_options: &FormatOptions<'_>,
3535
) -> Result<Vec<Vec<String>>> {
3636
let mut rows = vec![];
3737
for batch in batches {
@@ -50,7 +50,7 @@ pub fn convert_batches(
5050
batch
5151
.columns()
5252
.iter()
53-
.map(|col| cell_to_string(col, row, is_spark_path))
53+
.map(|col| cell_to_string(col, row, is_spark_path, format_options))
5454
.collect::<Result<Vec<String>>>()
5555
})
5656
.collect::<Result<Vec<Vec<String>>>>()?
@@ -185,7 +185,12 @@ macro_rules! get_row_value {
185185
/// [NULL Values and empty strings]: https://duckdb.org/dev/sqllogictest/result_verification#null-values-and-empty-strings
186186
///
187187
/// Floating numbers are rounded to have a consistent representation with the Postgres runner.
188-
pub fn cell_to_string(col: &ArrayRef, row: usize, is_spark_path: bool) -> Result<String> {
188+
pub fn cell_to_string(
189+
col: &ArrayRef,
190+
row: usize,
191+
is_spark_path: bool,
192+
format_options: &FormatOptions<'_>,
193+
) -> Result<String> {
189194
if col.is_null(row) {
190195
// represent any null value with the string "NULL"
191196
Ok(NULL_STR.to_string())
@@ -233,18 +238,15 @@ pub fn cell_to_string(col: &ArrayRef, row: usize, is_spark_path: bool) -> Result
233238
DataType::Dictionary(_, _) => {
234239
let dict = col.as_any_dictionary();
235240
let key = dict.normalized_keys()[row];
236-
Ok(cell_to_string(dict.values(), key, is_spark_path)?)
241+
Ok(cell_to_string(
242+
dict.values(),
243+
key,
244+
is_spark_path,
245+
format_options,
246+
)?)
237247
}
238248
_ => {
239-
let mut datafusion_format_options =
240-
datafusion::config::FormatOptions::default();
241-
242-
datafusion_format_options.set("null", "NULL").unwrap();
243-
244-
let arrow_format_options: arrow::util::display::FormatOptions =
245-
(&datafusion_format_options).try_into().unwrap();
246-
247-
let f = ArrayFormatter::try_new(col.as_ref(), &arrow_format_options)?;
249+
let f = ArrayFormatter::try_new(col.as_ref(), format_options)?;
248250

249251
Ok(f.value(row).to_string())
250252
}

datafusion/sqllogictest/src/engines/datafusion_engine/runner.rs

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -22,7 +22,7 @@ use std::{path::PathBuf, time::Duration};
2222
use super::{DFSqlLogicTestError, error::Result, normalize};
2323
use crate::engines::currently_executed_sql::CurrentlyExecutingSqlTracker;
2424
use crate::engines::output::{DFColumnType, DFOutput};
25-
use crate::is_spark_path;
25+
use crate::{get_format_options, is_spark_path};
2626
use arrow::record_batch::RecordBatch;
2727
use async_trait::async_trait;
2828
use datafusion::physical_plan::common::collect;
@@ -212,7 +212,12 @@ async fn run_query(
212212
let stream = execute_stream(plan, task_ctx)?;
213213
let types = normalize::convert_schema_to_types(stream.schema().fields());
214214
let results: Vec<RecordBatch> = collect(stream).await?;
215-
let rows = normalize::convert_batches(&schema, results, is_spark_path)?;
215+
216+
let df_format = get_format_options(ctx)?;
217+
let arrow_format: arrow::util::display::FormatOptions<'_> =
218+
(&df_format).try_into()?;
219+
let rows =
220+
normalize::convert_batches(&schema, results, is_spark_path, &arrow_format)?;
216221

217222
if rows.is_empty() && types.is_empty() {
218223
Ok(DBOutput::StatementComplete(0))

datafusion/sqllogictest/src/engines/datafusion_substrait_roundtrip_engine/runner.rs

Lines changed: 9 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,9 +21,12 @@ use std::{path::PathBuf, time::Duration};
2121
use crate::engines::currently_executed_sql::CurrentlyExecutingSqlTracker;
2222
use crate::engines::datafusion_engine::Result;
2323
use crate::engines::output::{DFColumnType, DFOutput};
24-
use crate::{DFSqlLogicTestError, convert_batches, convert_schema_to_types};
24+
use crate::{
25+
DFSqlLogicTestError, convert_batches, convert_schema_to_types, get_format_options,
26+
};
2527
use arrow::record_batch::RecordBatch;
2628
use async_trait::async_trait;
29+
use datafusion::config::ConfigField;
2730
use datafusion::logical_expr::LogicalPlan;
2831
use datafusion::physical_plan::common::collect;
2932
use datafusion::physical_plan::execute_stream;
@@ -166,7 +169,11 @@ async fn run_query_substrait_round_trip(
166169
let stream = execute_stream(physical_plan, task_ctx)?;
167170
let types = convert_schema_to_types(stream.schema().fields());
168171
let results: Vec<RecordBatch> = collect(stream).await?;
169-
let rows = convert_batches(&schema, results, false)?;
172+
173+
let df_format = get_format_options(ctx)?;
174+
let arrow_format: arrow::util::display::FormatOptions<'_> =
175+
(&df_format).try_into()?;
176+
let rows = convert_batches(&schema, results, false, &arrow_format)?;
170177

171178
if rows.is_empty() && types.is_empty() {
172179
Ok(DBOutput::StatementComplete(0))

datafusion/sqllogictest/src/util.rs

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
// under the License.
1717

1818
use datafusion::common::{Result, exec_datafusion_err};
19+
use datafusion::config::{ConfigField, FormatOptions};
20+
use datafusion::prelude::SessionContext;
1921
use itertools::Itertools;
2022
use log::Level::Warn;
2123
use log::{info, log_enabled, warn};
@@ -141,6 +143,13 @@ pub fn is_spark_path(relative_path: &Path) -> bool {
141143
relative_path.starts_with("spark/")
142144
}
143145

146+
/// Returns a clone of the `datafusion.format` options configured on `ctx`, with the null display string overridden to "NULL" for sqllogictest output; errors if that override cannot be applied.
147+
pub fn get_format_options(ctx: &SessionContext) -> Result<FormatOptions> {
148+
let mut df_format = ctx.state().config().options().format.clone();
149+
df_format.set("null", "NULL")?;
150+
Ok(df_format)
151+
}
152+
144153
#[cfg(test)]
145154
mod tests {
146155
use super::*;

datafusion/sqllogictest/test_files/set_variable.slt

Lines changed: 97 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -395,7 +395,7 @@ datafusion.format.timestamp_format
395395
datafusion.format.timestamp_tz_format
396396
datafusion.format.types_info
397397

398-
# date_format: SET / SHOW / RESET / SHOW
398+
# date_format: query result display uses session format (default: %Y-%m-%d)
399399
statement ok
400400
SET datafusion.format.date_format = '%d-%m-%Y'
401401

@@ -404,6 +404,11 @@ SHOW datafusion.format.date_format
404404
----
405405
datafusion.format.date_format %d-%m-%Y
406406

407+
query D
408+
SELECT DATE '2026-04-07'
409+
----
410+
07-04-2026
411+
407412
statement ok
408413
RESET datafusion.format.date_format
409414

@@ -412,14 +417,23 @@ SHOW datafusion.format.date_format
412417
----
413418
datafusion.format.date_format %Y-%m-%d
414419

415-
# datetime_format
420+
query D
421+
SELECT DATE '2026-04-07'
422+
----
423+
2026-04-07
424+
425+
# datetime_format (default: %Y-%m-%dT%H:%M:%S%.f)
416426
statement ok
417-
SET datafusion.format.datetime_format = '%Y/%m/%d %H:%M:%S'
427+
SET datafusion.format.datetime_format = '%d-%m-%YT%H:%M:%S'
418428

419429
query TT
420430
SHOW datafusion.format.datetime_format
421431
----
422-
datafusion.format.datetime_format %Y/%m/%d %H:%M:%S
432+
datafusion.format.datetime_format %d-%m-%YT%H:%M:%S
433+
434+
# The DATETIME SQL type is not supported yet, so datetime_format cannot be exercised with a DATETIME literal.
435+
query error DataFusion error: This feature is not implemented: Unsupported SQL type DATETIME
436+
SELECT DATETIME '2026-04-07 00:10:00';
423437

424438
statement ok
425439
RESET datafusion.format.datetime_format
@@ -429,14 +443,19 @@ SHOW datafusion.format.datetime_format
429443
----
430444
datafusion.format.datetime_format %Y-%m-%dT%H:%M:%S%.f
431445

432-
# timestamp_format
446+
# timestamp_format (default: %Y-%m-%dT%H:%M:%S%.f)
433447
statement ok
434-
SET datafusion.format.timestamp_format = '%FT%H:%M:%S'
448+
SET datafusion.format.timestamp_format = '%d-%m-%YT%H:%M:%S'
435449

436450
query TT
437451
SHOW datafusion.format.timestamp_format
438452
----
439-
datafusion.format.timestamp_format %FT%H:%M:%S
453+
datafusion.format.timestamp_format %d-%m-%YT%H:%M:%S
454+
455+
query P
456+
SELECT TIMESTAMP '2026-04-07 13:31:00';
457+
----
458+
07-04-2026T13:31:00
440459

441460
statement ok
442461
RESET datafusion.format.timestamp_format
@@ -446,7 +465,12 @@ SHOW datafusion.format.timestamp_format
446465
----
447466
datafusion.format.timestamp_format %Y-%m-%dT%H:%M:%S%.f
448467

449-
# timestamp_tz_format (default NULL)
468+
query P
469+
SELECT TIMESTAMP '2026-04-07 13:31:00';
470+
----
471+
2026-04-07T13:31:00
472+
473+
# timestamp_tz_format (default: NULL)
450474
statement ok
451475
SET datafusion.format.timestamp_tz_format = '%Y-%m-%d %H:%M:%S %z'
452476

@@ -455,6 +479,11 @@ SHOW datafusion.format.timestamp_tz_format
455479
----
456480
datafusion.format.timestamp_tz_format %Y-%m-%d %H:%M:%S %z
457481

482+
query P
483+
SELECT TIMESTAMPTZ '2026-04-07 13:31:00';
484+
----
485+
2026-04-07T13:31:00
486+
458487
statement ok
459488
RESET datafusion.format.timestamp_tz_format
460489

@@ -463,14 +492,19 @@ SHOW datafusion.format.timestamp_tz_format
463492
----
464493
datafusion.format.timestamp_tz_format NULL
465494

466-
# time_format
495+
# time_format (default: %H:%M:%S%.f)
467496
statement ok
468-
SET datafusion.format.time_format = '%H-%M-%S'
497+
SET datafusion.format.time_format = '%S-%M-%H'
469498

470499
query TT
471500
SHOW datafusion.format.time_format
472501
----
473-
datafusion.format.time_format %H-%M-%S
502+
datafusion.format.time_format %S-%M-%H
503+
504+
query D
505+
SELECT TIME '01:02:12.123' AS time;
506+
----
507+
12-02-01
474508

475509
statement ok
476510
RESET datafusion.format.time_format
@@ -480,7 +514,12 @@ SHOW datafusion.format.time_format
480514
----
481515
datafusion.format.time_format %H:%M:%S%.f
482516

483-
# duration_format: values are normalized to lowercase; ISO8601 and pretty are valid
517+
query D
518+
SELECT TIME '01:02:12.123' AS time;
519+
----
520+
01:02:12.123
521+
522+
# duration_format (default: pretty): values are normalized to lowercase; ISO8601 and pretty are valid
484523
statement ok
485524
SET datafusion.format.duration_format = ISO8601
486525

@@ -489,6 +528,12 @@ SHOW datafusion.format.duration_format
489528
----
490529
datafusion.format.duration_format iso8601
491530

531+
# Session duration_format controls display of Duration columns (not SQL INTERVAL)
532+
query ?
533+
SELECT arrow_cast(3661, 'Duration(Second)');
534+
----
535+
PT3661S
536+
492537
statement ok
493538
SET datafusion.format.duration_format to 'PRETTY'
494539

@@ -497,6 +542,11 @@ SHOW datafusion.format.duration_format
497542
----
498543
datafusion.format.duration_format pretty
499544

545+
query ?
546+
SELECT arrow_cast(3661, 'Duration(Second)');
547+
----
548+
0 days 1 hours 1 mins 1 secs
549+
500550
statement ok
501551
RESET datafusion.format.duration_format
502552

@@ -505,7 +555,29 @@ SHOW datafusion.format.duration_format
505555
----
506556
datafusion.format.duration_format pretty
507557

508-
# null display string
558+
query ?
559+
SELECT arrow_cast(3661, 'Duration(Second)');
560+
----
561+
0 days 1 hours 1 mins 1 secs
562+
563+
# Case-insensitive duration_format variable name
564+
statement ok
565+
SET datafusion.FORMAT.DURATION_FORMAT = 'ISO8601'
566+
567+
query TT
568+
SHOW datafusion.format.duration_format
569+
----
570+
datafusion.format.duration_format iso8601
571+
572+
query ?
573+
SELECT arrow_cast(61, 'Duration(Second)');
574+
----
575+
PT61S
576+
577+
statement ok
578+
RESET datafusion.format.duration_format
579+
580+
# null display string (default: (empty))
509581
statement ok
510582
SET datafusion.format.null = 'NuLL'
511583

@@ -522,7 +594,7 @@ SHOW datafusion.format.null
522594
----
523595
datafusion.format.null (empty)
524596

525-
# safe
597+
# safe (default: true)
526598
statement ok
527599
SET datafusion.format.safe = false
528600

@@ -539,7 +611,7 @@ SHOW datafusion.format.safe
539611
----
540612
datafusion.format.safe true
541613

542-
# types_info
614+
# types_info (default: false)
543615
statement ok
544616
SET datafusion.format.types_info to true
545617

@@ -565,6 +637,11 @@ SHOW datafusion.format.date_format
565637
----
566638
datafusion.format.date_format %m/%d/%Y
567639

640+
query D
641+
SELECT DATE '2026-04-07';
642+
----
643+
04/07/2026
644+
568645
statement ok
569646
RESET datafusion.format.date_format
570647

@@ -573,6 +650,11 @@ SHOW datafusion.format.date_format
573650
----
574651
datafusion.format.date_format %Y-%m-%d
575652

653+
query D
654+
SELECT DATE '2026-04-07';
655+
----
656+
2026-04-07
657+
576658
# Invalid format option name
577659
statement error DataFusion error: Invalid or Unsupported Configuration: Config value "unknown_option" not found on FormatOptions
578660
SET datafusion.format.unknown_option = true

0 commit comments

Comments
 (0)