@@ -427,3 +427,173 @@ def test_cache(self, spark):
427427 assert df is not cached
428428 assert cached .collect () == df .collect ()
429429 assert cached .collect () == [Row (one = 1 , two = 2 , three = 3 , four = 4 )]
430+
def test_dtypes(self, spark):
    """Check the shape of ``DataFrame.dtypes`` for a three-column frame.

    ``dtypes`` must be a list of three ``(name, type)`` pairs of strings,
    with the names in column order and every type string non-empty.
    """
    rows = [("Alice", 25, 5000.0), ("Bob", 30, 6000.0)]
    frame = spark.createDataFrame(rows, ["name", "age", "salary"])
    dtypes = frame.dtypes

    assert isinstance(dtypes, list)
    assert len(dtypes) == 3
    # Every entry is a (column name, type name) pair of plain strings.
    assert all(isinstance(name, str) and isinstance(type_name, str) for name, type_name in dtypes)

    # Names come back in the order the columns were declared.
    assert [name for name, _ in dtypes] == ["name", "age", "salary"]
    # No column may report an empty type string.
    assert all(len(type_name) > 0 for _, type_name in dtypes)
446+
def test_dtypes_complex_types(self, spark):
    """Check that ``dtypes`` lists array and struct columns next to a scalar one.

    Only the number of entries and the column names are verified; the
    type strings themselves are not inspected here.
    """
    from spark_namespace.sql.types import ArrayType, IntegerType, StringType, StructField, StructType

    address_type = StructType(
        [StructField("city", StringType(), True), StructField("zip", StringType(), True)]
    )
    schema = StructType(
        [
            StructField("name", StringType(), True),
            StructField("scores", ArrayType(IntegerType()), True),
            StructField("address", address_type, True),
        ]
    )
    rows = [
        ("Alice", [90, 85, 88], {"city": "NYC", "zip": "10001"}),
        ("Bob", [75, 80, 82], {"city": "LA", "zip": "90001"}),
    ]
    dtypes = spark.createDataFrame(rows, schema).dtypes

    assert len(dtypes) == 3
    assert [name for name, _ in dtypes] == ["name", "scores", "address"]
471+
def test_printSchema(self, spark, capsys):
    """Check that ``printSchema`` writes a schema tree to standard output.

    The captured output must contain the ``root`` header and all three
    column names. Type checks are intentionally loose: the text column
    may be reported as ``string`` or (case-insensitively) ``varchar``,
    and some integer-like type name must appear.

    Args:
        spark: Session fixture used to build the DataFrame.
        capsys: pytest fixture that captures what ``printSchema`` prints.
    """
    data = [("Alice", 25, 5000), ("Bob", 30, 6000)]
    df = spark.createDataFrame(data, ["name", "age", "salary"])
    df.printSchema()
    output = capsys.readouterr().out
    # Lower-case once instead of on every case-insensitive check.
    lowered = output.lower()

    for expected in ("root", "name", "age", "salary"):
        assert expected in output
    assert "string" in output or "varchar" in lowered
    # "int" is a substring of "bigint", so this single check accepts both
    # spellings; a separate "bigint" test could never change the outcome.
    assert "int" in lowered
485+
def test_printSchema_nested(self, spark, capsys):
    """Check that ``printSchema`` output mentions nested struct and array columns.

    The frame has an integer ``id``, a ``person`` struct and a ``hobbies``
    array; the printed text must contain ``root`` and both complex
    column names.
    """
    from spark_namespace.sql.types import ArrayType, IntegerType, StringType, StructField, StructType

    person_type = StructType(
        [StructField("name", StringType(), True), StructField("age", IntegerType(), True)]
    )
    schema = StructType(
        [
            StructField("id", IntegerType(), True),
            StructField("person", person_type, True),
            StructField("hobbies", ArrayType(StringType()), True),
        ]
    )
    rows = [
        (1, {"name": "Alice", "age": 25}, ["reading", "coding"]),
        (2, {"name": "Bob", "age": 30}, ["gaming", "music"]),
    ]
    spark.createDataFrame(rows, schema).printSchema()
    printed = capsys.readouterr().out

    for expected in ("root", "person", "hobbies"):
        assert expected in printed
512+
def test_printSchema_negative_level(self, spark):
    """Check that ``printSchema(level=-1)`` raises ``PySparkValueError``."""
    frame = spark.createDataFrame([("Alice", 25)], ["name", "age"])

    with pytest.raises(PySparkValueError):
        frame.printSchema(level=-1)
519+
def test_treeString_basic(self, spark):
    """Check the layout of ``schema.treeString()`` for a flat three-column frame.

    The rendering must begin with the ``root`` header, contain one
    field line per column, and mark fields as nullable.
    """
    frame = spark.createDataFrame([("Alice", 25, 5000)], ["name", "age", "salary"])
    rendered = frame.schema.treeString()

    assert rendered.startswith("root\n ")
    for field_marker in (" |-- name:", " |-- age:", " |-- salary:"):
        assert field_marker in rendered
    assert "(nullable = true)" in rendered
    # Exactly one field line per column.
    assert rendered.count(" |-- ") == 3
531+
def test_treeString_nested_struct(self, spark):
    """Check that ``treeString()`` renders a struct column and its inner fields.

    The ``person`` column must appear as a nullable ``struct`` line, and
    the names of its inner fields must show up somewhere in the tree.
    """
    from spark_namespace.sql.types import IntegerType, StringType, StructField, StructType

    person_type = StructType(
        [StructField("name", StringType(), True), StructField("age", IntegerType(), True)]
    )
    schema = StructType(
        [
            StructField("id", IntegerType(), True),
            StructField("person", person_type, True),
        ]
    )
    frame = spark.createDataFrame([(1, {"name": "Alice", "age": 25})], schema)
    rendered = frame.schema.treeString()

    expected_fragments = (
        "root\n ",
        " |-- id:",
        " |-- person: struct (nullable = true)",
        "name:",
        "age:",
    )
    for fragment in expected_fragments:
        assert fragment in rendered
554+
def test_treeString_with_level(self, spark):
    """Check that ``treeString(level=1)`` limits the rendering to top-level fields.

    The schema nests two struct levels below ``person``. At ``level=1``
    the ``id`` and ``person`` lines must be present, and the rendering
    may contain at most three non-blank pieces (header plus two fields).
    """
    from spark_namespace.sql.types import IntegerType, StringType, StructField, StructType

    details_type = StructType([StructField("address", StringType(), True)])
    person_type = StructType(
        [
            StructField("name", StringType(), True),
            StructField("details", details_type, True),
        ]
    )
    schema = StructType(
        [
            StructField("id", IntegerType(), True),
            StructField("person", person_type, True),
        ]
    )

    rows = [(1, {"name": "Alice", "details": {"address": "123 Main St"}})]
    frame = spark.createDataFrame(rows, schema)

    # Depth 1: only the fields directly under the root are expected.
    shallow_tree = frame.schema.treeString(level=1)
    assert " |-- id:" in shallow_tree
    assert " |-- person: struct" in shallow_tree
    # Any rendered nested field would add pieces and push the count above three.
    non_blank = sum(1 for piece in shallow_tree.split("\n ") if piece.strip())
    assert non_blank <= 3
584+
def test_treeString_array_type(self, spark):
    """Check that ``treeString()`` renders an array column with an ``array<`` type prefix."""
    from spark_namespace.sql.types import ArrayType, StringType, StructField, StructType

    name_field = StructField("name", StringType(), True)
    hobbies_field = StructField("hobbies", ArrayType(StringType()), True)
    schema = StructType([name_field, hobbies_field])

    frame = spark.createDataFrame([("Alice", ["reading", "coding"])], schema)
    rendered = frame.schema.treeString()

    for fragment in ("root\n ", " |-- name:", " |-- hobbies: array<", "(nullable = true)"):
        assert fragment in rendered
599+ assert "(nullable = true)" in tree
0 commit comments