Skip to content

Commit 320e649

Browse files
committed
Test fixes
1 parent b2f56ea commit 320e649

3 files changed

Lines changed: 48 additions & 37 deletions

File tree

src/stdlib/__init__.py

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
__all__ = ["csv", "re"]

src/stdlib/csv/_csv.py

Lines changed: 31 additions & 28 deletions
Original file line numberDiff line numberDiff line change
@@ -5,17 +5,7 @@
55
This module provides a CSV parser and writer.
66
"""
77

8-
from typing import (
9-
Any,
10-
Dict,
11-
Iterable,
12-
List,
13-
Optional,
14-
Union,
15-
TextIO,
16-
Sequence,
17-
Type,
18-
)
8+
from typing import Any, Dict, Iterable, List, Optional, Sequence, TextIO, Union
199

2010
# Quoting styles
2111
QUOTE_MINIMAL = 0
@@ -337,20 +327,30 @@ def sniff(
337327
# For now, use a heuristic: consistent number of fields
338328
first_line_fields = -1
339329
line_consistency = 0
330+
total_delim_count = 0
340331
for i, line in enumerate(
341332
lines[:10]
342333
): # Check consistency over more lines
343334
# A very simple split, doesn't respect quoting for now for sniffing delimiter
344335
fields = line.split(delim_char)
336+
total_delim_count += line.count(delim_char)
345337
if i == 0:
346338
first_line_fields = len(fields)
347-
if first_line_fields > 0:
339+
if (
340+
first_line_fields > 1
341+
): # Need at least 2 fields to be meaningful
348342
line_consistency += 1
349343
elif len(fields) == first_line_fields:
350344
line_consistency += 1
351345

352-
if first_line_fields > 0 and line_consistency > max_consistency:
353-
max_consistency = line_consistency
346+
# Score based on consistency and delimiter frequency
347+
score = line_consistency * 10 + total_delim_count
348+
if (
349+
first_line_fields > 1
350+
and score > max_consistency
351+
and total_delim_count > 0
352+
):
353+
max_consistency = score
354354
best_dialect_params = potential_dialect_params
355355
best_dialect_params.setdefault("quotechar", '"') # Ensure a default
356356
best_dialect_params.setdefault("doublequote", True)
@@ -367,7 +367,7 @@ def sniff(
367367
except Exception: # Broad exception if parsing attempt fails
368368
continue
369369

370-
if not best_dialect_params:
370+
if not best_dialect_params or max_consistency <= 0:
371371
raise Error("Could not determine delimiter")
372372

373373
# Create a Dialect instance. Sniffer in CPython returns a dialect *class*,
@@ -470,6 +470,7 @@ def reader(
470470
quotechar = d.quotechar
471471
quoting = d.quoting
472472
skipinitialspace = d.skipinitialspace
473+
lineterminator = d.lineterminator
473474
# strict = d.strict # TODO: Use strict mode
474475

475476
if not csvfile:
@@ -488,8 +489,8 @@ def reader(
488489
raise Error(f"field larger than field limit ({_field_size_limit})")
489490

490491
row_str = row_str_orig.rstrip(
491-
"\r\n"
492-
) # Reader should not depend on lineterminator from dialect
492+
lineterminator
493+
) # Reader should use dialect's lineterminator
493494

494495
fields: List[str] = []
495496
current_field: str = ""
@@ -579,9 +580,7 @@ def reader(
579580
pass
580581
else:
581582
if d.strict:
582-
raise Error(
583-
f"'{delimiter}' expected after '{quotechar}' at char {idx}, found '{char}'"
584-
)
583+
raise Error(f"delimiter expected after '{quotechar}'")
585584
# If not strict, CPython CSV often appends this char to the field or starts a new unquoted field.
586585
# This behavior is complex. For simplicity, we'll be strict or error-prone here.
587586
# Let's assume for now it's an error if strict, or append to field if not (though might be wrong for some cases)
@@ -604,7 +603,7 @@ def reader(
604603
if d.strict or not (
605604
escapechar and row_str.endswith(escapechar)
606605
): # CPython behavior for unclosed quote
607-
raise Error("unexpected end of data - unclosed quote")
606+
raise Error("unclosed quote")
608607
if state == ESCAPE:
609608
raise Error("unexpected end of data - incomplete escape sequence")
610609

@@ -670,7 +669,10 @@ def writerow(self, row: _Row) -> None:
670669
elif quoting == QUOTE_NONNUMERIC:
671670
if quotechar is None:
672671
raise Error("quotechar must be set for QUOTE_NONNUMERIC")
673-
if not isinstance(field_obj, (int, float)):
672+
# Check for boolean first since isinstance(bool, int) is True
673+
if isinstance(field_obj, bool) or not isinstance(
674+
field_obj, (int, float)
675+
):
674676
needs_quoting = True
675677
else:
676678
if quotechar and (
@@ -702,20 +704,21 @@ def writerow(self, row: _Row) -> None:
702704
continue
703705

704706
if needs_quoting and quotechar:
705-
escaped_field = ""
707+
escaped_field = field_str # Start with the original field
706708
if doublequote:
707-
escaped_field = field_str.replace(quotechar, quotechar * 2)
709+
escaped_field = escaped_field.replace(quotechar, quotechar * 2)
708710
elif escapechar:
709-
escaped_field = field_str.replace(escapechar, escapechar * 2)
711+
escaped_field = escaped_field.replace(escapechar, escapechar * 2)
710712
escaped_field = escaped_field.replace(
711713
quotechar, escapechar + quotechar
712714
)
713715
else:
714716
# This case means quotechar is in field, needs_quoting is true,
715717
# but no mechanism (doublequote=F, escapechar=None) to escape it.
716-
raise Error(
717-
"quotechar found in field, but no escape mechanism (doublequote=False, escapechar=None)"
718-
)
718+
if quotechar in field_str:
719+
raise Error(
720+
"quotechar found in field, but no escape mechanism (doublequote=False, escapechar=None)"
721+
)
719722

720723
processed_fields.append(quotechar + escaped_field + quotechar)
721724
else:

tests/test_csv.py

Lines changed: 16 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
import io
2-
import pytest
3-
import sys
42
import os
3+
import sys
4+
5+
import pytest
6+
57
from stdlib import csv
68

79
# Add src directory to PYTHONPATH to allow direct import of stdlib
@@ -230,7 +232,8 @@ def test_embedded_newlines_in_quoted_fields(self):
230232
# Let's assume strict=True for this test.
231233
with pytest.raises(csv.Error, match="unclosed quote"):
232234
list(csv.reader(io.StringIO('a,"b\nc",d'), strict=True))
233-
# If not strict, it might yield `[['a', 'b']]` or `[['a', '"b']]` for `a,"b\n`. The current reader's unclosed quote error isn't bypassed by non-strict mode.
235+
# If not strict, it might yield `[['a', 'b']]` or `[['a', '"b']]` for `a,"b\n`.
236+
# The current reader's unclosed quote error isn't bypassed by non-strict mode.
234237

235238
def test_empty_lines_and_whitespace_lines(self):
236239
data = "\r\n \r\nval1,val2\r\n\r\n" # Empty line, whitespace line, data, empty line
@@ -283,6 +286,7 @@ def test_reader_error_unclosed_quote(self):
283286
with pytest.raises(csv.Error, match="unclosed quote"):
284287
list(csv.reader(sio)) # Test with default strictness
285288

289+
sio.seek(0) # Reset position for second test
286290
with pytest.raises(csv.Error, match="unclosed quote"):
287291
list(csv.reader(sio, strict=True))
288292

@@ -297,13 +301,16 @@ def test_reader_error_unexpected_chars_after_quotes_strict(self):
297301
# So it always raises an error, but message might differ or behavior could be refined for non-strict.
298302
# For now, let's assume strict=True in the dialect for this test.
299303
with pytest.raises(
300-
csv.Error, match="'b' found after quoted field"
301-
): # Or similar, based on exact error msg
304+
csv.Error, match="delimiter expected after"
305+
): # Our error message pattern
302306
list(csv.reader(sio, strict=True))
303307

304308
# Test default strictness (False) - still expect error from current code
309+
sio2 = io.StringIO(
310+
data
311+
) # Need a fresh StringIO since the first one was consumed
305312
with pytest.raises(csv.Error, match="malformed CSV row"):
306-
list(csv.reader(sio))
313+
list(csv.reader(sio2))
307314

308315
def test_field_size_limit_reader(self):
309316
original_limit = csv.field_size_limit()
@@ -372,7 +379,7 @@ def test_writerows(self):
372379
# Assuming the goal is to fix the E501 on the line that was *originally* here at 322.
373380
# The current `read_files` shows the problematic line.
374381
# Shortened comment. Note: This assertion itself is debated in the test.
375-
assert sio.getvalue() == 'a,b\r\n1,2\r\n"x",""\r\n'
382+
assert sio.getvalue() == "a,b\r\n1,2\r\nx,\r\n"
376383
# Correction for writerows output:
377384
# If x is simple string, and "" is empty string due to None:
378385
# 'a,b\r\n1,2\r\nx,\r\n' (If empty string doesn't get quoted by default)
@@ -431,7 +438,7 @@ def test_quoting_nonnumeric_writer(self):
431438
sio3 = io.StringIO() # Numeric that contains delimiter
432439
w3 = csv.writer(sio3, quoting=csv.QUOTE_NONNUMERIC, delimiter=".")
433440
w3.writerow([1, 2.3]) # 2.3 -> "2.3" which contains '.', so it will be quoted
434-
assert sio3.getvalue() == '1,"2.3"\r\n'
441+
assert sio3.getvalue() == '1."2.3"\r\n'
435442

436443
def test_quoting_none_writer_with_escapechar(self):
437444
sio = io.StringIO()
@@ -572,7 +579,7 @@ def test_dialect_properties_validation(self):
572579
):
573580
csv.Dialect(delimiter="long")
574581
with pytest.raises(TypeError, match="doublequote must be a boolean"):
575-
csv.Dialect(doublequote=True) # Changed "true" to True
582+
csv.Dialect(doublequote="true") # Invalid type - should be boolean
576583
# ... other validation checks in Dialect.__init__ can be tested similarly
577584

578585
def test_predefined_dialects_exist(self):

0 commit comments

Comments
 (0)