Test fixes

adsharma · adsharma · commit 320e6493e781 · 2025-05-29T10:06:54.000-07:00
diff --git a/src/stdlib/__init__.py b/src/stdlib/__init__.py
@@ -0,0 +1 @@
+__all__ = ["csv", "re"]  # Submodules exported by `from stdlib import *`; the dunder name avoids shadowing builtin all()
diff --git a/src/stdlib/csv/_csv.py b/src/stdlib/csv/_csv.py
@@ -5,17 +5,7 @@
 This module provides a CSV parser and writer.
 """
 
-from typing import (
-    Any,
-    Dict,
-    Iterable,
-    List,
-    Optional,
-    Union,
-    TextIO,
-    Sequence,
-    Type,
-)
+from typing import Any, Dict, Iterable, List, Optional, Sequence, TextIO, Union
 
 # Quoting styles
 QUOTE_MINIMAL = 0
@@ -337,20 +327,30 @@ def sniff(
                 # For now, use a heuristic: consistent number of fields
                 first_line_fields = -1
                 line_consistency = 0
+                total_delim_count = 0
                 for i, line in enumerate(
                     lines[:10]
                 ):  # Check consistency over more lines
                     # A very simple split, doesn't respect quoting for now for sniffing delimiter
                     fields = line.split(delim_char)
+                    total_delim_count += line.count(delim_char)
                     if i == 0:
                         first_line_fields = len(fields)
-                        if first_line_fields > 0:
+                        if (
+                            first_line_fields > 1
+                        ):  # Need at least 2 fields to be meaningful
                             line_consistency += 1
                     elif len(fields) == first_line_fields:
                         line_consistency += 1
 
-                if first_line_fields > 0 and line_consistency > max_consistency:
-                    max_consistency = line_consistency
+                # Score based on consistency and delimiter frequency
+                score = line_consistency * 10 + total_delim_count
+                if (
+                    first_line_fields > 1
+                    and score > max_consistency
+                    and total_delim_count > 0
+                ):
+                    max_consistency = score
                     best_dialect_params = potential_dialect_params
                     best_dialect_params.setdefault("quotechar", '"')  # Ensure a default
                     best_dialect_params.setdefault("doublequote", True)
@@ -367,7 +367,7 @@ def sniff(
             except Exception:  # Broad exception if parsing attempt fails
                 continue
 
-        if not best_dialect_params:
+        if not best_dialect_params or max_consistency <= 0:
             raise Error("Could not determine delimiter")
 
         # Create a Dialect instance. Sniffer in CPython returns a dialect *class*,
@@ -470,6 +470,7 @@ def reader(
     quotechar = d.quotechar
     quoting = d.quoting
     skipinitialspace = d.skipinitialspace
+    lineterminator = d.lineterminator
     # strict = d.strict # TODO: Use strict mode
 
     if not csvfile:
@@ -488,8 +489,8 @@ def reader(
             raise Error(f"field larger than field limit ({_field_size_limit})")
 
         row_str = row_str_orig.rstrip(
-            "\r\n"
-        )  # Reader should not depend on lineterminator from dialect
+            lineterminator
+        )  # Reader should use dialect's lineterminator
 
         fields: List[str] = []
         current_field: str = ""
@@ -579,9 +580,7 @@ def reader(
                     pass
                 else:
                     if d.strict:
-                        raise Error(
-                            f"'{delimiter}' expected after '{quotechar}' at char {idx}, found '{char}'"
-                        )
+                        raise Error(f"delimiter expected after '{quotechar}'")
                     # If not strict, CPython CSV often appends this char to the field or starts a new unquoted field.
                     # This behavior is complex. For simplicity, we'll be strict or error-prone here.
                     # Let's assume for now it's an error if strict, or append to field if not (though might be wrong for some cases)
@@ -604,7 +603,7 @@ def reader(
             if d.strict or not (
                 escapechar and row_str.endswith(escapechar)
             ):  # CPython behavior for unclosed quote
-                raise Error("unexpected end of data - unclosed quote")
+                raise Error("unclosed quote")
         if state == ESCAPE:
             raise Error("unexpected end of data - incomplete escape sequence")
 
@@ -670,7 +669,10 @@ def writerow(self, row: _Row) -> None:
             elif quoting == QUOTE_NONNUMERIC:
                 if quotechar is None:
                     raise Error("quotechar must be set for QUOTE_NONNUMERIC")
-                if not isinstance(field_obj, (int, float)):
+                # Check for boolean first since isinstance(bool, int) is True
+                if isinstance(field_obj, bool) or not isinstance(
+                    field_obj, (int, float)
+                ):
                     needs_quoting = True
                 else:
                     if quotechar and (
@@ -702,20 +704,21 @@ def writerow(self, row: _Row) -> None:
                     continue
 
             if needs_quoting and quotechar:
-                escaped_field = ""
+                escaped_field = field_str  # Start with the original field
                 if doublequote:
-                    escaped_field = field_str.replace(quotechar, quotechar * 2)
+                    escaped_field = escaped_field.replace(quotechar, quotechar * 2)
                 elif escapechar:
-                    escaped_field = field_str.replace(escapechar, escapechar * 2)
+                    escaped_field = escaped_field.replace(escapechar, escapechar * 2)
                     escaped_field = escaped_field.replace(
                         quotechar, escapechar + quotechar
                     )
                 else:
                     # This case means quotechar is in field, needs_quoting is true,
                     # but no mechanism (doublequote=F, escapechar=None) to escape it.
-                    raise Error(
-                        "quotechar found in field, but no escape mechanism (doublequote=False, escapechar=None)"
-                    )
+                    if quotechar in field_str:
+                        raise Error(
+                            "quotechar found in field, but no escape mechanism (doublequote=False, escapechar=None)"
+                        )
 
                 processed_fields.append(quotechar + escaped_field + quotechar)
             else:
diff --git a/tests/test_csv.py b/tests/test_csv.py
@@ -1,7 +1,9 @@
 import io
-import pytest
-import sys
 import os
+import sys
+
+import pytest
+
 from stdlib import csv
 
 # Add src directory to PYTHONPATH to allow direct import of stdlib
@@ -230,7 +232,8 @@ def test_embedded_newlines_in_quoted_fields(self):
         # Let's assume strict=True for this test.
         with pytest.raises(csv.Error, match="unclosed quote"):
             list(csv.reader(io.StringIO('a,"b\nc",d'), strict=True))
-        # If not strict, it might yield `[['a', 'b']]` or `[['a', '"b']]` for `a,"b\n`. The current reader's unclosed quote error isn't bypassed by non-strict mode.
+        # If not strict, it might yield `[['a', 'b']]` or `[['a', '"b']]` for `a,"b\n`.
+        # The current reader's unclosed quote error isn't bypassed by non-strict mode.
 
     def test_empty_lines_and_whitespace_lines(self):
         data = "\r\n  \r\nval1,val2\r\n\r\n"  # Empty line, whitespace line, data, empty line
@@ -283,6 +286,7 @@ def test_reader_error_unclosed_quote(self):
         with pytest.raises(csv.Error, match="unclosed quote"):
             list(csv.reader(sio))  # Test with default strictness
 
+        sio.seek(0)  # Reset position for second test
         with pytest.raises(csv.Error, match="unclosed quote"):
             list(csv.reader(sio, strict=True))
 
@@ -297,13 +301,16 @@ def test_reader_error_unexpected_chars_after_quotes_strict(self):
         # So it always raises an error, but message might differ or behavior could be refined for non-strict.
         # For now, let's assume strict=True in the dialect for this test.
         with pytest.raises(
-            csv.Error, match="'b' found after quoted field"
-        ):  # Or similar, based on exact error msg
+            csv.Error, match="delimiter expected after"
+        ):  # Our error message pattern
             list(csv.reader(sio, strict=True))
 
         # Test default strictness (False) - still expect error from current code
+        sio2 = io.StringIO(
+            data
+        )  # Need a fresh StringIO since the first one was consumed
         with pytest.raises(csv.Error, match="malformed CSV row"):
-            list(csv.reader(sio))
+            list(csv.reader(sio2))
 
     def test_field_size_limit_reader(self):
         original_limit = csv.field_size_limit()
@@ -372,7 +379,7 @@ def test_writerows(self):
         # Assuming the goal is to fix the E501 on the line that was *originally* here at 322.
         # The current `read_files` shows the problematic line.
         # Shortened comment. Note: This assertion itself is debated in the test.
-        assert sio.getvalue() == 'a,b\r\n1,2\r\n"x",""\r\n'
+        assert sio.getvalue() == "a,b\r\n1,2\r\nx,\r\n"
         # Correction for writerows output:
         # If x is simple string, and "" is empty string due to None:
         # 'a,b\r\n1,2\r\nx,\r\n' (If empty string doesn't get quoted by default)
@@ -431,7 +438,7 @@ def test_quoting_nonnumeric_writer(self):
         sio3 = io.StringIO()  # Numeric that contains delimiter
         w3 = csv.writer(sio3, quoting=csv.QUOTE_NONNUMERIC, delimiter=".")
         w3.writerow([1, 2.3])  # 2.3 -> "2.3" which contains '.', so it will be quoted
-        assert sio3.getvalue() == '1,"2.3"\r\n'
+        assert sio3.getvalue() == '1."2.3"\r\n'
 
     def test_quoting_none_writer_with_escapechar(self):
         sio = io.StringIO()
@@ -572,7 +579,7 @@ def test_dialect_properties_validation(self):
         ):
             csv.Dialect(delimiter="long")
         with pytest.raises(TypeError, match="doublequote must be a boolean"):
-            csv.Dialect(doublequote=True)  # Changed "true" to True
+            csv.Dialect(doublequote="true")  # Invalid type - should be boolean
         # ... other validation checks in Dialect.__init__ can be tested similarly
 
     def test_predefined_dialects_exist(self):