Skip to content
Open
34 changes: 24 additions & 10 deletions mssql_python/connection.py
Original file line number Diff line number Diff line change
Expand Up @@ -261,10 +261,14 @@ def __init__(
}

# Initialize decoding settings with Python 3 defaults
# SQL_CHAR default uses SQL_WCHAR ctype so the ODBC driver returns
# UTF-16 data for VARCHAR columns. This avoids encoding mismatches on
# Windows where the driver returns raw bytes in the server's native
# code page (e.g. CP-1252) that may fail to decode as UTF-8.
self._decoding_settings = {
ConstantsDDBC.SQL_CHAR.value: {
"encoding": "utf-8",
"ctype": ConstantsDDBC.SQL_CHAR.value,
"encoding": "utf-16le",
Comment thread
subrata-ms marked this conversation as resolved.
"ctype": ConstantsDDBC.SQL_WCHAR.value,
},
Comment thread
subrata-ms marked this conversation as resolved.
ConstantsDDBC.SQL_WCHAR.value: {
"encoding": "utf-16le",
Expand Down Expand Up @@ -643,9 +647,13 @@ def setdecoding(
sqltype (int): The SQL type being configured: SQL_CHAR, SQL_WCHAR, or SQL_WMETADATA.
SQL_WMETADATA is a special flag for configuring column name decoding.
encoding (str, optional): The Python encoding to use when decoding the data.
If None, uses default encoding based on sqltype.
If None, defaults to ``'utf-16le'`` for all sqltypes (SQL_CHAR,
SQL_WCHAR, and SQL_WMETADATA), matching the connection-level
defaults set in ``Connection.__init__``. Passing ``encoding=None``
while also leaving ``ctype`` as None therefore resets the sqltype to
its initial default; an explicitly supplied ``ctype`` is still honored.
ctype (int, optional): The C data type to request from SQLGetData:
SQL_CHAR or SQL_WCHAR. If None, uses default based on encoding.
SQL_CHAR or SQL_WCHAR. If None, uses default based on encoding
(SQL_WCHAR for UTF-16 variants, SQL_CHAR otherwise).

Returns:
None
Expand All @@ -655,7 +663,10 @@ def setdecoding(
InterfaceError: If the connection is closed.

Example:
# Configure SQL_CHAR to use UTF-8 decoding
# Reset SQL_CHAR to the connection default (utf-16le + SQL_WCHAR ctype)
cnxn.setdecoding(mssql_python.SQL_CHAR)

# Configure SQL_CHAR to use UTF-8 decoding (opt-in, non-default)
cnxn.setdecoding(mssql_python.SQL_CHAR, encoding='utf-8')

# Configure column metadata decoding
Expand Down Expand Up @@ -691,12 +702,15 @@ def setdecoding(
),
)

# Set default encoding based on sqltype if not provided
# Set default encoding based on sqltype if not provided.
# All sqltypes default to UTF-16LE to match Connection.__init__ defaults.
# SQL_CHAR uses utf-16le + SQL_WCHAR ctype so the ODBC driver returns
# UTF-16 data for VARCHAR columns, avoiding encoding mismatches on
# Windows where the driver may otherwise return raw bytes in the
# server's native code page (e.g. CP-1252). This makes
# ``setdecoding(SQL_CHAR)`` with no arguments a true reset-to-defaults.
if encoding is None:
if sqltype == ConstantsDDBC.SQL_CHAR.value:
encoding = "utf-8" # Default for SQL_CHAR in Python 3
else: # SQL_WCHAR or SQL_WMETADATA
encoding = "utf-16le" # Default for SQL_WCHAR in Python 3
encoding = "utf-16le"

# Validate encoding using cached validation for better performance
if not _validate_encoding(encoding):
Expand Down
9 changes: 6 additions & 3 deletions mssql_python/cursor.py
Original file line number Diff line number Diff line change
Expand Up @@ -2462,8 +2462,9 @@ def fetchone(self) -> Union[None, Row]:
ret = ddbc_bindings.DDBCSQLFetchOne(
self.hstmt,
row_data,
char_decoding.get("encoding", "utf-8"),
char_decoding.get("encoding", "utf-16le"),
wchar_decoding.get("encoding", "utf-16le"),
char_decoding.get("ctype", ddbc_sql_const.SQL_WCHAR.value),
)

if self.hstmt:
Expand Down Expand Up @@ -2528,8 +2529,9 @@ def fetchmany(self, size: Optional[int] = None) -> List[Row]:
self.hstmt,
rows_data,
size,
char_decoding.get("encoding", "utf-8"),
char_decoding.get("encoding", "utf-16le"),
wchar_decoding.get("encoding", "utf-16le"),
char_decoding.get("ctype", ddbc_sql_const.SQL_WCHAR.value),
)

if self.hstmt:
Expand Down Expand Up @@ -2586,8 +2588,9 @@ def fetchall(self) -> List[Row]:
ret = ddbc_bindings.DDBCSQLFetchAll(
self.hstmt,
rows_data,
char_decoding.get("encoding", "utf-8"),
char_decoding.get("encoding", "utf-16le"),
wchar_decoding.get("encoding", "utf-16le"),
char_decoding.get("ctype", ddbc_sql_const.SQL_WCHAR.value),
)

# Check for errors
Expand Down
Loading
Loading