From e8a92bbf896f9879d1ea33730b9de8b450e23ac4 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 9 Jan 2019 12:17:34 +0100 Subject: [PATCH 1/3] bpo-35697, decimal: Fix locale formatting The decimal module now supports formatting numbers with the "n" type when the LC_NUMERIC locale uses a different encoding than the LC_CTYPE locale. It now temporarily sets the LC_CTYPE locale to the LC_NUMERIC locale to decode decimal_point and thousands_sep byte strings if they are non-ASCII or longer than 1 byte, and the LC_NUMERIC locale is different from the LC_CTYPE locale. This temporary change affects other threads. Also fix the #define guard of pycore_fileutils.h: allow Py_BUILD_CORE_BUILTIN as well. --- Include/internal/pycore_fileutils.h | 4 +- .../2019-01-09-12-35-57.bpo-35697._TAUNc.rst | 7 ++ Modules/_decimal/_decimal.c | 72 ++++++++----------- Modules/_decimal/libmpdec/io.c | 2 + Modules/_decimal/libmpdec/mpdecimal.h | 1 + setup.py | 2 +- 6 files changed, 42 insertions(+), 46 deletions(-) create mode 100644 Misc/NEWS.d/next/Library/2019-01-09-12-35-57.bpo-35697._TAUNc.rst diff --git a/Include/internal/pycore_fileutils.h b/Include/internal/pycore_fileutils.h index 25006653a58960..66b86f82861f8e 100644 --- a/Include/internal/pycore_fileutils.h +++ b/Include/internal/pycore_fileutils.h @@ -4,8 +4,8 @@ extern "C" { #endif -#ifndef Py_BUILD_CORE -# error "Py_BUILD_CORE must be defined to include this header" +#if !defined(Py_BUILD_CORE) && !defined(Py_BUILD_CORE_BUILTIN) +# error "this header requires Py_BUILD_CORE or Py_BUILD_CORE_BUILTIN defined" #endif #include /* struct lconv */ diff --git a/Misc/NEWS.d/next/Library/2019-01-09-12-35-57.bpo-35697._TAUNc.rst b/Misc/NEWS.d/next/Library/2019-01-09-12-35-57.bpo-35697._TAUNc.rst new file mode 100644 index 00000000000000..c2970c88e9b1ad --- /dev/null +++ b/Misc/NEWS.d/next/Library/2019-01-09-12-35-57.bpo-35697._TAUNc.rst @@ -0,0 +1,7 @@ +The :mod:`decimal` module now supports formatting number to the "n" type +when the 
``LC_NUMERIC`` locale uses a different encoding than the +``LC_CTYPE`` locale. It now sets temporarily the ``LC_CTYPE`` locale to the +``LC_NUMERIC`` locale to decode ``decimal_point`` and ``thousands_sep`` byte +strings if they are non-ASCII or longer than 1 byte, and the ``LC_NUMERIC`` +locale is different than the ``LC_CTYPE`` locale. This temporary change +affects other threads. diff --git a/Modules/_decimal/_decimal.c b/Modules/_decimal/_decimal.c index 51aed2c67dc65e..db7f9bfaf5101a 100644 --- a/Modules/_decimal/_decimal.c +++ b/Modules/_decimal/_decimal.c @@ -32,6 +32,7 @@ #include "structmember.h" #include "complexobject.h" #include "mpdecimal.h" +#include "pycore_fileutils.h" #include @@ -3013,32 +3014,6 @@ dec_replace_fillchar(char *dest) } } -/* Convert decimal_point or thousands_sep, which may be multibyte or in - the range [128, 255], to a UTF8 string. */ -static PyObject * -dotsep_as_utf8(const char *s) -{ - PyObject *utf8; - PyObject *tmp; - wchar_t buf[2]; - size_t n; - - n = mbstowcs(buf, s, 2); - if (n != 1) { /* Issue #7442 */ - PyErr_SetString(PyExc_ValueError, - "invalid decimal point or unsupported " - "combination of LC_CTYPE and LC_NUMERIC"); - return NULL; - } - tmp = PyUnicode_FromWideChar(buf, n); - if (tmp == NULL) { - return NULL; - } - utf8 = PyUnicode_AsUTF8String(tmp); - Py_DECREF(tmp); - return utf8; -} - /* Formatted representation of a PyDecObject. 
*/ static PyObject * dec_format(PyObject *dec, PyObject *args) @@ -3048,6 +3023,7 @@ dec_format(PyObject *dec, PyObject *args) PyObject *dot = NULL; PyObject *sep = NULL; PyObject *grouping = NULL; + char *grouping_buffer = NULL; PyObject *fmtarg; PyObject *context; mpd_spec_t spec; @@ -3133,24 +3109,33 @@ dec_format(PyObject *dec, PyObject *args) goto finish; } } - else { - size_t n = strlen(spec.dot); - if (n > 1 || (n == 1 && !isascii((uchar)spec.dot[0]))) { - /* fix locale dependent non-ascii characters */ - dot = dotsep_as_utf8(spec.dot); - if (dot == NULL) { - goto finish; - } - spec.dot = PyBytes_AS_STRING(dot); + else if (spec.locale) { + struct lconv *lc = localeconv(); + if (_Py_GetLocaleconvNumeric(lc, &dot, &sep) < 0) { + goto finish; } - n = strlen(spec.sep); - if (n > 1 || (n == 1 && !isascii((uchar)spec.sep[0]))) { - /* fix locale dependent non-ascii characters */ - sep = dotsep_as_utf8(spec.sep); - if (sep == NULL) { - goto finish; - } - spec.sep = PyBytes_AS_STRING(sep); + + grouping_buffer = _PyMem_Strdup(lc->grouping); + if (grouping_buffer == NULL) { + PyErr_NoMemory(); + goto finish; + } + spec.grouping = grouping_buffer; + + spec.dot = PyUnicode_AsUTF8(dot); + if (spec.dot == NULL) { + goto finish; + } + + spec.sep = PyUnicode_AsUTF8(sep); + if (spec.sep == NULL) { + goto finish; + } + + if (mpd_validate_lconv(&spec) < 0) { + PyErr_SetString(PyExc_ValueError, + "invalid localeconv()"); + goto finish; } } @@ -3176,6 +3161,7 @@ dec_format(PyObject *dec, PyObject *args) finish: Py_XDECREF(grouping); + PyMem_Free(grouping_buffer); Py_XDECREF(sep); Py_XDECREF(dot); if (replace_fillchar) PyMem_Free(fmt); diff --git a/Modules/_decimal/libmpdec/io.c b/Modules/_decimal/libmpdec/io.c index f45e558f1a9573..02bee5b771a239 100644 --- a/Modules/_decimal/libmpdec/io.c +++ b/Modules/_decimal/libmpdec/io.c @@ -784,6 +784,7 @@ mpd_parse_fmt_str(mpd_spec_t *spec, const char *fmt, int caps) spec->dot = ""; spec->sep = ""; spec->grouping = ""; + spec->locale = 0; 
/* presume that the first character is a UTF-8 fill character */ @@ -871,6 +872,7 @@ mpd_parse_fmt_str(mpd_spec_t *spec, const char *fmt, int caps) if (*spec->sep) { return 0; } + spec->locale = 1; spec->type = *cp++; spec->type = (spec->type == 'N') ? 'G' : 'g'; lc = localeconv(); diff --git a/Modules/_decimal/libmpdec/mpdecimal.h b/Modules/_decimal/libmpdec/mpdecimal.h index a67dd9bc126c27..a4ac20b710afc5 100644 --- a/Modules/_decimal/libmpdec/mpdecimal.h +++ b/Modules/_decimal/libmpdec/mpdecimal.h @@ -397,6 +397,7 @@ typedef struct mpd_spec_t { const char *dot; /* decimal point */ const char *sep; /* thousands separator */ const char *grouping; /* grouping of digits */ + int locale; /* use localeconv() */ } mpd_spec_t; /* output to a string */ diff --git a/setup.py b/setup.py index 44a563bce45906..1911b77b742742 100644 --- a/setup.py +++ b/setup.py @@ -2002,7 +2002,7 @@ def detect_ctypes(self, inc_dirs, lib_dirs): ext.libraries.append('dl') def _decimal_ext(self): - extra_compile_args = [] + extra_compile_args = ['-DPy_BUILD_CORE_BUILTIN'] undef_macros = [] if '--with-system-libmpdec' in sysconfig.get_config_var("CONFIG_ARGS"): include_dirs = [] From cdd4a68d2f628aece8124d11892eda666a5eeb76 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Wed, 9 Jan 2019 12:44:24 +0100 Subject: [PATCH 2/3] Include explicitly --- Modules/_decimal/_decimal.c | 1 + 1 file changed, 1 insertion(+) diff --git a/Modules/_decimal/_decimal.c b/Modules/_decimal/_decimal.c index db7f9bfaf5101a..52e072a22aa945 100644 --- a/Modules/_decimal/_decimal.c +++ b/Modules/_decimal/_decimal.c @@ -34,6 +34,7 @@ #include "mpdecimal.h" #include "pycore_fileutils.h" +#include #include #include "docstrings.h" From 150696f929aed30cb94b758c47ce1ed1b9681d95 Mon Sep 17 00:00:00 2001 From: Victor Stinner Date: Thu, 10 Jan 2019 09:02:54 +0100 Subject: [PATCH 3/3] _decimal.vcxproj: add Py_BUILD_CORE_BUILTIN define --- PCbuild/_decimal.vcxproj | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git 
a/PCbuild/_decimal.vcxproj b/PCbuild/_decimal.vcxproj index df9f600cdafe7b..4ef7b9478de9d8 100644 --- a/PCbuild/_decimal.vcxproj +++ b/PCbuild/_decimal.vcxproj @@ -61,7 +61,7 @@ - _CRT_SECURE_NO_WARNINGS;MASM;%(PreprocessorDefinitions) + _CRT_SECURE_NO_WARNINGS;MASM;Py_BUILD_CORE_BUILTIN;%(PreprocessorDefinitions) CONFIG_32;PPRO;%(PreprocessorDefinitions) CONFIG_64;%(PreprocessorDefinitions) ..\Modules\_decimal;..\Modules\_decimal\libmpdec;%(AdditionalIncludeDirectories)