Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Fix mapping of PostgreSQL encodings to Python encodings.
author Heikki Linnakangas <heikki.linnakangas@iki.fi>
Thu, 5 Jul 2012 19:16:29 +0000 (22:16 +0300)
committer Heikki Linnakangas <heikki.linnakangas@iki.fi>
Thu, 5 Jul 2012 19:32:12 +0000 (22:32 +0300)
Windows encodings, "win1252" and so forth, are named differently in Python,
like "cp1252". Also, if the PyUnicode_AsEncodedString() function call fails
for some reason, use a plain ereport(), not PLy_elog(), to report that
error. That avoids recursion and a crash if PLy_elog() tries to call
PLyUnicode_Bytes() again.

This fixes a bug reported by Asif Naeem. Backpatch down to 9.0; before that,
plpython didn't even try these conversions.

Jan Urbański, with minor comment improvements by me.

src/pl/plpython/plpython.c

index 19037d1fb9ad56d96e9ccc88a5881ce7e633e0aa..2b2ddeffac2111ead86f5d9a55452548130583fd 100644 (file)
@@ -3690,16 +3690,71 @@ PLyUnicode_Bytes(PyObject *unicode)
    const char *serverenc;
 
    /*
-    * Python understands almost all PostgreSQL encoding names, but it doesn't
-    * know SQL_ASCII.
+    * Map PostgreSQL encoding to a Python encoding name.
     */
-   if (GetDatabaseEncoding() == PG_SQL_ASCII)
-       serverenc = "ascii";
-   else
-       serverenc = GetDatabaseEncodingName();
+   switch (GetDatabaseEncoding())
+   {
+       case PG_SQL_ASCII:
+           /*
+            * Mapping SQL_ASCII to Python's 'ascii' is a bit bogus. Python's
+            * 'ascii' means true 7-bit only ASCII, while PostgreSQL's
+            * SQL_ASCII means that anything is allowed, and the system doesn't
+            * try to interpret the bytes in any way. But not sure what else
+            * to do, and we haven't heard any complaints...
+            */
+           serverenc = "ascii";
+           break;
+       case PG_WIN1250:
+           serverenc = "cp1250";
+           break;
+       case PG_WIN1251:
+           serverenc = "cp1251";
+           break;
+       case PG_WIN1252:
+           serverenc = "cp1252";
+           break;
+       case PG_WIN1253:
+           serverenc = "cp1253";
+           break;
+       case PG_WIN1254:
+           serverenc = "cp1254";
+           break;
+       case PG_WIN1255:
+           serverenc = "cp1255";
+           break;
+       case PG_WIN1256:
+           serverenc = "cp1256";
+           break;
+       case PG_WIN1257:
+           serverenc = "cp1257";
+           break;
+       case PG_WIN1258:
+           serverenc = "cp1258";
+           break;
+       case PG_WIN866:
+           serverenc = "cp866";
+           break;
+       case PG_WIN874:
+           serverenc = "cp874";
+           break;
+       default:
+           /* Other encodings have the same name in Python. */
+           serverenc = GetDatabaseEncodingName();
+           break;
+   }
+
    rv = PyUnicode_AsEncodedString(unicode, serverenc, "strict");
    if (rv == NULL)
-       PLy_elog(ERROR, "could not convert Python Unicode object to PostgreSQL server encoding");
+   {
+       /*
+        * Use a plain ereport instead of PLy_elog to avoid recursion, if
+        * the traceback formatting functions try to do unicode to bytes
+        * conversion again.
+        */
+       ereport(ERROR,
+               (errcode(ERRCODE_INTERNAL_ERROR),
+                errmsg("could not convert Python Unicode object to PostgreSQL server encoding")));
+   }
    return rv;
 }