|
8 | 8 | *
|
9 | 9 | *
|
10 | 10 | * IDENTIFICATION
|
11 |
| - * $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.283 2007/04/27 22:05:46 tgl Exp $ |
| 11 | + * $PostgreSQL: pgsql/src/backend/commands/copy.c,v 1.284 2007/06/17 23:39:28 tgl Exp $ |
12 | 12 | *
|
13 | 13 | *-------------------------------------------------------------------------
|
14 | 14 | */
|
@@ -3075,68 +3075,88 @@ CopyAttributeOutText(CopyState cstate, char *string)
|
3075 | 3075 | * We have to grovel through the string searching for control characters
|
3076 | 3076 | * and instances of the delimiter character. In most cases, though, these
|
3077 | 3077 | * are infrequent. To avoid overhead from calling CopySendData once per
|
3078 |
| - * character, we dump out all characters between replaceable characters in |
| 3078 | + * character, we dump out all characters between escaped characters in |
3079 | 3079 | * a single call. The loop invariant is that the data from "start" to
|
3080 | 3080 | * "ptr" can be sent literally, but hasn't yet been.
|
| 3081 | + * |
| 3082 | + * We can skip pg_encoding_mblen() overhead when encoding is safe, because |
| 3083 | + * in valid backend encodings, extra bytes of a multibyte character never |
| 3084 | + * look like ASCII. This loop is sufficiently performance-critical that |
| 3085 | + * it's worth making two copies of it to get the IS_HIGHBIT_SET() test |
| 3086 | + * out of the normal safe-encoding path. |
3081 | 3087 | */
|
3082 |
| - start = ptr; |
3083 |
| - while ((c = *ptr) != '\0') |
| 3088 | + if (cstate->encoding_embeds_ascii) |
3084 | 3089 | {
|
3085 |
| - switch (c) |
| 3090 | + start = ptr; |
| 3091 | + while ((c = *ptr) != '\0') |
3086 | 3092 | {
|
3087 |
| - case '\b': |
3088 |
| - DUMPSOFAR(); |
3089 |
| - CopySendString(cstate, "\\b"); |
3090 |
| - start = ++ptr; |
3091 |
| - break; |
3092 |
| - case '\f': |
3093 |
| - DUMPSOFAR(); |
3094 |
| - CopySendString(cstate, "\\f"); |
3095 |
| - start = ++ptr; |
3096 |
| - break; |
3097 |
| - case '\n': |
3098 |
| - DUMPSOFAR(); |
3099 |
| - CopySendString(cstate, "\\n"); |
3100 |
| - start = ++ptr; |
3101 |
| - break; |
3102 |
| - case '\r': |
3103 |
| - DUMPSOFAR(); |
3104 |
| - CopySendString(cstate, "\\r"); |
3105 |
| - start = ++ptr; |
3106 |
| - break; |
3107 |
| - case '\t': |
3108 |
| - DUMPSOFAR(); |
3109 |
| - CopySendString(cstate, "\\t"); |
3110 |
| - start = ++ptr; |
3111 |
| - break; |
3112 |
| - case '\v': |
| 3093 | + if (c == '\\' || c == delimc) |
| 3094 | + { |
3113 | 3095 | DUMPSOFAR();
|
3114 |
| - CopySendString(cstate, "\\v"); |
3115 |
| - start = ++ptr; |
3116 |
| - break; |
3117 |
| - case '\\': |
| 3096 | + CopySendChar(cstate, '\\'); |
| 3097 | + start = ptr++; /* we include char in next run */ |
| 3098 | + } |
| 3099 | + else if ((unsigned char) c < (unsigned char) 0x20) |
| 3100 | + { |
| 3101 | + switch (c) |
| 3102 | + { |
| 3103 | + /* \r and \n must be escaped, the others are traditional */ |
| 3104 | + case '\b': |
| 3105 | + case '\f': |
| 3106 | + case '\n': |
| 3107 | + case '\r': |
| 3108 | + case '\t': |
| 3109 | + case '\v': |
| 3110 | + DUMPSOFAR(); |
| 3111 | + CopySendChar(cstate, '\\'); |
| 3112 | + start = ptr++; /* we include char in next run */ |
| 3113 | + break; |
| 3114 | + default: |
| 3115 | + /* All ASCII control chars are length 1 */ |
| 3116 | + ptr++; |
| 3117 | + break; |
| 3118 | + } |
| 3119 | + } |
| 3120 | + else if (IS_HIGHBIT_SET(c)) |
| 3121 | + ptr += pg_encoding_mblen(cstate->client_encoding, ptr); |
| 3122 | + else |
| 3123 | + ptr++; |
| 3124 | + } |
| 3125 | + } |
| 3126 | + else |
| 3127 | + { |
| 3128 | + start = ptr; |
| 3129 | + while ((c = *ptr) != '\0') |
| 3130 | + { |
| 3131 | + if (c == '\\' || c == delimc) |
| 3132 | + { |
3118 | 3133 | DUMPSOFAR();
|
3119 |
| - CopySendString(cstate, "\\\\"); |
3120 |
| - start = ++ptr; |
3121 |
| - break; |
3122 |
| - default: |
3123 |
| - if (c == delimc) |
| 3134 | + CopySendChar(cstate, '\\'); |
| 3135 | + start = ptr++; /* we include char in next run */ |
| 3136 | + } |
| 3137 | + else if ((unsigned char) c < (unsigned char) 0x20) |
| 3138 | + { |
| 3139 | + switch (c) |
3124 | 3140 | {
|
3125 |
| - DUMPSOFAR(); |
3126 |
| - CopySendChar(cstate, '\\'); |
3127 |
| - start = ptr; /* we include char in next run */ |
| 3141 | + /* \r and \n must be escaped, the others are traditional */ |
| 3142 | + case '\b': |
| 3143 | + case '\f': |
| 3144 | + case '\n': |
| 3145 | + case '\r': |
| 3146 | + case '\t': |
| 3147 | + case '\v': |
| 3148 | + DUMPSOFAR(); |
| 3149 | + CopySendChar(cstate, '\\'); |
| 3150 | + start = ptr++; /* we include char in next run */ |
| 3151 | + break; |
| 3152 | + default: |
| 3153 | + /* All ASCII control chars are length 1 */ |
| 3154 | + ptr++; |
| 3155 | + break; |
3128 | 3156 | }
|
3129 |
| - |
3130 |
| - /* |
3131 |
| - * We can skip pg_encoding_mblen() overhead when encoding is |
3132 |
| - * safe, because in valid backend encodings, extra bytes of a |
3133 |
| - * multibyte character never look like ASCII. |
3134 |
| - */ |
3135 |
| - if (IS_HIGHBIT_SET(c) && cstate->encoding_embeds_ascii) |
3136 |
| - ptr += pg_encoding_mblen(cstate->client_encoding, ptr); |
3137 |
| - else |
3138 |
| - ptr++; |
3139 |
| - break; |
| 3157 | + } |
| 3158 | + else |
| 3159 | + ptr++; |
3140 | 3160 | }
|
3141 | 3161 | }
|
3142 | 3162 |
|
|
0 commit comments