Location via proxy:   [ UP ]  
[Report a bug]   [Manage cookies]                
Skip to content

Commit e2d088d

Browse files
committed
Allow direct conversion between EUC_JP and SJIS to improve
performance. Patches submitted by Atsushi Ogawa.
1 parent 1fa87fa commit e2d088d

File tree

1 file changed

+201
-11
lines changed

1 file changed

+201
-11
lines changed

src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c

Lines changed: 201 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
* Portions Copyright (c) 1994, Regents of the University of California
77
*
88
* IDENTIFICATION
9-
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c,v 1.10 2005/06/10 16:43:56 ishii Exp $
9+
* $PostgreSQL: pgsql/src/backend/utils/mb/conversion_procs/euc_jp_and_sjis/euc_jp_and_sjis.c,v 1.11 2005/06/24 13:56:39 ishii Exp $
1010
*
1111
*-------------------------------------------------------------------------
1212
*/
@@ -58,23 +58,21 @@ static void sjis2mic(unsigned char *sjis, unsigned char *p, int len);
5858
static void mic2sjis(unsigned char *mic, unsigned char *p, int len);
5959
static void euc_jp2mic(unsigned char *euc, unsigned char *p, int len);
6060
static void mic2euc_jp(unsigned char *mic, unsigned char *p, int len);
61+
static void euc_jp2sjis(unsigned char *mic, unsigned char *p, int len);
62+
static void sjis2euc_jp(unsigned char *mic, unsigned char *p, int len);
6163

6264
Datum
6365
euc_jp_to_sjis(PG_FUNCTION_ARGS)
6466
{
6567
unsigned char *src = PG_GETARG_CSTRING(2);
6668
unsigned char *dest = PG_GETARG_CSTRING(3);
6769
int len = PG_GETARG_INT32(4);
68-
unsigned char *buf;
6970

7071
Assert(PG_GETARG_INT32(0) == PG_EUC_JP);
7172
Assert(PG_GETARG_INT32(1) == PG_SJIS);
7273
Assert(len >= 0);
7374

74-
buf = palloc(len * ENCODING_GROWTH_RATE);
75-
euc_jp2mic(src, buf, len);
76-
mic2sjis(buf, dest, strlen(buf));
77-
pfree(buf);
75+
euc_jp2sjis(src, dest, len);
7876

7977
PG_RETURN_VOID();
8078
}
@@ -85,16 +83,12 @@ sjis_to_euc_jp(PG_FUNCTION_ARGS)
8583
unsigned char *src = PG_GETARG_CSTRING(2);
8684
unsigned char *dest = PG_GETARG_CSTRING(3);
8785
int len = PG_GETARG_INT32(4);
88-
unsigned char *buf;
8986

9087
Assert(PG_GETARG_INT32(0) == PG_SJIS);
9188
Assert(PG_GETARG_INT32(1) == PG_EUC_JP);
9289
Assert(len >= 0);
9390

94-
buf = palloc(len * ENCODING_GROWTH_RATE);
95-
sjis2mic(src, buf, len);
96-
mic2euc_jp(buf, dest, strlen(buf));
97-
pfree(buf);
91+
sjis2euc_jp(src, dest, len);
9892

9993
PG_RETURN_VOID();
10094
}
@@ -454,3 +448,199 @@ mic2euc_jp(unsigned char *mic, unsigned char *p, int len)
454448
}
455449
*p = '\0';
456450
}
451+
452+
/*
453+
* EUC_JP -> SJIS
454+
*/
455+
static void
456+
euc_jp2sjis(unsigned char *euc, unsigned char *p, int len)
457+
{
458+
int c1,
459+
c2,
460+
k;
461+
unsigned char *euc_end = euc + len;
462+
463+
while (euc_end >= euc && (c1 = *euc++))
464+
{
465+
if(c1 < 0x80)
466+
{
467+
/* should be ASCII */
468+
*p++ = c1;
469+
}
470+
else if (c1 == SS2)
471+
{
472+
/* hankaku kana? */
473+
*p++ = *euc++;
474+
}
475+
else if (c1 == SS3)
476+
{
477+
/* JIS X0212 kanji? */
478+
c1 = *euc++;
479+
c2 = *euc++;
480+
k = c1 << 8 | c2;
481+
if (k >= 0xf5a1)
482+
{
483+
/* UDC2 */
484+
c1 -= 0x54;
485+
*p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1) + 0x74;
486+
*p++ = c2 - ((c1 & 1) ? ((c2 < 0xe0) ? 0x61 : 0x60) : 2);
487+
}
488+
else
489+
{
490+
int i, k2;
491+
492+
/* IBM kanji */
493+
for (i = 0;; i++)
494+
{
495+
k2 = ibmkanji[i].euc & 0xffff;
496+
if (k2 == 0xffff)
497+
{
498+
*p++ = PGSJISALTCODE >> 8;
499+
*p++ = PGSJISALTCODE & 0xff;
500+
break;
501+
}
502+
if (k2 == k)
503+
{
504+
k = ibmkanji[i].sjis;
505+
*p++ = k >> 8;
506+
*p++ = k & 0xff;
507+
break;
508+
}
509+
}
510+
}
511+
}
512+
else
513+
{
514+
/* JIS X0208 kanji? */
515+
c2 = *euc++;
516+
k = (c1 << 8) | (c2 & 0xff);
517+
if (k >= 0xf5a1)
518+
{
519+
/* UDC1 */
520+
c1 -= 0x54;
521+
*p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1) + 0x6f;
522+
}
523+
else
524+
*p++ = ((c1 - 0xa1) >> 1) + ((c1 < 0xdf) ? 0x81 : 0xc1);
525+
*p++ = c2 - ((c1 & 1) ? ((c2 < 0xe0) ? 0x61 : 0x60) : 2);
526+
}
527+
}
528+
*p = '\0';
529+
}
530+
531+
/*
532+
* SJIS ---> EUC_JP
533+
*/
534+
static void
535+
sjis2euc_jp(unsigned char *sjis, unsigned char *p, int len)
536+
{
537+
int c1,
538+
c2,
539+
i,
540+
k,
541+
k2;
542+
unsigned char *sjis_end = sjis + len;
543+
544+
while (sjis_end >= sjis && (c1 = *sjis++))
545+
{
546+
if(c1 < 0x80)
547+
{
548+
/* should be ASCII */
549+
*p++ = c1;
550+
}
551+
else if (c1 >= 0xa1 && c1 <= 0xdf)
552+
{
553+
/* JIS X0201 (1 byte kana) */
554+
*p++ = SS2;
555+
*p++ = c1;
556+
}
557+
else
558+
{
559+
/*
560+
* JIS X0208, X0212, user defined extended characters
561+
*/
562+
c2 = *sjis++;
563+
k = (c1 << 8) + c2;
564+
if (k >= 0xed40 && k < 0xf040)
565+
{
566+
/* NEC selection IBM kanji */
567+
for (i = 0;; i++)
568+
{
569+
k2 = ibmkanji[i].nec;
570+
if (k2 == 0xffff)
571+
break;
572+
if (k2 == k)
573+
{
574+
k = ibmkanji[i].sjis;
575+
c1 = (k >> 8) & 0xff;
576+
c2 = k & 0xff;
577+
}
578+
}
579+
}
580+
581+
if (k < 0xeb3f)
582+
{
583+
/* JIS X0208 */
584+
*p++ = ((c1 & 0x3f) << 1) + 0x9f + (c2 > 0x9e);
585+
*p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
586+
}
587+
else if ((k >= 0xeb40 && k < 0xf040) || (k >= 0xfc4c && k <= 0xfcfc))
588+
{
589+
/* NEC selection IBM kanji - Other undecided justice */
590+
*p++ = PGEUCALTCODE >> 8;
591+
*p++ = PGEUCALTCODE & 0xff;
592+
}
593+
else if (k >= 0xf040 && k < 0xf540)
594+
{
595+
/*
596+
* UDC1 mapping to X0208 85 ku - 94 ku JIS code 0x7521 -
597+
* 0x7e7e EUC 0xf5a1 - 0xfefe
598+
*/
599+
c1 -= 0x6f;
600+
*p++ = ((c1 & 0x3f) << 1) + 0xf3 + (c2 > 0x9e);
601+
*p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
602+
}
603+
else if (k >= 0xf540 && k < 0xfa40)
604+
{
605+
/*
606+
* UDC2 mapping to X0212 85 ku - 94 ku JIS code 0x7521 -
607+
* 0x7e7e EUC 0x8ff5a1 - 0x8ffefe
608+
*/
609+
*p++ = SS3;
610+
c1 -= 0x74;
611+
*p++ = ((c1 & 0x3f) << 1) + 0xf3 + (c2 > 0x9e);
612+
*p++ = c2 + ((c2 > 0x9e) ? 2 : 0x60) + (c2 < 0x80);
613+
}
614+
else if (k >= 0xfa40)
615+
{
616+
/*
617+
* mapping IBM kanji to X0208 and X0212
618+
*
619+
*/
620+
for (i = 0;; i++)
621+
{
622+
k2 = ibmkanji[i].sjis;
623+
if (k2 == 0xffff)
624+
break;
625+
if (k2 == k)
626+
{
627+
k = ibmkanji[i].euc;
628+
if (k >= 0x8f0000)
629+
{
630+
*p++ = SS3;
631+
*p++ = 0x80 | ((k & 0xff00) >> 8);
632+
*p++ = 0x80 | (k & 0xff);
633+
}
634+
else
635+
{
636+
*p++ = 0x80 | (k >> 8);
637+
*p++ = 0x80 | (k & 0xff);
638+
}
639+
}
640+
}
641+
}
642+
}
643+
}
644+
*p = '\0';
645+
}
646+

0 commit comments

Comments
 (0)