@@ -457,13 +457,149 @@ NIAddAffix(IspellDict *Conf, int flag, char flagflags, const char *mask, const c
457
457
Conf -> naffixes ++ ;
458
458
}
459
459
460
+
461
+ /*
+  * Parsing states for parse_affentry() and friends.
+  *
+  * get_nextfield() uses PAE_WAIT_MASK while it is still looking for the
+  * first character of a field and PAE_INMASK once it is inside one.
+  * parse_ooaffentry() steps through PAE_WAIT_TYPE, PAE_WAIT_FLAG,
+  * PAE_WAIT_FIND, PAE_WAIT_REPL and PAE_WAIT_MASK, in that order, one state
+  * per field.  PAE_INFIND and PAE_INREPL are not used by those two
+  * functions; presumably parse_affentry() uses them (confirm there).
+  */
460
462
#define PAE_WAIT_MASK 0
461
- #define PAE_INMASK 1
463
+ #define PAE_INMASK 1
462
464
#define PAE_WAIT_FIND 2
463
- #define PAE_INFIND 3
465
+ #define PAE_INFIND 3
464
466
#define PAE_WAIT_REPL 4
465
- #define PAE_INREPL 5
467
+ #define PAE_INREPL 5
468
+ #define PAE_WAIT_TYPE 6
469
+ #define PAE_WAIT_FLAG 7
466
470
471
+ /*
472
+ * Parse next space-separated field of an .affix file line.
473
+ *
474
+ * *str is the input pointer (will be advanced past field)
475
+ * next is where to copy the field value to, with null termination
476
+ *
477
+ * The buffer at "next" must be of size BUFSIZ; we truncate the input to fit.
478
+ *
479
+ * Returns TRUE if we found a field, FALSE if not.
480
+ */
481
+ static bool
482
+ get_nextfield (char * * str , char * next )
483
+ {
484
+ int state = PAE_WAIT_MASK ;
485
+ int avail = BUFSIZ ;
486
+
487
+ while (* * str )
488
+ {
489
+ if (state == PAE_WAIT_MASK )
490
+ {
491
+ if (t_iseq (* str , '#' ))
492
+ return false;
493
+ else if (!t_isspace (* str ))
494
+ {
495
+ int clen = pg_mblen (* str );
496
+
497
+ if (clen < avail )
498
+ {
499
+ COPYCHAR (next , * str );
500
+ next += clen ;
501
+ avail -= clen ;
502
+ }
503
+ state = PAE_INMASK ;
504
+ }
505
+ }
506
+ else /* state == PAE_INMASK */
507
+ {
508
+ if (t_isspace (* str ))
509
+ {
510
+ * next = '\0' ;
511
+ return true;
512
+ }
513
+ else
514
+ {
515
+ int clen = pg_mblen (* str );
516
+
517
+ if (clen < avail )
518
+ {
519
+ COPYCHAR (next , * str );
520
+ next += clen ;
521
+ avail -= clen ;
522
+ }
523
+ }
524
+ }
525
+ * str += pg_mblen (* str );
526
+ }
527
+
528
+ * next = '\0' ;
529
+
530
+ return (state == PAE_INMASK ); /* OK if we got a nonempty field */
531
+ }
532
+
533
+ /*
534
+ * Parses entry of an .affix file of MySpell or Hunspell format.
535
+ *
536
+ * An .affix file entry has the following format:
537
+ * - header
538
+ * <type> <flag> <cross_flag> <flag_count>
539
+ * - fields after header:
540
+ * <type> <flag> <find> <replace> <mask>
541
+ *
542
+ * str is the input line
543
+ * field values are returned to type etc, which must be buffers of size BUFSIZ.
544
+ *
545
+ * Returns number of fields found; any omitted fields are set to empty strings.
546
+ */
547
+ static int
548
+ parse_ooaffentry (char * str , char * type , char * flag , char * find ,
549
+ char * repl , char * mask )
550
+ {
551
+ int state = PAE_WAIT_TYPE ;
552
+ int fields_read = 0 ;
553
+ bool valid = false;
554
+
555
+ * type = * flag = * find = * repl = * mask = '\0' ;
556
+
557
+ while (* str )
558
+ {
559
+ switch (state )
560
+ {
561
+ case PAE_WAIT_TYPE :
562
+ valid = get_nextfield (& str , type );
563
+ state = PAE_WAIT_FLAG ;
564
+ break ;
565
+ case PAE_WAIT_FLAG :
566
+ valid = get_nextfield (& str , flag );
567
+ state = PAE_WAIT_FIND ;
568
+ break ;
569
+ case PAE_WAIT_FIND :
570
+ valid = get_nextfield (& str , find );
571
+ state = PAE_WAIT_REPL ;
572
+ break ;
573
+ case PAE_WAIT_REPL :
574
+ valid = get_nextfield (& str , repl );
575
+ state = PAE_WAIT_MASK ;
576
+ break ;
577
+ case PAE_WAIT_MASK :
578
+ valid = get_nextfield (& str , mask );
579
+ state = -1 ; /* force loop exit */
580
+ break ;
581
+ default :
582
+ elog (ERROR , "unrecognized state in parse_ooaffentry: %d" ,
583
+ state );
584
+ break ;
585
+ }
586
+ if (valid )
587
+ fields_read ++ ;
588
+ else
589
+ break ; /* early EOL */
590
+ if (state < 0 )
591
+ break ; /* got all fields */
592
+ }
593
+
594
+ return fields_read ;
595
+ }
596
+
597
+ /*
598
+ * Parses entry of an .affix file of Ispell format
599
+ *
600
+ * An .affix file entry has the following format:
601
+ * <mask> > [-<find>,]<replace>
602
+ */
467
603
static bool
468
604
parse_affentry (char * str , char * mask , char * find , char * repl )
469
605
{
@@ -618,8 +754,6 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
618
754
int flag = 0 ;
619
755
char flagflags = 0 ;
620
756
tsearch_readline_state trst ;
621
- int scanread = 0 ;
622
- char scanbuf [BUFSIZ ];
623
757
char * recoded ;
624
758
625
759
/* read file to find any flag */
@@ -682,8 +816,6 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
682
816
}
683
817
tsearch_readline_end (& trst );
684
818
685
- sprintf (scanbuf , "%%6s %%%ds %%%ds %%%ds %%%ds" , BUFSIZ / 5 , BUFSIZ / 5 , BUFSIZ / 5 , BUFSIZ / 5 );
686
-
687
819
if (!tsearch_readline_begin (& trst , filename ))
688
820
ereport (ERROR ,
689
821
(errcode (ERRCODE_CONFIG_FILE_ERROR ),
@@ -692,18 +824,21 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
692
824
693
825
while ((recoded = tsearch_readline (& trst )) != NULL )
694
826
{
827
+ int fields_read ;
828
+
695
829
if (* recoded == '\0' || t_isspace (recoded ) || t_iseq (recoded , '#' ))
696
830
goto nextline ;
697
831
698
- scanread = sscanf (recoded , scanbuf , type , sflag , find , repl , mask );
832
+ fields_read = parse_ooaffentry (recoded , type , sflag , find , repl , mask );
699
833
700
834
if (ptype )
701
835
pfree (ptype );
702
836
ptype = lowerstr_ctx (Conf , type );
703
- if (scanread < 4 || (STRNCMP (ptype , "sfx" ) && STRNCMP (ptype , "pfx" )))
837
+ if (fields_read < 4 ||
838
+ (STRNCMP (ptype , "sfx" ) != 0 && STRNCMP (ptype , "pfx" ) != 0 ))
704
839
goto nextline ;
705
840
706
- if (scanread == 4 )
841
+ if (fields_read == 4 )
707
842
{
708
843
if (strlen (sflag ) != 1 )
709
844
goto nextline ;
@@ -722,9 +857,13 @@ NIImportOOAffixes(IspellDict *Conf, const char *filename)
722
857
if (strlen (sflag ) != 1 || flag != * sflag || flag == 0 )
723
858
goto nextline ;
724
859
prepl = lowerstr_ctx (Conf , repl );
725
- /* affix flag */
860
+ /* Find position of '/' in lowercased string "prepl" */
726
861
if ((ptr = strchr (prepl , '/' )) != NULL )
727
862
{
863
+ /*
864
+ * Here we use non-lowercased string "repl". We need position
865
+ * of '/' in "repl".
866
+ */
728
867
* ptr = '\0' ;
729
868
ptr = repl + (ptr - prepl ) + 1 ;
730
869
while (* ptr )
@@ -800,11 +939,12 @@ NIImportAffixes(IspellDict *Conf, const char *filename)
800
939
801
940
if (STRNCMP (pstr , "compoundwords" ) == 0 )
802
941
{
942
+ /* Find position in lowercased string "pstr" */
803
943
s = findchar (pstr , 'l' );
804
944
if (s )
805
945
{
806
- s = recoded + ( s - pstr ); /* we need non-lowercased
807
- * string */
946
+ /* Here we use non-lowercased string "recoded" */
947
+ s = recoded + ( s - pstr );
808
948
while (* s && !t_isspace (s ))
809
949
s += pg_mblen (s );
810
950
while (* s && t_isspace (s ))
0 commit comments