00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025 #ifdef HAVE_CONFIG_H
00026 # include <config.h>
00027 #endif
00028
00029
00030 #undef ENABLE_NLS
00031
00032 #include "libiberty.h"
00033
00034
/* When C_ALLOCA is defined, select the malloc-based allocation macros
   (REGEX_MALLOC) for the rest of this file.  */
#ifdef C_ALLOCA
# define REGEX_MALLOC
#endif

/* On AIX, when malloc-based allocation was not selected, emit the
   compiler pragma for alloca.  NOTE(review): the pragma's exact effect is
   compiler-specific; presumably it makes alloca available.  */
#if defined _AIX && !defined REGEX_MALLOC
#pragma alloca
#endif

/* PARAMS (args) wraps a prototype's parameter list: it expands to ARGS
   under GCC or a conforming __STDC__ compiler, and to an empty list `()'
   otherwise.  */
#ifndef PARAMS
# if defined __GNUC__ || (defined __STDC__ && __STDC__)
# define PARAMS(args) args
# else
# define PARAMS(args) ()
# endif
#endif
00051
00052 #if defined STDC_HEADERS && !defined emacs
00053 # include <stddef.h>
00054 #else
00055
00056 # include <sys/types.h>
00057 #endif
00058
00059
00060
00061 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)
00062
00063 # include <wchar.h>
00064 # include <wctype.h>
00065 #endif
00066
00067
00068
/* Message translation hook.  ENABLE_NLS is #undef'ed earlier in this file,
   so in this build gettext (msgid) expands to MSGID itself.  */
#ifdef ENABLE_NLS
# include <libintl.h>
#else
# define gettext(msgid) (msgid)
#endif

#ifndef gettext_noop
/* Expands to its argument unchanged; used below to wrap the strings of the
   error message table.  */
# define gettext_noop(String) String
#endif

/* Define `volatile' away unless it is already a macro or HAVE_VOLATILE
   is defined.  */
# if !defined(volatile) && !defined(HAVE_VOLATILE)
# define volatile
# endif
00084
00085
00086
00087
/* Make sure REL_ALLOC is not defined from here on; the stack allocation
   macros later in the file test it.  */
# undef REL_ALLOC

/* Declare malloc and realloc: through <stdlib.h> when STDC_HEADERS or
   _LIBC is defined, otherwise with old-style declarations.  */
# if defined STDC_HEADERS || defined _LIBC
# include <stdlib.h>
# else
char *malloc ();
char *realloc ();
# endif

/* INHIBIT_STRING_HEADER is only honored when bzero and bcopy are known to
   be available (HAVE_BZERO and HAVE_BCOPY) or at least one of them is
   already a macro; otherwise it is cancelled so the headers below get
   included.  */
# ifdef INHIBIT_STRING_HEADER
# if !(defined HAVE_BZERO && defined HAVE_BCOPY)
# if !defined bzero && !defined bcopy
# undef INHIBIT_STRING_HEADER
# endif
# endif
# endif

/* String header selection.  With <string.h>, bzero is mapped onto memset
   (or __bzero under _LIBC) unless already a macro.  With only <strings.h>,
   memcmp and memcpy are mapped onto bcmp and bcopy; note the swapped
   source/destination order in the bcopy call.  */
# ifndef INHIBIT_STRING_HEADER
# if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
# include <string.h>
# ifndef bzero
# ifndef _LIBC
# define bzero(s, n) (memset (s, '\0', n), (s))
# else
# define bzero(s, n) __bzero (s, n)
# endif
# endif
# else
# include <strings.h>
# ifndef memcmp
# define memcmp(s1, s2, n) bcmp (s1, s2, n)
# endif
# ifndef memcpy
# define memcpy(d, s, n) (bcopy (s, d, n), (d))
# endif
# endif
# endif
00130
00131
00132
00133
00134
/* Syntax-table code stored for word-constituent characters (see
   init_syntax_once).  */
# ifndef Sword
# define Sword 1
# endif

/* SWITCH_ENUM_CAST (x) casts X to int when SWITCH_ENUM_BUG is defined and
   is the identity otherwise.  NOTE(review): presumably a workaround for
   compilers that mishandle `switch' on enum values -- confirm.  */
# ifdef SWITCH_ENUM_BUG
# define SWITCH_ENUM_CAST(x) ((int)(x))
# else
# define SWITCH_ENUM_CAST(x) (x)
# endif

/* Number of entries in re_syntax_table.  */
# define CHAR_SET_SIZE 256
00147
00148 # ifdef SYNTAX_TABLE
00149
00150 extern char *re_syntax_table;
00151
00152 # else
00153
00154 static char re_syntax_table[CHAR_SET_SIZE];
00155
00156 static void init_syntax_once PARAMS ((void));
00157
00158 static void
00159 init_syntax_once ()
00160 {
00161 register int c;
00162 static int done = 0;
00163
00164 if (done)
00165 return;
00166
00167 bzero (re_syntax_table, sizeof re_syntax_table);
00168
00169 for (c = 'a'; c <= 'z'; c++)
00170 re_syntax_table[c] = Sword;
00171
00172 for (c = 'A'; c <= 'Z'; c++)
00173 re_syntax_table[c] = Sword;
00174
00175 for (c = '0'; c <= '9'; c++)
00176 re_syntax_table[c] = Sword;
00177
00178 re_syntax_table['_'] = Sword;
00179
00180 done = 1;
00181 }
00182
00183 # endif
00184
00185 # define SYNTAX(c) re_syntax_table[c]
00186
00187
00188
00189 #include "gnu-regex.h"
00190
00191
00192
00193
00194 #include <safe-ctype.h>
00195
#ifndef NULL
# define NULL (void *)0
#endif

/* SIGN_EXTEND_CHAR (c) yields C's low byte interpreted as a signed
   quantity.  EXTRACT_NUMBER uses it on the high byte of a stored
   two-byte number.  */
#undef SIGN_EXTEND_CHAR
#if __STDC__
# define SIGN_EXTEND_CHAR(c) ((signed char) (c))
#else
/* No `signed char' cast here: flipping bit 7 and subtracting 128 maps
   0..127 onto itself and 128..255 onto -128..-1.  */
# define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
#endif
00211
00212
00213
00214
00215
00216
00217
00218
00219
00220
00221
/* Allocation primitives, in two flavors selected by REGEX_MALLOC.  */
#ifdef REGEX_MALLOC

/* Heap flavor: plain malloc / realloc / free.  The old size argument of
   REGEX_REALLOCATE is ignored.  */
# define REGEX_ALLOCATE malloc
# define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
# define REGEX_FREE free

#else

/* alloca flavor.  Make sure `alloca' names something usable first.  */
# ifndef alloca

# ifdef __GNUC__
# define alloca __builtin_alloca
# else
# if HAVE_ALLOCA_H
# include <alloca.h>
# endif
# endif

# endif

# define REGEX_ALLOCATE alloca

/* Allocates NSIZE fresh bytes and copies OSIZE bytes from SOURCE into
   them.  The expansion assigns to a variable named `destination', which
   must be declared (as char *) in the scope where the macro is used.  */
# define REGEX_REALLOCATE(source, osize, nsize) \
  (destination = (char *) alloca (nsize), \
   memcpy (destination, source, osize))

/* Nothing to release in this flavor.  */
# define REGEX_FREE(arg) ((void)0)

#endif
00255
00256
00257
/* Allocation primitives for the failure stack.  REL_ALLOC is #undef'ed
   earlier in this file, so the r_alloc branch is not selected in this
   build as written.  */
#if defined REL_ALLOC && defined REGEX_MALLOC

/* These expansions use a variable named failure_stack_ptr from the
   surrounding scope and ignore their pointer arguments.  */
# define REGEX_ALLOCATE_STACK(size) \
  r_alloc (&failure_stack_ptr, (size))
# define REGEX_REALLOCATE_STACK(source, osize, nsize) \
  r_re_alloc (&failure_stack_ptr, (nsize))
# define REGEX_FREE_STACK(ptr) \
  r_alloc_free (&failure_stack_ptr)

#else

# ifdef REGEX_MALLOC

/* Heap flavor.  */
# define REGEX_ALLOCATE_STACK malloc
# define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
# define REGEX_FREE_STACK free

# else

/* alloca flavor: reallocation goes through REGEX_REALLOCATE and freeing
   expands to nothing.  */
# define REGEX_ALLOCATE_STACK alloca

# define REGEX_REALLOCATE_STACK(source, osize, nsize) \
  REGEX_REALLOCATE (source, osize, nsize)

# define REGEX_FREE_STACK(arg)

# endif
#endif
00286
00287
00288
00289
00290
/* True when PTR lies within [string1, string1 + size1] and size1 is
   nonzero.  Uses `string1' and `size1' from the scope of the caller.  */
#define FIRST_STRING_P(ptr) \
  (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)

/* Typed allocation helpers: N objects of type T.  RETALLOC stores the
   result of realloc straight back into ADDR.  RETALLOC_IF picks realloc
   or malloc depending on whether ADDR is already non-null; it expands to
   a bare if/else statement, so use it only as a full statement.  */
#define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
#define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
#define RETALLOC_IF(addr, n, t) \
  if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
#define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))

/* Bits per byte assumed by the bitmap and fastmap code.  */
#define BYTEWIDTH 8

/* Nonzero when the two strings compare equal.  */
#define STREQ(s1, s2) ((strcmp (s1, s2) == 0))

/* Note: both macros evaluate one of their arguments twice.  */
#undef MAX
#undef MIN
#define MAX(a, b) ((a) > (b) ? (a) : (b))
#define MIN(a, b) ((a) < (b) ? (a) : (b))

/* File-local boolean type and constants.  */
typedef char boolean;
#define false 0
#define true 1

/* Forward declaration; the definition lies outside this part of the
   file.  */
static int re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp,
                                        const char *string1, int size1,
                                        const char *string2, int size2,
                                        int pos,
                                        struct re_registers *regs,
                                        int stop));
00320
00321
00322
00323
00324
00325
/* Opcodes of the compiled pattern.  The operand layouts noted below are
   the ones decoded by print_partial_compiled_pattern; what each opcode
   does at match time is implemented elsewhere in the file.  "Two-byte
   number" means the format written by STORE_NUMBER.  */
typedef enum
{
  no_op = 0,

  succeed,

  /* Followed by one count byte N, then N literal bytes.  */
  exactn,

  /* No operands.  */
  anychar,

  /* Followed by one length byte N and an N-byte bitmap: character c is in
     the set when c / 8 < N and bit (c % 8) of bitmap byte (c / 8) is
     set.  */
  charset,

  /* Same operand layout as charset.  */
  charset_not,

  /* Followed by two operand bytes.  */
  start_memory,

  /* Followed by two operand bytes.  */
  stop_memory,

  /* Followed by one operand byte.  */
  duplicate,

  /* No operands.  */
  begline,

  /* No operands.  */
  endline,

  /* No operands.  */
  begbuf,

  /* No operands.  */
  endbuf,

  /* The jump opcodes below are each followed by a two-byte number: a
     signed offset counted from the byte just after that number.  */
  jump,

  jump_past_alt,

  on_failure_jump,

  on_failure_keep_string_jump,

  pop_failure_jump,

  maybe_pop_jump,

  dummy_failure_jump,

  /* No operands.  */
  push_dummy_failure,

  /* Followed by two two-byte numbers: a relative offset (counted from
     just after the first number) and a count.  */
  succeed_n,

  /* Same operand layout as succeed_n.  */
  jump_n,

  /* Followed by two two-byte numbers: a relative offset naming a
     location, and the value to go with it.  */
  set_number_at,

  /* The remaining opcodes take no operands.  */
  wordchar,
  notwordchar,

  wordbeg,
  wordend,

  wordbound,
  notwordbound

#ifdef emacs
  ,before_dot,
  at_dot,
  after_dot,

  /* Followed by one operand byte.  */
  syntaxspec,

  /* Followed by one operand byte.  */
  notsyntaxspec
#endif
} re_opcode_t;
00459
00460
00461
00462
00463
/* Store NUMBER in the two bytes at DESTINATION: low-order eight bits
   first, then the bits above them.  */
#define STORE_NUMBER(destination, number) \
  do { \
    (destination)[0] = (number) & 0377; \
    (destination)[1] = (number) >> 8; \
  } while (0)

/* Same as STORE_NUMBER, then advance DESTINATION past the two bytes.
   DESTINATION must be an lvalue.  */
#define STORE_NUMBER_AND_INCR(destination, number) \
  do { \
    STORE_NUMBER (destination, number); \
    (destination) += 2; \
  } while (0)

/* Read back a number stored by STORE_NUMBER from the two bytes at SOURCE
   into DESTINATION.  The second byte is sign-extended, so the result can
   be negative.  */
#define EXTRACT_NUMBER(destination, source) \
  do { \
    (destination) = *(source) & 0377; \
    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \
  } while (0)
00488
#ifdef DEBUG
static void extract_number _RE_ARGS ((int *dest, unsigned char *source));

/* Function form of EXTRACT_NUMBER: decode the two-byte number at SOURCE
   (low byte first, high byte sign-extended) and store it in *DEST.  */
static void
extract_number (dest, source)
    int *dest;
    unsigned char *source;
{
  int high_part = SIGN_EXTEND_CHAR (source[1]);
  int low_part = source[0] & 0377;

  *dest = low_part + (high_part << 8);
}

/* Unless EXTRACT_MACROS is defined, route EXTRACT_NUMBER through the
   function above.  */
# ifndef EXTRACT_MACROS
# undef EXTRACT_NUMBER
# define EXTRACT_NUMBER(dest, src) extract_number (&dest, src)
# endif

#endif
00507
00508
00509
00510
/* Decode the two-byte number at SOURCE into DESTINATION, then advance
   SOURCE past it.  SOURCE must be an lvalue.  */
#define EXTRACT_NUMBER_AND_INCR(destination, source) \
  do { \
    EXTRACT_NUMBER (destination, source); \
    (source) += 2; \
  } while (0)

#ifdef DEBUG
static void extract_number_and_incr _RE_ARGS ((int *destination,
                                               unsigned char **source));

/* Function form of EXTRACT_NUMBER_AND_INCR: decode the number at *SOURCE
   into *DESTINATION and move *SOURCE two bytes forward.  */
static void
extract_number_and_incr (destination, source)
    int *destination;
    unsigned char **source;
{
  unsigned char *cursor = *source;

  extract_number (destination, cursor);
  *source = cursor + 2;
}

/* Unless EXTRACT_MACROS is defined, route the macro through the function
   above.  */
# ifndef EXTRACT_MACROS
# undef EXTRACT_NUMBER_AND_INCR
# define EXTRACT_NUMBER_AND_INCR(dest, src) \
  extract_number_and_incr (&dest, &src)
# endif

#endif
00536
00537
00538
00539
00540
00541
00542
00543 #ifdef DEBUG
00544
00545
00546 # include <stdio.h>
00547
00548
00549 # include <assert.h>
00550
/* Nonzero enables the DEBUG_PRINT* output below; re_set_syntax sets it
   from the RE_DEBUG syntax bit.  */
static int debug = 0;

/* Debug-only helpers.  DEBUG_STATEMENT (e) expands to E itself.  The
   DEBUG_PRINT* macros act only when `debug' is nonzero.  Each is wrapped
   in do { } while (0) so that it behaves as one statement: with a bare
   `if (debug) ...' expansion, an `else' written after the macro call
   would silently attach to the macro's own `if'.  Use them as statements
   followed by a semicolon.  */
# define DEBUG_STATEMENT(e) e
# define DEBUG_PRINT1(x) do { if (debug) printf (x); } while (0)
# define DEBUG_PRINT2(x1, x2) do { if (debug) printf (x1, x2); } while (0)
# define DEBUG_PRINT3(x1, x2, x3) \
  do { if (debug) printf (x1, x2, x3); } while (0)
# define DEBUG_PRINT4(x1, x2, x3, x4) \
  do { if (debug) printf (x1, x2, x3, x4); } while (0)
/* The P argument is accepted but not used by the expansion.  */
# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
  do { if (debug) print_partial_compiled_pattern (s, e); } while (0)
# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
  do { if (debug) print_double_string (w, s1, sz1, s2, sz2); } while (0)
00562
00563
00564
00565
00566 void
00567 print_fastmap (fastmap)
00568 char *fastmap;
00569 {
00570 unsigned was_a_range = 0;
00571 unsigned i = 0;
00572
00573 while (i < (1 << BYTEWIDTH))
00574 {
00575 if (fastmap[i++])
00576 {
00577 was_a_range = 0;
00578 putchar (i - 1);
00579 while (i < (1 << BYTEWIDTH) && fastmap[i])
00580 {
00581 was_a_range = 1;
00582 i++;
00583 }
00584 if (was_a_range)
00585 {
00586 printf ("-");
00587 putchar (i - 1);
00588 }
00589 }
00590 }
00591 putchar ('\n');
00592 }
00593
00594
00595
00596
00597
00598 void
00599 print_partial_compiled_pattern (start, end)
00600 unsigned char *start;
00601 unsigned char *end;
00602 {
00603 int mcnt, mcnt2;
00604 unsigned char *p1;
00605 unsigned char *p = start;
00606 unsigned char *pend = end;
00607
00608 if (start == NULL)
00609 {
00610 printf ("(null)\n");
00611 return;
00612 }
00613
00614
00615 while (p < pend)
00616 {
00617 printf ("%d:\t", p - start);
00618
00619 switch ((re_opcode_t) *p++)
00620 {
00621 case no_op:
00622 printf ("/no_op");
00623 break;
00624
00625 case exactn:
00626 mcnt = *p++;
00627 printf ("/exactn/%d", mcnt);
00628 do
00629 {
00630 putchar ('/');
00631 putchar (*p++);
00632 }
00633 while (--mcnt);
00634 break;
00635
00636 case start_memory:
00637 mcnt = *p++;
00638 printf ("/start_memory/%d/%d", mcnt, *p++);
00639 break;
00640
00641 case stop_memory:
00642 mcnt = *p++;
00643 printf ("/stop_memory/%d/%d", mcnt, *p++);
00644 break;
00645
00646 case duplicate:
00647 printf ("/duplicate/%d", *p++);
00648 break;
00649
00650 case anychar:
00651 printf ("/anychar");
00652 break;
00653
00654 case charset:
00655 case charset_not:
00656 {
00657 register int c, last = -100;
00658 register int in_range = 0;
00659
00660 printf ("/charset [%s",
00661 (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
00662
00663 assert (p + *p < pend);
00664
00665 for (c = 0; c < 256; c++)
00666 if (c / 8 < *p
00667 && (p[1 + (c/8)] & (1 << (c % 8))))
00668 {
00669
00670 if (last + 1 == c && ! in_range)
00671 {
00672 putchar ('-');
00673 in_range = 1;
00674 }
00675
00676 else if (last + 1 != c && in_range)
00677 {
00678 putchar (last);
00679 in_range = 0;
00680 }
00681
00682 if (! in_range)
00683 putchar (c);
00684
00685 last = c;
00686 }
00687
00688 if (in_range)
00689 putchar (last);
00690
00691 putchar (']');
00692
00693 p += 1 + *p;
00694 }
00695 break;
00696
00697 case begline:
00698 printf ("/begline");
00699 break;
00700
00701 case endline:
00702 printf ("/endline");
00703 break;
00704
00705 case on_failure_jump:
00706 extract_number_and_incr (&mcnt, &p);
00707 printf ("/on_failure_jump to %d", p + mcnt - start);
00708 break;
00709
00710 case on_failure_keep_string_jump:
00711 extract_number_and_incr (&mcnt, &p);
00712 printf ("/on_failure_keep_string_jump to %d", p + mcnt - start);
00713 break;
00714
00715 case dummy_failure_jump:
00716 extract_number_and_incr (&mcnt, &p);
00717 printf ("/dummy_failure_jump to %d", p + mcnt - start);
00718 break;
00719
00720 case push_dummy_failure:
00721 printf ("/push_dummy_failure");
00722 break;
00723
00724 case maybe_pop_jump:
00725 extract_number_and_incr (&mcnt, &p);
00726 printf ("/maybe_pop_jump to %d", p + mcnt - start);
00727 break;
00728
00729 case pop_failure_jump:
00730 extract_number_and_incr (&mcnt, &p);
00731 printf ("/pop_failure_jump to %d", p + mcnt - start);
00732 break;
00733
00734 case jump_past_alt:
00735 extract_number_and_incr (&mcnt, &p);
00736 printf ("/jump_past_alt to %d", p + mcnt - start);
00737 break;
00738
00739 case jump:
00740 extract_number_and_incr (&mcnt, &p);
00741 printf ("/jump to %d", p + mcnt - start);
00742 break;
00743
00744 case succeed_n:
00745 extract_number_and_incr (&mcnt, &p);
00746 p1 = p + mcnt;
00747 extract_number_and_incr (&mcnt2, &p);
00748 printf ("/succeed_n to %d, %d times", p1 - start, mcnt2);
00749 break;
00750
00751 case jump_n:
00752 extract_number_and_incr (&mcnt, &p);
00753 p1 = p + mcnt;
00754 extract_number_and_incr (&mcnt2, &p);
00755 printf ("/jump_n to %d, %d times", p1 - start, mcnt2);
00756 break;
00757
00758 case set_number_at:
00759 extract_number_and_incr (&mcnt, &p);
00760 p1 = p + mcnt;
00761 extract_number_and_incr (&mcnt2, &p);
00762 printf ("/set_number_at location %d to %d", p1 - start, mcnt2);
00763 break;
00764
00765 case wordbound:
00766 printf ("/wordbound");
00767 break;
00768
00769 case notwordbound:
00770 printf ("/notwordbound");
00771 break;
00772
00773 case wordbeg:
00774 printf ("/wordbeg");
00775 break;
00776
00777 case wordend:
00778 printf ("/wordend");
00779
00780 # ifdef emacs
00781 case before_dot:
00782 printf ("/before_dot");
00783 break;
00784
00785 case at_dot:
00786 printf ("/at_dot");
00787 break;
00788
00789 case after_dot:
00790 printf ("/after_dot");
00791 break;
00792
00793 case syntaxspec:
00794 printf ("/syntaxspec");
00795 mcnt = *p++;
00796 printf ("/%d", mcnt);
00797 break;
00798
00799 case notsyntaxspec:
00800 printf ("/notsyntaxspec");
00801 mcnt = *p++;
00802 printf ("/%d", mcnt);
00803 break;
00804 # endif
00805
00806 case wordchar:
00807 printf ("/wordchar");
00808 break;
00809
00810 case notwordchar:
00811 printf ("/notwordchar");
00812 break;
00813
00814 case begbuf:
00815 printf ("/begbuf");
00816 break;
00817
00818 case endbuf:
00819 printf ("/endbuf");
00820 break;
00821
00822 default:
00823 printf ("?%d", *(p-1));
00824 }
00825
00826 putchar ('\n');
00827 }
00828
00829 printf ("%d:\tend of pattern.\n", p - start);
00830 }
00831
00832
00833 void
00834 print_compiled_pattern (bufp)
00835 struct re_pattern_buffer *bufp;
00836 {
00837 unsigned char *buffer = bufp->buffer;
00838
00839 print_partial_compiled_pattern (buffer, buffer + bufp->used);
00840 printf ("%ld bytes used/%ld bytes allocated.\n",
00841 bufp->used, bufp->allocated);
00842
00843 if (bufp->fastmap_accurate && bufp->fastmap)
00844 {
00845 printf ("fastmap: ");
00846 print_fastmap (bufp->fastmap);
00847 }
00848
00849 printf ("re_nsub: %d\t", bufp->re_nsub);
00850 printf ("regs_alloc: %d\t", bufp->regs_allocated);
00851 printf ("can_be_null: %d\t", bufp->can_be_null);
00852 printf ("newline_anchor: %d\n", bufp->newline_anchor);
00853 printf ("no_sub: %d\t", bufp->no_sub);
00854 printf ("not_bol: %d\t", bufp->not_bol);
00855 printf ("not_eol: %d\t", bufp->not_eol);
00856 printf ("syntax: %lx\n", bufp->syntax);
00857
00858 }
00859
00860
/* Print the text from WHERE to the end of the two-part subject
   (STRING1 of SIZE1 bytes followed by STRING2 of SIZE2 bytes) on standard
   output.  When WHERE lies in the first part, the rest of STRING1 is
   printed and then all of STRING2; otherwise printing starts at WHERE
   within STRING2.  A NULL WHERE prints "(null)".  */
void
print_double_string (where, string1, size1, string2, size2)
    const char *where;
    const char *string1;
    const char *string2;
    int size1;
    int size2;
{
  int idx;

  if (where == NULL)
    {
      printf ("(null)");
      return;
    }

  if (FIRST_STRING_P (where))
    {
      idx = where - string1;
      while (idx < size1)
        {
          putchar (string1[idx]);
          idx++;
        }

      /* Continue from the start of the second part.  */
      where = string2;
    }

  idx = where - string2;
  while (idx < size2)
    {
      putchar (string2[idx]);
      idx++;
    }
}
00887
/* Write the single character C to standard error.  */
void
printchar (c)
    int c;
{
  fputc (c, stderr);
}
00894
00895 #else
00896
/* Non-DEBUG build: assert and every debugging macro expand to nothing,
   so their arguments are never evaluated.  */
# undef assert
# define assert(e)

# define DEBUG_STATEMENT(e)
# define DEBUG_PRINT1(x)
# define DEBUG_PRINT2(x1, x2)
# define DEBUG_PRINT3(x1, x2, x3)
# define DEBUG_PRINT4(x1, x2, x3, x4)
# define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
# define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
00907
00908 #endif
00909
00910
00911
00912
00913
00914
00915 reg_syntax_t re_syntax_options;
00916
00917
00918
00919
00920
00921
00922
00923
00924
00925 reg_syntax_t
00926 re_set_syntax (syntax)
00927 reg_syntax_t syntax;
00928 {
00929 reg_syntax_t ret = re_syntax_options;
00930
00931 re_syntax_options = syntax;
00932 #ifdef DEBUG
00933 if (syntax & RE_DEBUG)
00934 debug = 1;
00935 else if (debug)
00936 debug = 0;
00937 #endif
00938 return ret;
00939 }
00940 #ifdef _LIBC
00941 weak_alias (__re_set_syntax, re_set_syntax)
00942 #endif
00943
00944
00945
00946
00947
00948
/* Table of error message strings, each wrapped in gettext_noop.
   NOTE(review): presumably indexed by the reg_errcode_t values declared
   in gnu-regex.h, with entry 0 standing for success -- confirm that the
   order here matches that enum before adding or moving entries.  */
static const char *const re_error_msgid[] =
  {
    gettext_noop ("Success"),
    gettext_noop ("No match"),
    gettext_noop ("Invalid regular expression"),
    gettext_noop ("Invalid collation character"),
    gettext_noop ("Invalid character class name"),
    gettext_noop ("Trailing backslash"),
    gettext_noop ("Invalid back reference"),
    gettext_noop ("Unmatched [ or [^"),
    gettext_noop ("Unmatched ( or \\("),
    gettext_noop ("Unmatched \\{"),
    gettext_noop ("Invalid content of \\{\\}"),
    gettext_noop ("Invalid range end"),
    gettext_noop ("Memory exhausted"),
    gettext_noop ("Invalid preceding regular expression"),
    gettext_noop ("Premature end of regular expression"),
    gettext_noop ("Regular expression too big"),
    gettext_noop ("Unmatched ) or \\)"),
  };
00969
00970
00971
00972
00973
00974
00975
00976
00977
00978
00979
00980
00981
00982
00983
00984
00985
00986
00987
00988
00989
/* MATCH_MAY_ALLOCATE selects the variants of INIT_FAIL_STACK and
   re_max_failures below in which the failure stack is allocated when it
   is initialized.  It is defined by default.  */
#define MATCH_MAY_ALLOCATE

/* Under GCC, C_ALLOCA is not considered from here on.  */
#ifdef __GNUC__
# undef C_ALLOCA
#endif

/* For Emacs builds that use the C alloca emulation or malloc-based
   allocation, MATCH_MAY_ALLOCATE is withdrawn.  */
#if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs
# undef MATCH_MAY_ALLOCATE
#endif

/* Initial number of elements allocated for the failure stack by
   INIT_FAIL_STACK; may be overridden before this point.  */
#ifndef INIT_FAILURE_ALLOC
# define INIT_FAILURE_ALLOC 5
#endif
01019
01020
01021
01022
01023
01024
/* Failure stack types and the limit on its growth.  The two branches
   differ only in integer width: with INT_IS_16BIT the limit, the integer
   member and the stack counters are `long'; otherwise they are `int' /
   `unsigned'.  re_max_failures is used by DOUBLE_FAIL_STACK to cap the
   stack size.  */
#ifdef INT_IS_16BIT

# if defined MATCH_MAY_ALLOCATE
long int re_max_failures = 4000;
# else
long int re_max_failures = 2000;
# endif

/* One failure stack slot: either a pointer or an integer.  */
union fail_stack_elt
{
  unsigned char *pointer;
  long int integer;
};

typedef union fail_stack_elt fail_stack_elt_t;

typedef struct
{
  fail_stack_elt_t *stack;
  unsigned long int size;   /* Number of slots allocated.  */
  unsigned long int avail;  /* Index of the next free slot.  */
} fail_stack_type;

#else

# if defined MATCH_MAY_ALLOCATE
int re_max_failures = 20000;
# else
int re_max_failures = 2000;
# endif

/* One failure stack slot: either a pointer or an integer.  */
union fail_stack_elt
{
  unsigned char *pointer;
  int integer;
};

typedef union fail_stack_elt fail_stack_elt_t;

typedef struct
{
  fail_stack_elt_t *stack;
  unsigned size;   /* Number of slots allocated.  */
  unsigned avail;  /* Index of the next free slot.  */
} fail_stack_type;

#endif
01076
/* Predicates on the failure stack.  They refer to a variable named
   `fail_stack' (or a pointer `fail_stack_ptr') in the scope where they
   are used.  */
#define FAIL_STACK_EMPTY() (fail_stack.avail == 0)
#define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
#define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size)

/* INIT_FAIL_STACK prepares `fail_stack' for use.  With
   MATCH_MAY_ALLOCATE it allocates INIT_FAILURE_ALLOC slots and executes
   `return -2' in the enclosing function when the allocation yields NULL;
   without it, only the `avail' counter is reset.  RESET_FAIL_STACK
   releases the slots through REGEX_FREE_STACK in the first case and
   expands to nothing in the second.  */
#ifdef MATCH_MAY_ALLOCATE
# define INIT_FAIL_STACK() \
  do { \
    fail_stack.stack = (fail_stack_elt_t *) \
      REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (fail_stack_elt_t)); \
    \
    if (fail_stack.stack == NULL) \
      return -2; \
    \
    fail_stack.size = INIT_FAILURE_ALLOC; \
    fail_stack.avail = 0; \
  } while (0)

# define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack)
#else
# define INIT_FAIL_STACK() \
  do { \
    fail_stack.avail = 0; \
  } while (0)

# define RESET_FAIL_STACK()
#endif
01107
01108
01109
01110
01111
01112
01113
01114
01115
/* Double the capacity of FAIL_STACK.  The expression evaluates to 0,
   without reallocating, when the current size already exceeds
   re_max_failures * MAX_FAILURE_ITEMS; it also evaluates to 0 when the
   reallocation yields NULL.  Otherwise the size is doubled and the result
   is 1.  Note that the stack pointer is overwritten with the reallocation
   result before that result is tested against NULL.  */
#define DOUBLE_FAIL_STACK(fail_stack) \
  ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS) \
   ? 0 \
   : ((fail_stack).stack = (fail_stack_elt_t *) \
        REGEX_REALLOCATE_STACK ((fail_stack).stack, \
          (fail_stack).size * sizeof (fail_stack_elt_t), \
          ((fail_stack).size << 1) * sizeof (fail_stack_elt_t)), \
      \
      (fail_stack).stack == NULL \
      ? 0 \
      : ((fail_stack).size <<= 1, \
         1)))

/* Push POINTER onto FAIL_STACK, growing the stack when it is full.
   Evaluates to 0 when the stack could not be grown and to 1 otherwise.
   The fullness test goes through FAIL_STACK_FULL (), which reads the
   variable named `fail_stack' rather than the FAIL_STACK argument.  */
#define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \
  ((FAIL_STACK_FULL () \
    && !DOUBLE_FAIL_STACK (FAIL_STACK)) \
   ? 0 \
   : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER, \
      1))
01139
01140
01141
01142
/* Raw push primitives on `fail_stack'.  None of them checks for room;
   the caller must have made sure enough slots are available.  */

/* Push ITEM as a pointer.  */
#define PUSH_FAILURE_POINTER(item) \
  fail_stack.stack[fail_stack.avail++].pointer = (unsigned char *) (item)

/* Push ITEM as an integer.  */
#define PUSH_FAILURE_INT(item) \
  fail_stack.stack[fail_stack.avail++].integer = (item)

/* Push ITEM as a whole fail_stack_elt_t.  */
#define PUSH_FAILURE_ELT(item) \
  fail_stack.stack[fail_stack.avail++] = (item)

/* Matching pop primitives; each yields the popped slot viewed as a
   pointer, an integer, or a whole element.  No emptiness check.  */
#define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
#define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
#define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]

/* In DEBUG builds one extra integer (the failure id) is pushed and popped
   with every failure point; otherwise these expand to nothing.  */
#ifdef DEBUG
# define DEBUG_PUSH PUSH_FAILURE_INT
# define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
#else
# define DEBUG_PUSH(item)
# define DEBUG_POP(item_addr)
#endif
01172
01173
01174
01175
01176
01177
01178
01179
01180
01181
01182
/* Push a failure point onto `fail_stack'.

   Pushed, in this order: for every register from lowest_active_reg to
   highest_active_reg its regstart, regend and reg_info word; then
   lowest_active_reg, highest_active_reg, PATTERN_PLACE and STRING_PLACE;
   and, in DEBUG builds only, the failure id.

   The stack is doubled with DOUBLE_FAIL_STACK until NUM_FAILURE_ITEMS
   slots are free; when doubling fails the macro executes
   `return FAILURE_CODE' in the enclosing function.

   The expansion uses these names from the enclosing scope: fail_stack,
   lowest_active_reg, highest_active_reg, regstart, regend, reg_info, and
   (for debug output) failure_id, nfailure_points_pushed, num_regs_pushed,
   bufp, pend, string1, size1, string2, size2.

   Every line of the body must end in a backslash; a line without one
   ends the macro and leaves the remaining lines as file-scope code.  */
#define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \
  do { \
    /* Assigned by the alloca flavor of REGEX_REALLOCATE, which the  */ \
    /* stack-doubling code may expand to.  */ \
    char *destination; \
    active_reg_t this_reg; \
    \
    DEBUG_STATEMENT (failure_id++); \
    DEBUG_STATEMENT (nfailure_points_pushed++); \
    DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
    DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\
    DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
    \
    DEBUG_PRINT2 (" slots needed: %ld\n", NUM_FAILURE_ITEMS); \
    DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \
    \
    /* Ensure we have enough space allocated for what we will push.  */ \
    while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \
      { \
        if (!DOUBLE_FAIL_STACK (fail_stack)) \
          return failure_code; \
        \
        DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \
                      (fail_stack).size); \
        DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\
      } \
    \
    /* Push the info, starting with the registers.  */ \
    DEBUG_PRINT1 ("\n"); \
    \
    if (1) \
      for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
           this_reg++) \
        { \
          DEBUG_PRINT2 (" Pushing reg: %lu\n", this_reg); \
          DEBUG_STATEMENT (num_regs_pushed++); \
          \
          DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \
          PUSH_FAILURE_POINTER (regstart[this_reg]); \
          \
          DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \
          PUSH_FAILURE_POINTER (regend[this_reg]); \
          \
          DEBUG_PRINT2 (" info: %p\n ", \
                        reg_info[this_reg].word.pointer); \
          DEBUG_PRINT2 (" match_null=%d", \
                        REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \
          DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \
          DEBUG_PRINT2 (" matched_something=%d", \
                        MATCHED_SOMETHING (reg_info[this_reg])); \
          DEBUG_PRINT2 (" ever_matched=%d", \
                        EVER_MATCHED_SOMETHING (reg_info[this_reg])); \
          DEBUG_PRINT1 ("\n"); \
          PUSH_FAILURE_ELT (reg_info[this_reg].word); \
        } \
    \
    DEBUG_PRINT2 (" Pushing low active reg: %ld\n", lowest_active_reg);\
    PUSH_FAILURE_INT (lowest_active_reg); \
    \
    DEBUG_PRINT2 (" Pushing high active reg: %ld\n", highest_active_reg);\
    PUSH_FAILURE_INT (highest_active_reg); \
    \
    DEBUG_PRINT2 (" Pushing pattern %p:\n", pattern_place); \
    DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \
    PUSH_FAILURE_POINTER (pattern_place); \
    \
    DEBUG_PRINT2 (" Pushing string %p: `", string_place); \
    DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \
                               size2); \
    DEBUG_PRINT1 ("'\n"); \
    PUSH_FAILURE_POINTER (string_place); \
    \
    DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \
    DEBUG_PUSH (failure_id); \
  } while (0)
01261
01262
01263
/* Slots pushed per register by PUSH_FAILURE_POINT: regstart, regend and
   the reg_info word.  */
#define NUM_REG_ITEMS 3

/* Slots pushed per failure point besides the registers: the two active
   register bounds, the pattern pointer and the string pointer, plus the
   failure id in DEBUG builds.  */
#ifdef DEBUG
# define NUM_NONREG_ITEMS 5
#else
# define NUM_NONREG_ITEMS 4
#endif

/* Per-failure-point slot count that DOUBLE_FAIL_STACK multiplies by
   re_max_failures to cap the stack size.  NOTE(review): the factor 5
   looks like an assumed number of registers per failure point --
   confirm.  */
#define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)

/* Slots the next PUSH_FAILURE_POINT will use, given the current active
   register range.  The `0 ? 0 :' selector always takes the second
   arm.  */
#define NUM_FAILURE_ITEMS \
  (((0 \
     ? 0 : highest_active_reg - lowest_active_reg + 1) \
    * NUM_REG_ITEMS) \
   + NUM_NONREG_ITEMS)

/* Free slots left in `fail_stack'.  */
#define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
01288
01289
01290
01291
01292
01293
01294
01295
01296
01297
01298
01299
01300
01301
/* Pop the failure point pushed last by PUSH_FAILURE_POINT, restoring in
   reverse order of pushing: the failure id (DEBUG builds only), the
   string position into STR, the pattern position into PAT, HIGH_REG and
   LOW_REG, and then for each register from HIGH_REG down to LOW_REG its
   REG_INFO word, REGEND and REGSTART.  Finally set_regs_matched_done is
   cleared.

   STR is left unchanged when the popped string pointer is NULL.

   The expansion is a brace-enclosed block, not a do/while, and uses
   fail_stack, set_regs_matched_done and (for debug output) bufp, pend,
   string1, size1, string2, size2 and nfailure_points_popped from the
   enclosing scope.  The `else' arm after `if (1)' is never executed.

   Every line of the body must end in a backslash; a line without one
   ends the macro and leaves the remaining lines as file-scope code.  */
#define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
{ \
  DEBUG_STATEMENT (unsigned failure_id;) \
  active_reg_t this_reg; \
  const unsigned char *string_temp; \
  \
  assert (!FAIL_STACK_EMPTY ()); \
  \
  /* Remove failure points and point to how many regs pushed.  */ \
  DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \
  DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \
  DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \
  \
  assert (fail_stack.avail >= NUM_NONREG_ITEMS); \
  \
  DEBUG_POP (&failure_id); \
  DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \
  \
  /* A NULL saved string pointer means: keep the current string  */ \
  /* position in STR.  */ \
  string_temp = POP_FAILURE_POINTER (); \
  if (string_temp != NULL) \
    str = (const char *) string_temp; \
  \
  DEBUG_PRINT2 (" Popping string %p: `", str); \
  DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
  DEBUG_PRINT1 ("'\n"); \
  \
  pat = (unsigned char *) POP_FAILURE_POINTER (); \
  DEBUG_PRINT2 (" Popping pattern %p:\n", pat); \
  DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
  \
  /* Restore register info.  */ \
  high_reg = (active_reg_t) POP_FAILURE_INT (); \
  DEBUG_PRINT2 (" Popping high active reg: %ld\n", high_reg); \
  \
  low_reg = (active_reg_t) POP_FAILURE_INT (); \
  DEBUG_PRINT2 (" Popping low active reg: %ld\n", low_reg); \
  \
  if (1) \
    for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \
      { \
        DEBUG_PRINT2 (" Popping reg: %ld\n", this_reg); \
        \
        reg_info[this_reg].word = POP_FAILURE_ELT (); \
        DEBUG_PRINT2 (" info: %p\n", \
                      reg_info[this_reg].word.pointer); \
        \
        regend[this_reg] = (const char *) POP_FAILURE_POINTER (); \
        DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \
        \
        regstart[this_reg] = (const char *) POP_FAILURE_POINTER (); \
        DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \
      } \
  else \
    { \
      for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
        { \
          reg_info[this_reg].word.integer = 0; \
          regend[this_reg] = 0; \
          regstart[this_reg] = 0; \
        } \
      highest_active_reg = high_reg; \
    } \
  \
  set_regs_matched_done = 0; \
  DEBUG_STATEMENT (nfailure_points_popped++); \
}
01371
01372
01373
01374
01375
01376
01377
01378
01379
01380
01381
01382
01383
01384
01385
01386
01387
/* Per-register bookkeeping.  The `word' view lets the whole record be
   pushed on and popped from the failure stack as a single
   fail_stack_elt_t (see PUSH_FAILURE_POINT); the `bits' view gives access
   to the individual flags.  */
typedef union
{
  fail_stack_elt_t word;
  struct
  {
    /* 3 is the largest value the two-bit match_null_string_p field can
       hold.  NOTE(review): presumably it marks "not determined yet" --
       confirm at its uses.  */
#define MATCH_NULL_UNSET_VALUE 3
    unsigned match_null_string_p : 2;
    unsigned is_active : 1;
    unsigned matched_something : 1;
    unsigned ever_matched_something : 1;
  } bits;
} register_info_type;

/* Accessors for the flag bits of a register_info_type R; each is usable
   as an lvalue.  */
#define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p)
#define IS_ACTIVE(R) ((R).bits.is_active)
#define MATCHED_SOMETHING(R) ((R).bits.matched_something)
#define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something)
01407
01408
01409
01410
01411
/* Set matched_something and ever_matched_something for every register
   from lowest_active_reg to highest_active_reg.  The work is skipped
   while set_regs_matched_done is nonzero; POP_FAILURE_POINT clears that
   flag.  Uses set_regs_matched_done, lowest_active_reg,
   highest_active_reg and reg_info from the enclosing scope.  */
#define SET_REGS_MATCHED() \
  do \
    { \
      if (!set_regs_matched_done) \
        { \
          active_reg_t r; \
          set_regs_matched_done = 1; \
          for (r = lowest_active_reg; r <= highest_active_reg; r++) \
            { \
              MATCHED_SOMETHING (reg_info[r]) \
                = EVER_MATCHED_SOMETHING (reg_info[r]) \
                = 1; \
            } \
        } \
    } \
  while (0)
01428
01429
/* Sentinel for register pointers that have no value yet: the address of
   a private dummy object, which no other object can share.
   REG_UNSET (e) tests whether E is that sentinel.  (The address-of
   expression had been corrupted into a stray non-ASCII character, which
   does not compile.)  */
static char reg_unset_dummy;
#define REG_UNSET_VALUE (&reg_unset_dummy)
#define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
01433
01434
01435
/* Forward declarations of file-local helpers; their definitions lie
   outside this part of the file.  The store_ and insert_ helpers are the
   ones called by the STORE_JUMP and INSERT_JUMP macros below.  */
static reg_errcode_t regex_compile _RE_ARGS ((const char *pattern, size_t size,
                                              reg_syntax_t syntax,
                                              struct re_pattern_buffer *bufp));
static void store_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc, int arg));
static void store_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
                                 int arg1, int arg2));
static void insert_op1 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
                                  int arg, unsigned char *end));
static void insert_op2 _RE_ARGS ((re_opcode_t op, unsigned char *loc,
                                  int arg1, int arg2, unsigned char *end));
static boolean at_begline_loc_p _RE_ARGS ((const char *pattern, const char *p,
                                           reg_syntax_t syntax));
static boolean at_endline_loc_p _RE_ARGS ((const char *p, const char *pend,
                                           reg_syntax_t syntax));
static reg_errcode_t compile_range _RE_ARGS ((const char **p_ptr,
                                              const char *pend,
                                              char *translate,
                                              reg_syntax_t syntax,
                                              unsigned char *b));
01455
01456
01457
01458
01459
/* Pattern-reading helpers.  They use `p', `pend' and `translate' from the
   enclosing scope.  */

/* Fetch the next pattern byte into C as an unsigned char value and
   advance `p'; when `translate' is non-null the byte is mapped through
   it.  Executes `return REG_EEND' in the enclosing function when the
   pattern is exhausted.  */
#ifndef PATFETCH
# define PATFETCH(c) \
  do {if (p == pend) return REG_EEND; \
    c = (unsigned char) *p++; \
    if (translate) c = (unsigned char) translate[c]; \
  } while (0)
#endif

/* Same as PATFETCH but never translates.  */
#define PATFETCH_RAW(c) \
  do {if (p == pend) return REG_EEND; \
    c = (unsigned char) *p++; \
  } while (0)

/* Step `p' back by one byte.  */
#define PATUNFETCH p--

/* Map D through `translate' when a table is present; otherwise yield D
   unchanged.  */
#ifndef TRANSLATE
# define TRANSLATE(d) \
  (translate ? (char) translate[(unsigned char) (d)] : (d))
#endif
01487
01488
01489
01490
01491
/* Initial pattern buffer size, as passed to RETALLOC_IF-style allocation
   by the compiler.  NOTE(review): its use lies outside this part of the
   file -- confirm.  */
#define INIT_BUF_SIZE 32

/* Emission helpers.  They use `b' (the next free byte) and `bufp' from
   the enclosing scope.  */

/* Grow the buffer with EXTEND_BUFFER until N more bytes fit after `b'.  */
#define GET_BUFFER_SPACE(n) \
  while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated) \
    EXTEND_BUFFER ()

/* Append one byte, making room first.  */
#define BUF_PUSH(c) \
  do { \
    GET_BUFFER_SPACE (1); \
    *b++ = (unsigned char) (c); \
  } while (0)

/* Append two bytes, making room for both first.  */
#define BUF_PUSH_2(c1, c2) \
  do { \
    GET_BUFFER_SPACE (2); \
    *b++ = (unsigned char) (c1); \
    *b++ = (unsigned char) (c2); \
  } while (0)

/* Append three bytes, making room for all of them first.  */
#define BUF_PUSH_3(c1, c2, c3) \
  do { \
    GET_BUFFER_SPACE (3); \
    *b++ = (unsigned char) (c1); \
    *b++ = (unsigned char) (c2); \
    *b++ = (unsigned char) (c3); \
  } while (0)
01524
01525
01526
01527
/* Jump helpers.  In each, the stored displacement is TO - LOC - 3: the
   distance measured from the end of a three-byte instruction at LOC.  */

/* Store opcode OP at LOC with a displacement leading to TO.  */
#define STORE_JUMP(op, loc, to) \
  store_op1 (op, loc, (int) ((to) - (loc) - 3))

/* As STORE_JUMP, with the extra numeric operand ARG.  */
#define STORE_JUMP2(op, loc, to, arg) \
  store_op2 (op, loc, (int) ((to) - (loc) - 3), arg)

/* As STORE_JUMP, but insert the instruction at LOC, moving everything
   from LOC up to the caller's `b' out of the way first.  */
#define INSERT_JUMP(op, loc, to) \
  insert_op1 (op, loc, (int) ((to) - (loc) - 3), b)

/* As STORE_JUMP2, but insert instead of overwrite (see INSERT_JUMP).  */
#define INSERT_JUMP2(op, loc, to, arg) \
  insert_op2 (op, loc, (int) ((to) - (loc) - 3), arg, b)
01542
01543
01544
01545
01546
01547
01548
01549
01550
01551
/* MAX_BUF_SIZE is the largest size a compiled-pattern buffer may reach;
   REALLOC resizes a buffer.  The _MSC_VER-without-WIN32 configuration
   gets a slightly smaller limit and an explicit size_t conversion of the
   requested size.  */
#if defined _MSC_VER && !defined WIN32
# define MAX_BUF_SIZE 65500L
# define REALLOC(p,s) realloc ((p), (size_t) (s))
#else
# define MAX_BUF_SIZE (1L << 16)
# define REALLOC(p,s) realloc ((p), (s))
#endif

/* Double the compiled-pattern buffer (capped at MAX_BUF_SIZE) and, when
   the block moved, re-base every pointer regex_compile keeps into it:
   `b', `begalt', `fixup_alt_jump', `laststart' and `pending_exact'.

   Makes the enclosing function return REG_ESIZE when the buffer is
   already at MAX_BUF_SIZE and REG_ESPACE when the reallocation fails.
   In both cases the compile stack is released first, as
   FREE_STACK_RETURN does, so these exits do not leak it.

   The reallocation goes through temporaries: if it fails, BUFP->buffer
   still points at the old (still valid) block and BUFP->allocated still
   describes it, so the caller can free the buffer normally.  */
#define EXTEND_BUFFER() \
  do { \
    unsigned char *old_buffer = bufp->buffer; \
    unsigned char *new_buffer; \
    unsigned long new_allocated; \
    if (bufp->allocated == MAX_BUF_SIZE) \
      return (free (compile_stack.stack), REG_ESIZE); \
    new_allocated = bufp->allocated << 1; \
    if (new_allocated > MAX_BUF_SIZE) \
      new_allocated = MAX_BUF_SIZE; \
    new_buffer = (unsigned char *) REALLOC (old_buffer, new_allocated); \
    if (new_buffer == NULL) \
      return (free (compile_stack.stack), REG_ESPACE); \
    bufp->buffer = new_buffer; \
    bufp->allocated = new_allocated; \
    if (old_buffer != new_buffer) \
      { \
        b = (b - old_buffer) + new_buffer; \
        begalt = (begalt - old_buffer) + new_buffer; \
        if (fixup_alt_jump) \
          fixup_alt_jump = (fixup_alt_jump - old_buffer) + new_buffer; \
        if (laststart) \
          laststart = (laststart - old_buffer) + new_buffer; \
        if (pending_exact) \
          pending_exact = (pending_exact - old_buffer) + new_buffer; \
      } \
  } while (0)
01591
01592
01593
01594
01595
/* Highest group number for which regex_compile emits start_memory /
   stop_memory instructions.  */
#define MAX_REGNUM 255

/* Type used for group (register) numbers.  */
typedef unsigned int regnum_t;

/* Offset into the compiled pattern.  The compile stack stores offsets
   rather than pointers.  */
typedef long int pattern_offset_t;

/* One saved compilation context, pushed when a group opens.  */
typedef struct
{
  /* `begalt' as an offset from the start of the buffer.  */
  pattern_offset_t begalt_offset;
  /* 0 when no alternative jump was pending, otherwise the offset of
     `fixup_alt_jump' plus one.  */
  pattern_offset_t fixup_alt_jump;
  /* Offset of the third byte of the group's start_memory instruction.  */
  pattern_offset_t inner_group_offset;
  /* Offset of `b' at the moment the group was opened.  */
  pattern_offset_t laststart_offset;
  /* Number assigned to the group.  */
  regnum_t regnum;
} compile_stack_elt_t;

/* Growable stack of saved contexts.  */
typedef struct
{
  compile_stack_elt_t *stack;   /* Element storage.  */
  unsigned int size;            /* Elements allocated.  */
  unsigned int avail;           /* Elements in use; also the next free index.  */
} compile_stack_type;
01626
01627
/* Number of elements the compile stack starts out with.  */
#define INIT_COMPILE_STACK_SIZE 32

/* State tests on the caller's local `compile_stack'.  */
#define COMPILE_STACK_EMPTY (!compile_stack.avail)
#define COMPILE_STACK_FULL (compile_stack.size == compile_stack.avail)

/* The element at index `avail', i.e. the next slot to be filled (or,
   right after `avail' has been decremented, the element just popped).  */
#define COMPILE_STACK_TOP (*(compile_stack.stack + compile_stack.avail))
01635
01636
01637
/* Set the bit for character C in the bitmap that starts at the caller's
   `b'.  C is reduced to an unsigned char first, so the byte index and the
   bit number are always in range.  Both uses of C are parenthesized so a
   compound argument (e.g. a conditional expression) is converted as a
   whole.  C is evaluated twice: do not pass an expression with side
   effects.  */
#define SET_LIST_BIT(c) \
  (b[((unsigned char) (c)) / BYTEWIDTH] \
   |= 1 << (((unsigned char) (c)) % BYTEWIDTH))
01641
01642
01643
/* Read a run of decimal digits from the pattern and accumulate it into
   NUM.  NUM is left untouched when no digit is read (callers start it at
   -1 to detect that).  On exit `c' holds the last character fetched:
   the first non-digit, or the final digit if the pattern ended.

   Accumulation stops once NUM has gone past RE_DUP_MAX: the remaining
   digits are still consumed, but NUM is no longer multiplied, so an
   overlong digit string cannot overflow the signed accumulator.  The
   value left in NUM is then greater than RE_DUP_MAX, which is enough
   for a caller that compares the bound against RE_DUP_MAX to reject it.

   Uses the caller's `p', `pend' and `c'; PATFETCH may make the caller
   return.  */
#define GET_UNSIGNED_NUMBER(num) \
  { if (p != pend) \
      { \
        PATFETCH (c); \
        while (ISDIGIT (c)) \
          { \
            if (num <= RE_DUP_MAX) \
              { \
                if (num < 0) \
                  num = 0; \
                num = num * 10 + c - '0'; \
              } \
            if (p == pend) \
              break; \
            PATFETCH (c); \
          } \
      } \
  }
01659
/* CHAR_CLASS_MAX_LENGTH bounds the length of a `[:name:]' class name;
   IS_CHAR_CLASS (STRING) is nonzero when STRING names a known class.  */
#if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H)

/* With the wide-character headers available, validity is whatever
   wctype() (or __wctype() inside libc) accepts.  */
# ifdef CHARCLASS_NAME_MAX
#  define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
# else
/* No limit advertised by the system: fall back to a fixed bound.  */
#  define CHAR_CLASS_MAX_LENGTH 256
# endif

# ifdef _LIBC
#  define IS_CHAR_CLASS(string) __wctype (string)
# else
#  define IS_CHAR_CLASS(string) wctype (string)
# endif

#else /* no usable <wctype.h> */

/* Length of "xdigit", the longest name in the list below.  */
# define CHAR_CLASS_MAX_LENGTH 6

/* Fixed list of the twelve class names handled without wctype.  */
# define IS_CHAR_CLASS(string) \
  (   STREQ (string, "alpha") \
   || STREQ (string, "upper") \
   || STREQ (string, "lower") \
   || STREQ (string, "digit") \
   || STREQ (string, "alnum") \
   || STREQ (string, "xdigit") \
   || STREQ (string, "space") \
   || STREQ (string, "print") \
   || STREQ (string, "punct") \
   || STREQ (string, "graph") \
   || STREQ (string, "cntrl") \
   || STREQ (string, "blank"))

#endif
01687
01688 #ifndef MATCH_MAY_ALLOCATE
01689
01690
01691
01692
01693
01694
01695
01696
01697 static fail_stack_type fail_stack;
01698
01699
01700
01701
01702 static int regs_allocated_size;
01703
01704 static const char ** regstart, ** regend;
01705 static const char ** old_regstart, ** old_regend;
01706 static const char **best_regstart, **best_regend;
01707 static register_info_type *reg_info;
01708 static const char **reg_dummy;
01709 static register_info_type *reg_info_dummy;
01710
01711
01712
01713
01714 static
01715 regex_grow_registers (num_regs)
01716 int num_regs;
01717 {
01718 if (num_regs > regs_allocated_size)
01719 {
01720 RETALLOC_IF (regstart, num_regs, const char *);
01721 RETALLOC_IF (regend, num_regs, const char *);
01722 RETALLOC_IF (old_regstart, num_regs, const char *);
01723 RETALLOC_IF (old_regend, num_regs, const char *);
01724 RETALLOC_IF (best_regstart, num_regs, const char *);
01725 RETALLOC_IF (best_regend, num_regs, const char *);
01726 RETALLOC_IF (reg_info, num_regs, register_info_type);
01727 RETALLOC_IF (reg_dummy, num_regs, const char *);
01728 RETALLOC_IF (reg_info_dummy, num_regs, register_info_type);
01729
01730 regs_allocated_size = num_regs;
01731 }
01732 }
01733
01734 #endif
01735
01736 static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type
01737 compile_stack,
01738 regnum_t regnum));
01739
01740
01741
01742
01743
01744
01745
01746
01747
01748
01749
01750
01751
01752
01753
01754
01755
01756
01757
01758
/* Release the caller's compile stack, then return VALUE from the
   enclosing function.  VALUE is evaluated after the stack is freed.  */
#define FREE_STACK_RETURN(value) \
  do \
    { \
      free (compile_stack.stack); \
      return (value); \
    } \
  while (0)
01761
01762 static reg_errcode_t
01763 regex_compile (pattern, size, syntax, bufp)
01764 const char *pattern;
01765 size_t size;
01766 reg_syntax_t syntax;
01767 struct re_pattern_buffer *bufp;
01768 {
01769
01770
01771
01772 register unsigned char c, c1;
01773
01774
01775 const char *p1;
01776
01777
01778 register unsigned char *b;
01779
01780
01781 compile_stack_type compile_stack;
01782
01783
01784 const char *p = pattern;
01785 const char *pend = pattern + size;
01786
01787
01788 RE_TRANSLATE_TYPE translate = bufp->translate;
01789
01790
01791
01792
01793
01794 unsigned char *pending_exact = 0;
01795
01796
01797
01798
01799 unsigned char *laststart = 0;
01800
01801
01802 unsigned char *begalt;
01803
01804
01805
01806 const char *beg_interval;
01807
01808
01809
01810
01811 unsigned char *fixup_alt_jump = 0;
01812
01813
01814
01815
01816 regnum_t regnum = 0;
01817
01818 #ifdef DEBUG
01819 DEBUG_PRINT1 ("\nCompiling pattern: ");
01820 if (debug)
01821 {
01822 unsigned debug_count;
01823
01824 for (debug_count = 0; debug_count < size; debug_count++)
01825 putchar (pattern[debug_count]);
01826 putchar ('\n');
01827 }
01828 #endif
01829
01830
01831 compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
01832 if (compile_stack.stack == NULL)
01833 return REG_ESPACE;
01834
01835 compile_stack.size = INIT_COMPILE_STACK_SIZE;
01836 compile_stack.avail = 0;
01837
01838
01839 bufp->syntax = syntax;
01840 bufp->fastmap_accurate = 0;
01841 bufp->not_bol = bufp->not_eol = 0;
01842
01843
01844
01845
01846 bufp->used = 0;
01847
01848
01849 bufp->re_nsub = 0;
01850
01851 #if !defined emacs && !defined SYNTAX_TABLE
01852
01853 init_syntax_once ();
01854 #endif
01855
01856 if (bufp->allocated == 0)
01857 {
01858 if (bufp->buffer)
01859 {
01860
01861
01862 RETALLOC (bufp->buffer, INIT_BUF_SIZE, unsigned char);
01863 }
01864 else
01865 {
01866 bufp->buffer = TALLOC (INIT_BUF_SIZE, unsigned char);
01867 }
01868 if (!bufp->buffer) FREE_STACK_RETURN (REG_ESPACE);
01869
01870 bufp->allocated = INIT_BUF_SIZE;
01871 }
01872
01873 begalt = b = bufp->buffer;
01874
01875
01876 while (p != pend)
01877 {
01878 PATFETCH (c);
01879
01880 switch (c)
01881 {
01882 case '^':
01883 {
01884 if (
01885 p == pattern + 1
01886
01887 || syntax & RE_CONTEXT_INDEP_ANCHORS
01888
01889 || at_begline_loc_p (pattern, p, syntax))
01890 BUF_PUSH (begline);
01891 else
01892 goto normal_char;
01893 }
01894 break;
01895
01896
01897 case '$':
01898 {
01899 if (
01900 p == pend
01901
01902 || syntax & RE_CONTEXT_INDEP_ANCHORS
01903
01904 || at_endline_loc_p (p, pend, syntax))
01905 BUF_PUSH (endline);
01906 else
01907 goto normal_char;
01908 }
01909 break;
01910
01911
01912 case '+':
01913 case '?':
01914 if ((syntax & RE_BK_PLUS_QM)
01915 || (syntax & RE_LIMITED_OPS))
01916 goto normal_char;
01917 handle_plus:
01918 case '*':
01919
01920 if (!laststart)
01921 {
01922 if (syntax & RE_CONTEXT_INVALID_OPS)
01923 FREE_STACK_RETURN (REG_BADRPT);
01924 else if (!(syntax & RE_CONTEXT_INDEP_OPS))
01925 goto normal_char;
01926 }
01927
01928 {
01929
01930 boolean keep_string_p = false;
01931
01932
01933 char zero_times_ok = 0, many_times_ok = 0;
01934
01935
01936
01937
01938
01939
01940 for (;;)
01941 {
01942 zero_times_ok |= c != '+';
01943 many_times_ok |= c != '?';
01944
01945 if (p == pend)
01946 break;
01947
01948 PATFETCH (c);
01949
01950 if (c == '*'
01951 || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
01952 ;
01953
01954 else if (syntax & RE_BK_PLUS_QM && c == '\\')
01955 {
01956 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
01957
01958 PATFETCH (c1);
01959 if (!(c1 == '+' || c1 == '?'))
01960 {
01961 PATUNFETCH;
01962 PATUNFETCH;
01963 break;
01964 }
01965
01966 c = c1;
01967 }
01968 else
01969 {
01970 PATUNFETCH;
01971 break;
01972 }
01973
01974
01975 }
01976
01977
01978
01979 if (!laststart)
01980 break;
01981
01982
01983
01984 if (many_times_ok)
01985 {
01986
01987
01988
01989
01990
01991
01992
01993
01994
01995 assert (p - 1 > pattern);
01996
01997
01998 GET_BUFFER_SPACE (3);
01999
02000
02001
02002
02003
02004
02005 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
02006 && zero_times_ok
02007 && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
02008 && !(syntax & RE_DOT_NEWLINE))
02009 {
02010 STORE_JUMP (jump, b, laststart);
02011 keep_string_p = true;
02012 }
02013 else
02014
02015 STORE_JUMP (maybe_pop_jump, b, laststart - 3);
02016
02017
02018 b += 3;
02019 }
02020
02021
02022
02023 GET_BUFFER_SPACE (3);
02024 INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
02025 : on_failure_jump,
02026 laststart, b + 3);
02027 pending_exact = 0;
02028 b += 3;
02029
02030 if (!zero_times_ok)
02031 {
02032
02033
02034
02035
02036
02037 GET_BUFFER_SPACE (3);
02038 INSERT_JUMP (dummy_failure_jump, laststart, laststart + 6);
02039 b += 3;
02040 }
02041 }
02042 break;
02043
02044
02045 case '.':
02046 laststart = b;
02047 BUF_PUSH (anychar);
02048 break;
02049
02050
02051 case '[':
02052 {
02053 boolean had_char_class = false;
02054
02055 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
02056
02057
02058
02059 GET_BUFFER_SPACE (34);
02060
02061 laststart = b;
02062
02063
02064
02065 BUF_PUSH (*p == '^' ? charset_not : charset);
02066 if (*p == '^')
02067 p++;
02068
02069
02070 p1 = p;
02071
02072
02073 BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
02074
02075
02076 bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
02077
02078
02079 if ((re_opcode_t) b[-2] == charset_not
02080 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
02081 SET_LIST_BIT ('\n');
02082
02083
02084 for (;;)
02085 {
02086 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
02087
02088 PATFETCH (c);
02089
02090
02091 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
02092 {
02093 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
02094
02095 PATFETCH (c1);
02096 SET_LIST_BIT (c1);
02097 continue;
02098 }
02099
02100
02101
02102
02103 if (c == ']' && p != p1 + 1)
02104 break;
02105
02106
02107
02108 if (had_char_class && c == '-' && *p != ']')
02109 FREE_STACK_RETURN (REG_ERANGE);
02110
02111
02112
02113
02114
02115 if (c == '-'
02116 && !(p - 2 >= pattern && p[-2] == '[')
02117 && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
02118 && *p != ']')
02119 {
02120 reg_errcode_t ret
02121 = compile_range (&p, pend, translate, syntax, b);
02122 if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
02123 }
02124
02125 else if (p[0] == '-' && p[1] != ']')
02126 {
02127 reg_errcode_t ret;
02128
02129
02130 PATFETCH (c1);
02131
02132 ret = compile_range (&p, pend, translate, syntax, b);
02133 if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
02134 }
02135
02136
02137
02138
02139 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
02140 {
02141 char str[CHAR_CLASS_MAX_LENGTH + 1];
02142
02143 PATFETCH (c);
02144 c1 = 0;
02145
02146
02147 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
02148
02149 for (;;)
02150 {
02151 PATFETCH (c);
02152 if ((c == ':' && *p == ']') || p == pend
02153 || c1 == CHAR_CLASS_MAX_LENGTH)
02154 break;
02155 str[c1++] = c;
02156 }
02157 str[c1] = '\0';
02158
02159
02160
02161
02162 if (c == ':' && *p == ']')
02163 {
02164
02165
02166
02167 #if defined _LIBC || (defined HAVE_WCTYPE_H && defined HAVE_WCHAR_H && defined HAVE_BTOWC)
02168 boolean is_lower = STREQ (str, "lower");
02169 boolean is_upper = STREQ (str, "upper");
02170 wctype_t wt;
02171 int ch;
02172
02173 wt = IS_CHAR_CLASS (str);
02174 if (wt == 0)
02175 FREE_STACK_RETURN (REG_ECTYPE);
02176
02177
02178
02179 PATFETCH (c);
02180
02181 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
02182
02183 for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
02184 {
02185 # ifdef _LIBC
02186 if (__iswctype (__btowc (ch), wt))
02187 SET_LIST_BIT (ch);
02188 #else
02189 if (iswctype (btowc (ch), wt))
02190 SET_LIST_BIT (ch);
02191 #endif
02192
02193 if (translate && (is_upper || is_lower)
02194 && (ISUPPER (ch) || ISLOWER (ch)))
02195 SET_LIST_BIT (ch);
02196 }
02197
02198 had_char_class = true;
02199 #else
02200 int ch;
02201 boolean is_alnum = STREQ (str, "alnum");
02202 boolean is_alpha = STREQ (str, "alpha");
02203 boolean is_blank = STREQ (str, "blank");
02204 boolean is_cntrl = STREQ (str, "cntrl");
02205 boolean is_digit = STREQ (str, "digit");
02206 boolean is_graph = STREQ (str, "graph");
02207 boolean is_lower = STREQ (str, "lower");
02208 boolean is_print = STREQ (str, "print");
02209 boolean is_punct = STREQ (str, "punct");
02210 boolean is_space = STREQ (str, "space");
02211 boolean is_upper = STREQ (str, "upper");
02212 boolean is_xdigit = STREQ (str, "xdigit");
02213
02214 if (!IS_CHAR_CLASS (str))
02215 FREE_STACK_RETURN (REG_ECTYPE);
02216
02217
02218
02219 PATFETCH (c);
02220
02221 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
02222
02223 for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
02224 {
02225
02226
02227 if ( (is_alnum && ISALNUM (ch))
02228 || (is_alpha && ISALPHA (ch))
02229 || (is_blank && ISBLANK (ch))
02230 || (is_cntrl && ISCNTRL (ch)))
02231 SET_LIST_BIT (ch);
02232 if ( (is_digit && ISDIGIT (ch))
02233 || (is_graph && ISGRAPH (ch))
02234 || (is_lower && ISLOWER (ch))
02235 || (is_print && ISPRINT (ch)))
02236 SET_LIST_BIT (ch);
02237 if ( (is_punct && ISPUNCT (ch))
02238 || (is_space && ISSPACE (ch))
02239 || (is_upper && ISUPPER (ch))
02240 || (is_xdigit && ISXDIGIT (ch)))
02241 SET_LIST_BIT (ch);
02242 if ( translate && (is_upper || is_lower)
02243 && (ISUPPER (ch) || ISLOWER (ch)))
02244 SET_LIST_BIT (ch);
02245 }
02246 had_char_class = true;
02247 #endif
02248 }
02249 else
02250 {
02251 c1++;
02252 while (c1--)
02253 PATUNFETCH;
02254 SET_LIST_BIT ('[');
02255 SET_LIST_BIT (':');
02256 had_char_class = false;
02257 }
02258 }
02259 else
02260 {
02261 had_char_class = false;
02262 SET_LIST_BIT (c);
02263 }
02264 }
02265
02266
02267
02268 while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
02269 b[-1]--;
02270 b += b[-1];
02271 }
02272 break;
02273
02274
02275 case '(':
02276 if (syntax & RE_NO_BK_PARENS)
02277 goto handle_open;
02278 else
02279 goto normal_char;
02280
02281
02282 case ')':
02283 if (syntax & RE_NO_BK_PARENS)
02284 goto handle_close;
02285 else
02286 goto normal_char;
02287
02288
02289 case '\n':
02290 if (syntax & RE_NEWLINE_ALT)
02291 goto handle_alt;
02292 else
02293 goto normal_char;
02294
02295
02296 case '|':
02297 if (syntax & RE_NO_BK_VBAR)
02298 goto handle_alt;
02299 else
02300 goto normal_char;
02301
02302
02303 case '{':
02304 if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
02305 goto handle_interval;
02306 else
02307 goto normal_char;
02308
02309
02310 case '\\':
02311 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
02312
02313
02314
02315
02316 PATFETCH_RAW (c);
02317
02318 switch (c)
02319 {
02320 case '(':
02321 if (syntax & RE_NO_BK_PARENS)
02322 goto normal_backslash;
02323
02324 handle_open:
02325 bufp->re_nsub++;
02326 regnum++;
02327
02328 if (COMPILE_STACK_FULL)
02329 {
02330 RETALLOC (compile_stack.stack, compile_stack.size << 1,
02331 compile_stack_elt_t);
02332 if (compile_stack.stack == NULL) return REG_ESPACE;
02333
02334 compile_stack.size <<= 1;
02335 }
02336
02337
02338
02339
02340
02341 COMPILE_STACK_TOP.begalt_offset = begalt - bufp->buffer;
02342 COMPILE_STACK_TOP.fixup_alt_jump
02343 = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
02344 COMPILE_STACK_TOP.laststart_offset = b - bufp->buffer;
02345 COMPILE_STACK_TOP.regnum = regnum;
02346
02347
02348
02349
02350
02351 if (regnum <= MAX_REGNUM)
02352 {
02353 COMPILE_STACK_TOP.inner_group_offset = b - bufp->buffer + 2;
02354 BUF_PUSH_3 (start_memory, regnum, 0);
02355 }
02356
02357 compile_stack.avail++;
02358
02359 fixup_alt_jump = 0;
02360 laststart = 0;
02361 begalt = b;
02362
02363
02364
02365 pending_exact = 0;
02366 break;
02367
02368
02369 case ')':
02370 if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
02371
02372 if (COMPILE_STACK_EMPTY)
02373 {
02374 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
02375 goto normal_backslash;
02376 else
02377 FREE_STACK_RETURN (REG_ERPAREN);
02378 }
02379
02380 handle_close:
02381 if (fixup_alt_jump)
02382 {
02383
02384
02385
02386 BUF_PUSH (push_dummy_failure);
02387
02388
02389
02390 STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
02391 }
02392
02393
02394 if (COMPILE_STACK_EMPTY)
02395 {
02396 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
02397 goto normal_char;
02398 else
02399 FREE_STACK_RETURN (REG_ERPAREN);
02400 }
02401
02402
02403
02404 assert (compile_stack.avail != 0);
02405 {
02406
02407
02408
02409 regnum_t this_group_regnum;
02410
02411 compile_stack.avail--;
02412 begalt = bufp->buffer + COMPILE_STACK_TOP.begalt_offset;
02413 fixup_alt_jump
02414 = COMPILE_STACK_TOP.fixup_alt_jump
02415 ? bufp->buffer + COMPILE_STACK_TOP.fixup_alt_jump - 1
02416 : 0;
02417 laststart = bufp->buffer + COMPILE_STACK_TOP.laststart_offset;
02418 this_group_regnum = COMPILE_STACK_TOP.regnum;
02419
02420
02421
02422 pending_exact = 0;
02423
02424
02425
02426 if (this_group_regnum <= MAX_REGNUM)
02427 {
02428 unsigned char *inner_group_loc
02429 = bufp->buffer + COMPILE_STACK_TOP.inner_group_offset;
02430
02431 *inner_group_loc = regnum - this_group_regnum;
02432 BUF_PUSH_3 (stop_memory, this_group_regnum,
02433 regnum - this_group_regnum);
02434 }
02435 }
02436 break;
02437
02438
02439 case '|':
02440 if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
02441 goto normal_backslash;
02442 handle_alt:
02443 if (syntax & RE_LIMITED_OPS)
02444 goto normal_char;
02445
02446
02447
02448 GET_BUFFER_SPACE (3);
02449 INSERT_JUMP (on_failure_jump, begalt, b + 6);
02450 pending_exact = 0;
02451 b += 3;
02452
02453
02454
02455
02456
02457
02458
02459
02460
02461
02462
02463
02464
02465
02466
02467
02468
02469 if (fixup_alt_jump)
02470 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
02471
02472
02473
02474
02475 fixup_alt_jump = b;
02476 GET_BUFFER_SPACE (3);
02477 b += 3;
02478
02479 laststart = 0;
02480 begalt = b;
02481 break;
02482
02483
02484 case '{':
02485
02486 if (!(syntax & RE_INTERVALS)
02487
02488
02489 || ((syntax & RE_INTERVALS) && (syntax & RE_NO_BK_BRACES))
02490 || (p - 2 == pattern && p == pend))
02491 goto normal_backslash;
02492
02493 handle_interval:
02494 {
02495
02496
02497
02498 int lower_bound = -1, upper_bound = -1;
02499
02500 beg_interval = p - 1;
02501
02502 if (p == pend)
02503 {
02504 if (syntax & RE_NO_BK_BRACES)
02505 goto unfetch_interval;
02506 else
02507 FREE_STACK_RETURN (REG_EBRACE);
02508 }
02509
02510 GET_UNSIGNED_NUMBER (lower_bound);
02511
02512 if (c == ',')
02513 {
02514 GET_UNSIGNED_NUMBER (upper_bound);
02515 if (upper_bound < 0) upper_bound = RE_DUP_MAX;
02516 }
02517 else
02518
02519 upper_bound = lower_bound;
02520
02521 if (lower_bound < 0 || upper_bound > RE_DUP_MAX
02522 || lower_bound > upper_bound)
02523 {
02524 if (syntax & RE_NO_BK_BRACES)
02525 goto unfetch_interval;
02526 else
02527 FREE_STACK_RETURN (REG_BADBR);
02528 }
02529
02530 if (!(syntax & RE_NO_BK_BRACES))
02531 {
02532 if (c != '\\') FREE_STACK_RETURN (REG_EBRACE);
02533
02534 PATFETCH (c);
02535 }
02536
02537 if (c != '}')
02538 {
02539 if (syntax & RE_NO_BK_BRACES)
02540 goto unfetch_interval;
02541 else
02542 FREE_STACK_RETURN (REG_BADBR);
02543 }
02544
02545
02546
02547
02548 if (!laststart)
02549 {
02550 if (syntax & RE_CONTEXT_INVALID_OPS)
02551 FREE_STACK_RETURN (REG_BADRPT);
02552 else if (syntax & RE_CONTEXT_INDEP_OPS)
02553 laststart = b;
02554 else
02555 goto unfetch_interval;
02556 }
02557
02558
02559
02560
02561 if (upper_bound == 0)
02562 {
02563 GET_BUFFER_SPACE (3);
02564 INSERT_JUMP (jump, laststart, b + 3);
02565 b += 3;
02566 }
02567
02568
02569
02570
02571
02572
02573
02574
02575
02576
02577 else
02578 {
02579
02580 unsigned nbytes = 10 + (upper_bound > 1) * 10;
02581
02582 GET_BUFFER_SPACE (nbytes);
02583
02584
02585
02586
02587
02588
02589 INSERT_JUMP2 (succeed_n, laststart,
02590 b + 5 + (upper_bound > 1) * 5,
02591 lower_bound);
02592 b += 5;
02593
02594
02595
02596
02597
02598 insert_op2 (set_number_at, laststart, 5, lower_bound, b);
02599 b += 5;
02600
02601 if (upper_bound > 1)
02602 {
02603
02604
02605
02606
02607
02608
02609 STORE_JUMP2 (jump_n, b, laststart + 5,
02610 upper_bound - 1);
02611 b += 5;
02612
02613
02614
02615
02616
02617
02618
02619
02620
02621
02622
02623
02624
02625
02626
02627 insert_op2 (set_number_at, laststart, b - laststart,
02628 upper_bound - 1, b);
02629 b += 5;
02630 }
02631 }
02632 pending_exact = 0;
02633 beg_interval = NULL;
02634 }
02635 break;
02636
02637 unfetch_interval:
02638
02639 assert (beg_interval);
02640 p = beg_interval;
02641 beg_interval = NULL;
02642
02643
02644 PATFETCH (c);
02645
02646 if (!(syntax & RE_NO_BK_BRACES))
02647 {
02648 if (p > pattern && p[-1] == '\\')
02649 goto normal_backslash;
02650 }
02651 goto normal_char;
02652
02653 #ifdef emacs
02654
02655
02656 case '=':
02657 BUF_PUSH (at_dot);
02658 break;
02659
02660 case 's':
02661 laststart = b;
02662 PATFETCH (c);
02663 BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
02664 break;
02665
02666 case 'S':
02667 laststart = b;
02668 PATFETCH (c);
02669 BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
02670 break;
02671 #endif
02672
02673
02674 case 'w':
02675 if (syntax & RE_NO_GNU_OPS)
02676 goto normal_char;
02677 laststart = b;
02678 BUF_PUSH (wordchar);
02679 break;
02680
02681
02682 case 'W':
02683 if (syntax & RE_NO_GNU_OPS)
02684 goto normal_char;
02685 laststart = b;
02686 BUF_PUSH (notwordchar);
02687 break;
02688
02689
02690 case '<':
02691 if (syntax & RE_NO_GNU_OPS)
02692 goto normal_char;
02693 BUF_PUSH (wordbeg);
02694 break;
02695
02696 case '>':
02697 if (syntax & RE_NO_GNU_OPS)
02698 goto normal_char;
02699 BUF_PUSH (wordend);
02700 break;
02701
02702 case 'b':
02703 if (syntax & RE_NO_GNU_OPS)
02704 goto normal_char;
02705 BUF_PUSH (wordbound);
02706 break;
02707
02708 case 'B':
02709 if (syntax & RE_NO_GNU_OPS)
02710 goto normal_char;
02711 BUF_PUSH (notwordbound);
02712 break;
02713
02714 case '`':
02715 if (syntax & RE_NO_GNU_OPS)
02716 goto normal_char;
02717 BUF_PUSH (begbuf);
02718 break;
02719
02720 case '\'':
02721 if (syntax & RE_NO_GNU_OPS)
02722 goto normal_char;
02723 BUF_PUSH (endbuf);
02724 break;
02725
02726 case '1': case '2': case '3': case '4': case '5':
02727 case '6': case '7': case '8': case '9':
02728 if (syntax & RE_NO_BK_REFS)
02729 goto normal_char;
02730
02731 c1 = c - '0';
02732
02733 if (c1 > regnum)
02734 FREE_STACK_RETURN (REG_ESUBREG);
02735
02736
02737 if (group_in_compile_stack (compile_stack, (regnum_t) c1))
02738 goto normal_char;
02739
02740 laststart = b;
02741 BUF_PUSH_2 (duplicate, c1);
02742 break;
02743
02744
02745 case '+':
02746 case '?':
02747 if (syntax & RE_BK_PLUS_QM)
02748 goto handle_plus;
02749 else
02750 goto normal_backslash;
02751
02752 default:
02753 normal_backslash:
02754
02755
02756
02757 c = TRANSLATE (c);
02758 goto normal_char;
02759 }
02760 break;
02761
02762
02763 default:
02764
02765 normal_char:
02766
02767 if (!pending_exact
02768
02769
02770 || pending_exact + *pending_exact + 1 != b
02771
02772
02773 || *pending_exact == (1 << BYTEWIDTH) - 1
02774
02775
02776 || *p == '*' || *p == '^'
02777 || ((syntax & RE_BK_PLUS_QM)
02778 ? *p == '\\' && (p[1] == '+' || p[1] == '?')
02779 : (*p == '+' || *p == '?'))
02780 || ((syntax & RE_INTERVALS)
02781 && ((syntax & RE_NO_BK_BRACES)
02782 ? *p == '{'
02783 : (p[0] == '\\' && p[1] == '{'))))
02784 {
02785
02786
02787 laststart = b;
02788
02789 BUF_PUSH_2 (exactn, 0);
02790 pending_exact = b - 1;
02791 }
02792
02793 BUF_PUSH (c);
02794 (*pending_exact)++;
02795 break;
02796 }
02797 }
02798
02799
02800
02801
02802 if (fixup_alt_jump)
02803 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
02804
02805 if (!COMPILE_STACK_EMPTY)
02806 FREE_STACK_RETURN (REG_EPAREN);
02807
02808
02809
02810 if (syntax & RE_NO_POSIX_BACKTRACKING)
02811 BUF_PUSH (succeed);
02812
02813 free (compile_stack.stack);
02814
02815
02816 bufp->used = b - bufp->buffer;
02817
02818 #ifdef DEBUG
02819 if (debug)
02820 {
02821 DEBUG_PRINT1 ("\nCompiled pattern: \n");
02822 print_compiled_pattern (bufp);
02823 }
02824 #endif
02825
02826 #ifndef MATCH_MAY_ALLOCATE
02827
02828
02829
02830 {
02831 int num_regs = bufp->re_nsub + 1;
02832
02833
02834
02835
02836 if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
02837 {
02838 fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
02839
02840 # ifdef emacs
02841 if (! fail_stack.stack)
02842 fail_stack.stack
02843 = (fail_stack_elt_t *) xmalloc (fail_stack.size
02844 * sizeof (fail_stack_elt_t));
02845 else
02846 fail_stack.stack
02847 = (fail_stack_elt_t *) xrealloc (fail_stack.stack,
02848 (fail_stack.size
02849 * sizeof (fail_stack_elt_t)));
02850 # else
02851 if (! fail_stack.stack)
02852 fail_stack.stack
02853 = (fail_stack_elt_t *) malloc (fail_stack.size
02854 * sizeof (fail_stack_elt_t));
02855 else
02856 fail_stack.stack
02857 = (fail_stack_elt_t *) realloc (fail_stack.stack,
02858 (fail_stack.size
02859 * sizeof (fail_stack_elt_t)));
02860 # endif
02861 }
02862
02863 regex_grow_registers (num_regs);
02864 }
02865 #endif
02866
02867 return REG_NOERROR;
02868 }
02869
02870
02871
02872
02873
02874 static void
02875 store_op1 (op, loc, arg)
02876 re_opcode_t op;
02877 unsigned char *loc;
02878 int arg;
02879 {
02880 *loc = (unsigned char) op;
02881 STORE_NUMBER (loc + 1, arg);
02882 }
02883
02884
02885
02886
02887 static void
02888 store_op2 (op, loc, arg1, arg2)
02889 re_opcode_t op;
02890 unsigned char *loc;
02891 int arg1, arg2;
02892 {
02893 *loc = (unsigned char) op;
02894 STORE_NUMBER (loc + 1, arg1);
02895 STORE_NUMBER (loc + 3, arg2);
02896 }
02897
02898
02899
02900
02901
02902 static void
02903 insert_op1 (op, loc, arg, end)
02904 re_opcode_t op;
02905 unsigned char *loc;
02906 int arg;
02907 unsigned char *end;
02908 {
02909 register unsigned char *pfrom = end;
02910 register unsigned char *pto = end + 3;
02911
02912 while (pfrom != loc)
02913 *--pto = *--pfrom;
02914
02915 store_op1 (op, loc, arg);
02916 }
02917
02918
02919
02920
02921 static void
02922 insert_op2 (op, loc, arg1, arg2, end)
02923 re_opcode_t op;
02924 unsigned char *loc;
02925 int arg1, arg2;
02926 unsigned char *end;
02927 {
02928 register unsigned char *pfrom = end;
02929 register unsigned char *pto = end + 5;
02930
02931 while (pfrom != loc)
02932 *--pto = *--pfrom;
02933
02934 store_op2 (op, loc, arg1, arg2);
02935 }
02936
02937
02938
02939
02940
02941
02942 static boolean
02943 at_begline_loc_p (pattern, p, syntax)
02944 const char *pattern, *p;
02945 reg_syntax_t syntax;
02946 {
02947 const char *prev = p - 2;
02948 boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
02949
02950 return
02951
02952 (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
02953
02954 || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
02955 }
02956
02957
02958
02959
02960
02961 static boolean
02962 at_endline_loc_p (p, pend, syntax)
02963 const char *p, *pend;
02964 reg_syntax_t syntax;
02965 {
02966 const char *next = p;
02967 boolean next_backslash = *next == '\\';
02968 const char *next_next = p + 1 < pend ? p + 1 : 0;
02969
02970 return
02971
02972 (syntax & RE_NO_BK_PARENS ? *next == ')'
02973 : next_backslash && next_next && *next_next == ')')
02974
02975 || (syntax & RE_NO_BK_VBAR ? *next == '|'
02976 : next_backslash && next_next && *next_next == '|');
02977 }
02978
02979
02980
02981
02982
02983 static boolean
02984 group_in_compile_stack (compile_stack, regnum)
02985 compile_stack_type compile_stack;
02986 regnum_t regnum;
02987 {
02988 int this_element;
02989
02990 for (this_element = compile_stack.avail - 1;
02991 this_element >= 0;
02992 this_element--)
02993 if (compile_stack.stack[this_element].regnum == regnum)
02994 return true;
02995
02996 return false;
02997 }
02998
02999
03000
03001
03002
03003
03004
03005
03006
03007
03008
03009
03010
03011 static reg_errcode_t
03012 compile_range (p_ptr, pend, translate, syntax, b)
03013 const char **p_ptr, *pend;
03014 RE_TRANSLATE_TYPE translate;
03015 reg_syntax_t syntax;
03016 unsigned char *b;
03017 {
03018 unsigned this_char;
03019
03020 const char *p = *p_ptr;
03021 unsigned int range_start, range_end;
03022
03023 if (p == pend)
03024 return REG_ERANGE;
03025
03026
03027
03028
03029
03030
03031
03032
03033
03034 range_start = ((const unsigned char *) p)[-2];
03035 range_end = ((const unsigned char *) p)[0];
03036
03037
03038
03039 (*p_ptr)++;
03040
03041
03042 if (range_start > range_end)
03043 return syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
03044
03045
03046
03047
03048
03049 for (this_char = range_start; this_char <= range_end; this_char++)
03050 {
03051 SET_LIST_BIT (TRANSLATE (this_char));
03052 }
03053
03054 return REG_NOERROR;
03055 }
03056
03057
03058
03059
03060
03061
03062
03063
03064
03065
03066
03067
03068
03069
03070 int
03071 re_compile_fastmap (bufp)
03072 struct re_pattern_buffer *bufp;
03073 {
03074 int j, k;
03075 #ifdef MATCH_MAY_ALLOCATE
03076 fail_stack_type fail_stack;
03077 #endif
03078 #ifndef REGEX_MALLOC
03079 char *destination;
03080 #endif
03081
03082 register char *fastmap = bufp->fastmap;
03083 unsigned char *pattern = bufp->buffer;
03084 unsigned char *p = pattern;
03085 register unsigned char *pend = pattern + bufp->used;
03086
03087 #ifdef REL_ALLOC
03088
03089
03090 fail_stack_elt_t *failure_stack_ptr;
03091 #endif
03092
03093
03094
03095
03096
03097 boolean path_can_be_null = true;
03098
03099
03100 boolean succeed_n_p = false;
03101
03102 assert (fastmap != NULL && p != NULL);
03103
03104 INIT_FAIL_STACK ();
03105 bzero (fastmap, 1 << BYTEWIDTH);
03106 bufp->fastmap_accurate = 1;
03107 bufp->can_be_null = 0;
03108
03109 while (1)
03110 {
03111 if (p == pend || *p == succeed)
03112 {
03113
03114 if (!FAIL_STACK_EMPTY ())
03115 {
03116 bufp->can_be_null |= path_can_be_null;
03117
03118
03119 path_can_be_null = true;
03120
03121 p = fail_stack.stack[--fail_stack.avail].pointer;
03122
03123 continue;
03124 }
03125 else
03126 break;
03127 }
03128
03129
03130 assert (p < pend);
03131
03132 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
03133 {
03134
03135
03136
03137
03138
03139
03140 case duplicate:
03141 bufp->can_be_null = 1;
03142 goto done;
03143
03144
03145
03146
03147
03148 case exactn:
03149 fastmap[p[1]] = 1;
03150 break;
03151
03152
03153 case charset:
03154 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
03155 if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
03156 fastmap[j] = 1;
03157 break;
03158
03159
03160 case charset_not:
03161
03162 for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
03163 fastmap[j] = 1;
03164
03165 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
03166 if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
03167 fastmap[j] = 1;
03168 break;
03169
03170
03171 case wordchar:
03172 for (j = 0; j < (1 << BYTEWIDTH); j++)
03173 if (SYNTAX (j) == Sword)
03174 fastmap[j] = 1;
03175 break;
03176
03177
03178 case notwordchar:
03179 for (j = 0; j < (1 << BYTEWIDTH); j++)
03180 if (SYNTAX (j) != Sword)
03181 fastmap[j] = 1;
03182 break;
03183
03184
03185 case anychar:
03186 {
03187 int fastmap_newline = fastmap['\n'];
03188
03189
03190 for (j = 0; j < (1 << BYTEWIDTH); j++)
03191 fastmap[j] = 1;
03192
03193
03194 if (!(bufp->syntax & RE_DOT_NEWLINE))
03195 fastmap['\n'] = fastmap_newline;
03196
03197
03198
03199 else if (bufp->can_be_null)
03200 goto done;
03201
03202
03203 break;
03204 }
03205
03206 #ifdef emacs
03207 case syntaxspec:
03208 k = *p++;
03209 for (j = 0; j < (1 << BYTEWIDTH); j++)
03210 if (SYNTAX (j) == (enum syntaxcode) k)
03211 fastmap[j] = 1;
03212 break;
03213
03214
03215 case notsyntaxspec:
03216 k = *p++;
03217 for (j = 0; j < (1 << BYTEWIDTH); j++)
03218 if (SYNTAX (j) != (enum syntaxcode) k)
03219 fastmap[j] = 1;
03220 break;
03221
03222
03223
03224
03225
03226
03227 case before_dot:
03228 case at_dot:
03229 case after_dot:
03230 continue;
03231 #endif
03232
03233
03234 case no_op:
03235 case begline:
03236 case endline:
03237 case begbuf:
03238 case endbuf:
03239 case wordbound:
03240 case notwordbound:
03241 case wordbeg:
03242 case wordend:
03243 case push_dummy_failure:
03244 continue;
03245
03246
03247 case jump_n:
03248 case pop_failure_jump:
03249 case maybe_pop_jump:
03250 case jump:
03251 case jump_past_alt:
03252 case dummy_failure_jump:
03253 EXTRACT_NUMBER_AND_INCR (j, p);
03254 p += j;
03255 if (j > 0)
03256 continue;
03257
03258
03259
03260
03261
03262
03263 if ((re_opcode_t) *p != on_failure_jump
03264 && (re_opcode_t) *p != succeed_n)
03265 continue;
03266
03267 p++;
03268 EXTRACT_NUMBER_AND_INCR (j, p);
03269 p += j;
03270
03271
03272 if (!FAIL_STACK_EMPTY ()
03273 && fail_stack.stack[fail_stack.avail - 1].pointer == p)
03274 fail_stack.avail--;
03275
03276 continue;
03277
03278
03279 case on_failure_jump:
03280 case on_failure_keep_string_jump:
03281 handle_on_failure_jump:
03282 EXTRACT_NUMBER_AND_INCR (j, p);
03283
03284
03285
03286
03287
03288
03289
03290
03291 if (p + j < pend)
03292 {
03293 if (!PUSH_PATTERN_OP (p + j, fail_stack))
03294 {
03295 RESET_FAIL_STACK ();
03296 return -2;
03297 }
03298 }
03299 else
03300 bufp->can_be_null = 1;
03301
03302 if (succeed_n_p)
03303 {
03304 EXTRACT_NUMBER_AND_INCR (k, p);
03305 succeed_n_p = false;
03306 }
03307
03308 continue;
03309
03310
03311 case succeed_n:
03312
03313 p += 2;
03314
03315
03316 EXTRACT_NUMBER_AND_INCR (k, p);
03317 if (k == 0)
03318 {
03319 p -= 4;
03320 succeed_n_p = true;
03321 goto handle_on_failure_jump;
03322 }
03323 continue;
03324
03325
03326 case set_number_at:
03327 p += 4;
03328 continue;
03329
03330
03331 case start_memory:
03332 case stop_memory:
03333 p += 2;
03334 continue;
03335
03336
03337 default:
03338 abort ();
03339 }
03340
03341
03342
03343
03344
03345
03346
03347 path_can_be_null = false;
03348 p = pend;
03349 }
03350
03351
03352
03353 bufp->can_be_null |= path_can_be_null;
03354
03355 done:
03356 RESET_FAIL_STACK ();
03357 return 0;
03358 }
03359 #ifdef _LIBC
03360 weak_alias (__re_compile_fastmap, re_compile_fastmap)
03361 #endif
03362
03363
03364
03365
03366
03367
03368
03369
03370
03371
03372
03373
03374
03375
03376 void
03377 re_set_registers (bufp, regs, num_regs, starts, ends)
03378 struct re_pattern_buffer *bufp;
03379 struct re_registers *regs;
03380 unsigned num_regs;
03381 regoff_t *starts, *ends;
03382 {
03383 if (num_regs)
03384 {
03385 bufp->regs_allocated = REGS_REALLOCATE;
03386 regs->num_regs = num_regs;
03387 regs->start = starts;
03388 regs->end = ends;
03389 }
03390 else
03391 {
03392 bufp->regs_allocated = REGS_UNALLOCATED;
03393 regs->num_regs = 0;
03394 regs->start = regs->end = (regoff_t *) 0;
03395 }
03396 }
03397 #ifdef _LIBC
03398 weak_alias (__re_set_registers, re_set_registers)
03399 #endif
03400
03401
03402
03403
03404
03405
/* Single-string front end to re_search_2.

   STRING (SIZE bytes) is handed over as the second string, with a
   null, zero-length first string and SIZE as the stop position.
   The return value is whatever re_search_2 returns.  */
int
re_search (bufp, string, size, startpos, range, regs)
     struct re_pattern_buffer *bufp;
     const char *string;
     int size, startpos, range;
     struct re_registers *regs;
{
  return re_search_2 (bufp,
                      NULL, 0,          /* no first string */
                      string, size,
                      startpos, range,
                      regs,
                      size);            /* stop at the end of STRING */
}
#ifdef _LIBC
weak_alias (__re_search, re_search)
#endif
03419
03420
03421
03422
03423
03424
03425
03426
03427
03428
03429
03430
03431
03432
03433
03434
03435
03436
03437
03438
03439
03440
03441
03442 int
03443 re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
03444 struct re_pattern_buffer *bufp;
03445 const char *string1, *string2;
03446 int size1, size2;
03447 int startpos;
03448 int range;
03449 struct re_registers *regs;
03450 int stop;
03451 {
03452 int val;
03453 register char *fastmap = bufp->fastmap;
03454 register RE_TRANSLATE_TYPE translate = bufp->translate;
03455 int total_size = size1 + size2;
03456 int endpos = startpos + range;
03457
03458
03459 if (startpos < 0 || startpos > total_size)
03460 return -1;
03461
03462
03463
03464
03465 if (endpos < 0)
03466 range = 0 - startpos;
03467 else if (endpos > total_size)
03468 range = total_size - startpos;
03469
03470
03471
03472 if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == begbuf && range > 0)
03473 {
03474 if (startpos > 0)
03475 return -1;
03476 else
03477 range = 1;
03478 }
03479
03480 #ifdef emacs
03481
03482
03483 if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
03484 {
03485 range = PT - startpos;
03486 if (range <= 0)
03487 return -1;
03488 }
03489 #endif
03490
03491
03492 if (fastmap && !bufp->fastmap_accurate)
03493 if (re_compile_fastmap (bufp) == -2)
03494 return -2;
03495
03496
03497 for (;;)
03498 {
03499
03500
03501
03502
03503 if (fastmap && startpos < total_size && !bufp->can_be_null)
03504 {
03505 if (range > 0)
03506 {
03507 register const char *d;
03508 register int lim = 0;
03509 int irange = range;
03510
03511 if (startpos < size1 && startpos + range >= size1)
03512 lim = range - (size1 - startpos);
03513
03514 d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
03515
03516
03517
03518 if (translate)
03519 while (range > lim
03520 && !fastmap[(unsigned char)
03521 translate[(unsigned char) *d++]])
03522 range--;
03523 else
03524 while (range > lim && !fastmap[(unsigned char) *d++])
03525 range--;
03526
03527 startpos += irange - range;
03528 }
03529 else
03530 {
03531 register char c = (size1 == 0 || startpos >= size1
03532 ? string2[startpos - size1]
03533 : string1[startpos]);
03534
03535 if (!fastmap[(unsigned char) TRANSLATE (c)])
03536 goto advance;
03537 }
03538 }
03539
03540
03541 if (range >= 0 && startpos == total_size && fastmap
03542 && !bufp->can_be_null)
03543 return -1;
03544
03545 val = re_match_2_internal (bufp, string1, size1, string2, size2,
03546 startpos, regs, stop);
03547 #ifndef REGEX_MALLOC
03548 # ifdef C_ALLOCA
03549 alloca (0);
03550 # endif
03551 #endif
03552
03553 if (val >= 0)
03554 return startpos;
03555
03556 if (val == -2)
03557 return -2;
03558
03559 advance:
03560 if (!range)
03561 break;
03562 else if (range > 0)
03563 {
03564 range--;
03565 startpos++;
03566 }
03567 else
03568 {
03569 range++;
03570 startpos--;
03571 }
03572 }
03573 return -1;
03574 }
03575 #ifdef _LIBC
03576 weak_alias (__re_search_2, re_search_2)
03577 #endif
03578
03579
03580
/* The macros below expand in terms of local variables of the function
   that uses them: string1, string2, size1, size2, end1, end2,
   end_match_1, end_match_2, d and dend, plus the label `fail'.  */

/* Turn PTR, a position in string1 or string2, into an offset counted
   from the start of string1, with string2 taken to follow string1
   directly (hence the `+ size1' for positions in string2).  */
#define POINTER_TO_OFFSET(ptr)                                          \
  ((regoff_t) (FIRST_STRING_P (ptr)                                     \
               ? (ptr) - string1                                        \
               : (ptr) - string2 + size1))

/* Nonzero while the current end-of-data pointer is the one for
   string1.  */
#define MATCHING_IN_FIRST_STRING  (dend == end_match_1)

/* Make sure there is a character to read at D.  When D has reached
   DEND, either switch over to string2 or, if DEND already was the
   end for string2, jump to `fail'.  */
#define PREFETCH()                                                      \
  while (d == dend)                                                     \
    {                                                                   \
      /* Nothing left in string2 either: fail.  */                      \
      if (dend == end_match_2)                                          \
        goto fail;                                                      \
      /* Out of string1: carry on at the start of string2.  */          \
      d = string2;                                                      \
      dend = end_match_2;                                               \
    }

/* Test whether D is at the start, respectively the end, of the text
   being matched.  */
#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
#define AT_STRINGS_END(d) ((d) == end2)

/* Nonzero if the character at D has syntax class Sword.  Two
   positions are redirected across the string1/string2 seam: D == end1
   reads the first character of string2, and D == string2 - 1 reads
   the last character of string1.  */
#define WORDCHAR_P(d)                                                   \
  (SYNTAX ((d) == end1 ? *string2                                       \
           : (d) == string2 - 1 ? *(end1 - 1) : *(d))                   \
   == Sword)

/* Disabled: compiled out by the `#if 0'.  */
#if 0
/* Nonzero if D is at either end of the text or the characters on the
   two sides of D differ in word-ness.  */
#define AT_WORD_BOUNDARY(d)                                             \
  (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)                             \
   || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
#endif
03627
03628
/* Release the per-call working storage of the matcher.  */
#ifdef MATCH_MAY_ALLOCATE
/* Free VAR if it is non-null, then null it.  Wrapped in
   do { } while (0) so the expansion is a single statement: the bare
   `if (...) ...; var = NULL' form splits in two when used as the body
   of an unbraced `if' or `else'.  */
# define FREE_VAR(var)                                                  \
  do                                                                    \
    {                                                                   \
      if (var)                                                          \
        REGEX_FREE (var);                                               \
      var = NULL;                                                       \
    }                                                                   \
  while (0)

/* Free the failure stack and every register array.  Expands in terms
   of the calling function's locals of these names.  */
# define FREE_VARIABLES()                                               \
  do {                                                                  \
    REGEX_FREE_STACK (fail_stack.stack);                                \
    FREE_VAR (regstart);                                                \
    FREE_VAR (regend);                                                  \
    FREE_VAR (old_regstart);                                            \
    FREE_VAR (old_regend);                                              \
    FREE_VAR (best_regstart);                                           \
    FREE_VAR (best_regend);                                             \
    FREE_VAR (reg_info);                                                \
    FREE_VAR (reg_dummy);                                               \
    FREE_VAR (reg_info_dummy);                                          \
  } while (0)
#else
/* Nothing is freed when MATCH_MAY_ALLOCATE is not defined.  */
# define FREE_VARIABLES() ((void)0)
#endif

/* Values of the lowest/highest active register variables that mean
   "no register is active".  Both lie above every value a single
   pattern byte can hold.  */
#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
03657
03658
03659
03660 #ifndef emacs
03661
03662
/* Match BUFP's pattern against STRING (SIZE bytes) starting exactly
   at POS.  STRING is passed to re_match_2_internal as the second
   string, with a null, zero-length first string and SIZE as the stop
   position.  Returns what re_match_2_internal returns.  */
int
re_match (bufp, string, size, pos, regs)
     struct re_pattern_buffer *bufp;
     const char *string;
     int size, pos;
     struct re_registers *regs;
{
  int match_result;

  match_result = re_match_2_internal (bufp, NULL, 0, string, size,
                                      pos, regs, size);
# if !defined REGEX_MALLOC && defined C_ALLOCA
  /* Presumably lets the C alloca emulation reclaim what the matcher
     allocated -- TODO confirm.  */
  alloca (0);
# endif
  return match_result;
}
# ifdef _LIBC
weak_alias (__re_match, re_match)
# endif
03682 #endif
03683
03684 static boolean group_match_null_string_p _RE_ARGS ((unsigned char **p,
03685 unsigned char *end,
03686 register_info_type *reg_info));
03687 static boolean alt_match_null_string_p _RE_ARGS ((unsigned char *p,
03688 unsigned char *end,
03689 register_info_type *reg_info));
03690 static boolean common_op_match_null_string_p _RE_ARGS ((unsigned char **p,
03691 unsigned char *end,
03692 register_info_type *reg_info));
03693 static int bcmp_translate _RE_ARGS ((const char *s1, const char *s2,
03694 int len, char *translate));
03695
03696
03697
03698
03699
03700
03701
03702
03703
03704
03705
03706
03707
03708
/* Two-string variant of re_match: match BUFP's pattern starting
   exactly at POS in the virtual concatenation of STRING1 (SIZE1
   bytes) and STRING2 (SIZE2 bytes).  Every argument is forwarded
   unchanged to re_match_2_internal, whose result is returned.  */
int
re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
     struct re_pattern_buffer *bufp;
     const char *string1, *string2;
     int size1, size2;
     int pos;
     struct re_registers *regs;
     int stop;
{
  int match_result;

  match_result = re_match_2_internal (bufp, string1, size1, string2, size2,
                                      pos, regs, stop);
#if !defined REGEX_MALLOC && defined C_ALLOCA
  /* Presumably lets the C alloca emulation reclaim what the matcher
     allocated -- TODO confirm.  */
  alloca (0);
#endif
  return match_result;
}
#ifdef _LIBC
weak_alias (__re_match_2, re_match_2)
#endif
03730
03731
03732
03733 static int
03734 re_match_2_internal (bufp, string1, size1, string2, size2, pos, regs, stop)
03735 struct re_pattern_buffer *bufp;
03736 const char *string1, *string2;
03737 int size1, size2;
03738 int pos;
03739 struct re_registers *regs;
03740 int stop;
03741 {
03742
03743 int mcnt;
03744 unsigned char *p1;
03745
03746
03747 const char *end1, *end2;
03748
03749
03750
03751 const char *end_match_1, *end_match_2;
03752
03753
03754 const char *d, *dend;
03755
03756
03757 unsigned char *p = bufp->buffer;
03758 register unsigned char *pend = p + bufp->used;
03759
03760
03761
03762 unsigned char *just_past_start_mem = 0;
03763
03764
03765 RE_TRANSLATE_TYPE translate = bufp->translate;
03766
03767
03768
03769
03770
03771
03772
03773
03774
03775
03776 #ifdef MATCH_MAY_ALLOCATE
03777 fail_stack_type fail_stack;
03778 #endif
03779 #ifdef DEBUG
03780 static unsigned failure_id = 0;
03781 unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
03782 #endif
03783
03784 #ifdef REL_ALLOC
03785
03786
03787 fail_stack_elt_t *failure_stack_ptr;
03788 #endif
03789
03790
03791
03792
03793 size_t num_regs = bufp->re_nsub + 1;
03794
03795
03796 active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
03797 active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
03798
03799
03800
03801
03802
03803
03804
03805
03806 #ifdef MATCH_MAY_ALLOCATE
03807 const char **regstart, **regend;
03808 #endif
03809
03810
03811
03812
03813
03814
03815 #ifdef MATCH_MAY_ALLOCATE
03816 const char **old_regstart, **old_regend;
03817 #endif
03818
03819
03820
03821
03822
03823
03824
03825 #ifdef MATCH_MAY_ALLOCATE
03826 register_info_type *reg_info;
03827 #endif
03828
03829
03830
03831
03832
03833 unsigned best_regs_set = false;
03834 #ifdef MATCH_MAY_ALLOCATE
03835 const char **best_regstart, **best_regend;
03836 #endif
03837
03838
03839
03840
03841
03842
03843
03844
03845
03846 const char *match_end = NULL;
03847
03848
03849 int set_regs_matched_done = 0;
03850
03851
03852 #ifdef MATCH_MAY_ALLOCATE
03853 const char **reg_dummy;
03854 register_info_type *reg_info_dummy;
03855 #endif
03856
03857 #ifdef DEBUG
03858
03859 unsigned num_regs_pushed = 0;
03860 #endif
03861
03862 DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
03863
03864 INIT_FAIL_STACK ();
03865
03866 #ifdef MATCH_MAY_ALLOCATE
03867
03868
03869
03870
03871
03872 if (bufp->re_nsub)
03873 {
03874 regstart = REGEX_TALLOC (num_regs, const char *);
03875 regend = REGEX_TALLOC (num_regs, const char *);
03876 old_regstart = REGEX_TALLOC (num_regs, const char *);
03877 old_regend = REGEX_TALLOC (num_regs, const char *);
03878 best_regstart = REGEX_TALLOC (num_regs, const char *);
03879 best_regend = REGEX_TALLOC (num_regs, const char *);
03880 reg_info = REGEX_TALLOC (num_regs, register_info_type);
03881 reg_dummy = REGEX_TALLOC (num_regs, const char *);
03882 reg_info_dummy = REGEX_TALLOC (num_regs, register_info_type);
03883
03884 if (!(regstart && regend && old_regstart && old_regend && reg_info
03885 && best_regstart && best_regend && reg_dummy && reg_info_dummy))
03886 {
03887 FREE_VARIABLES ();
03888 return -2;
03889 }
03890 }
03891 else
03892 {
03893
03894
03895 regstart = regend = old_regstart = old_regend = best_regstart
03896 = best_regend = reg_dummy = NULL;
03897 reg_info = reg_info_dummy = (register_info_type *) NULL;
03898 }
03899 #endif
03900
03901
03902 if (pos < 0 || pos > size1 + size2)
03903 {
03904 FREE_VARIABLES ();
03905 return -1;
03906 }
03907
03908
03909
03910
03911 for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
03912 {
03913 regstart[mcnt] = regend[mcnt]
03914 = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
03915
03916 REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
03917 IS_ACTIVE (reg_info[mcnt]) = 0;
03918 MATCHED_SOMETHING (reg_info[mcnt]) = 0;
03919 EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
03920 }
03921
03922
03923
03924 if (size2 == 0 && string1 != NULL)
03925 {
03926 string2 = string1;
03927 size2 = size1;
03928 string1 = 0;
03929 size1 = 0;
03930 }
03931 end1 = string1 + size1;
03932 end2 = string2 + size2;
03933
03934
03935 if (stop <= size1)
03936 {
03937 end_match_1 = string1 + stop;
03938 end_match_2 = string2;
03939 }
03940 else
03941 {
03942 end_match_1 = end1;
03943 end_match_2 = string2 + stop - size1;
03944 }
03945
03946
03947
03948
03949
03950
03951
03952 if (size1 > 0 && pos <= size1)
03953 {
03954 d = string1 + pos;
03955 dend = end_match_1;
03956 }
03957 else
03958 {
03959 d = string2 + pos - size1;
03960 dend = end_match_2;
03961 }
03962
03963 DEBUG_PRINT1 ("The compiled pattern is:\n");
03964 DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
03965 DEBUG_PRINT1 ("The string to match is: `");
03966 DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
03967 DEBUG_PRINT1 ("'\n");
03968
03969
03970
03971
03972 for (;;)
03973 {
03974 #ifdef _LIBC
03975 DEBUG_PRINT2 ("\n%p: ", p);
03976 #else
03977 DEBUG_PRINT2 ("\n0x%x: ", p);
03978 #endif
03979
03980 if (p == pend)
03981 {
03982 DEBUG_PRINT1 ("end of pattern ... ");
03983
03984
03985
03986 if (d != end_match_2)
03987 {
03988
03989
03990 boolean same_str_p = (FIRST_STRING_P (match_end)
03991 == MATCHING_IN_FIRST_STRING);
03992
03993 boolean best_match_p;
03994
03995
03996
03997 if (same_str_p)
03998 best_match_p = d > match_end;
03999 else
04000 best_match_p = !MATCHING_IN_FIRST_STRING;
04001
04002 DEBUG_PRINT1 ("backtracking.\n");
04003
04004 if (!FAIL_STACK_EMPTY ())
04005 {
04006
04007
04008 if (!best_regs_set || best_match_p)
04009 {
04010 best_regs_set = true;
04011 match_end = d;
04012
04013 DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
04014
04015 for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
04016 {
04017 best_regstart[mcnt] = regstart[mcnt];
04018 best_regend[mcnt] = regend[mcnt];
04019 }
04020 }
04021 goto fail;
04022 }
04023
04024
04025
04026
04027 else if (best_regs_set && !best_match_p)
04028 {
04029 restore_best_regs:
04030
04031
04032
04033
04034
04035 DEBUG_PRINT1 ("Restoring best registers.\n");
04036
04037 d = match_end;
04038 dend = ((d >= string1 && d <= end1)
04039 ? end_match_1 : end_match_2);
04040
04041 for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
04042 {
04043 regstart[mcnt] = best_regstart[mcnt];
04044 regend[mcnt] = best_regend[mcnt];
04045 }
04046 }
04047 }
04048
04049 succeed_label:
04050 DEBUG_PRINT1 ("Accepting match.\n");
04051
04052
04053 if (regs && !bufp->no_sub)
04054 {
04055
04056 if (bufp->regs_allocated == REGS_UNALLOCATED)
04057 {
04058
04059
04060 regs->num_regs = MAX (RE_NREGS, num_regs + 1);
04061 regs->start = TALLOC (regs->num_regs, regoff_t);
04062 regs->end = TALLOC (regs->num_regs, regoff_t);
04063 if (regs->start == NULL || regs->end == NULL)
04064 {
04065 FREE_VARIABLES ();
04066 return -2;
04067 }
04068 bufp->regs_allocated = REGS_REALLOCATE;
04069 }
04070 else if (bufp->regs_allocated == REGS_REALLOCATE)
04071 {
04072
04073
04074 if (regs->num_regs < num_regs + 1)
04075 {
04076 regs->num_regs = num_regs + 1;
04077 RETALLOC (regs->start, regs->num_regs, regoff_t);
04078 RETALLOC (regs->end, regs->num_regs, regoff_t);
04079 if (regs->start == NULL || regs->end == NULL)
04080 {
04081 FREE_VARIABLES ();
04082 return -2;
04083 }
04084 }
04085 }
04086 else
04087 {
04088
04089
04090 assert (bufp->regs_allocated == REGS_FIXED);
04091 }
04092
04093
04094
04095
04096 if (regs->num_regs > 0)
04097 {
04098 regs->start[0] = pos;
04099 regs->end[0] = (MATCHING_IN_FIRST_STRING
04100 ? ((regoff_t) (d - string1))
04101 : ((regoff_t) (d - string2 + size1)));
04102 }
04103
04104
04105
04106 for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs);
04107 mcnt++)
04108 {
04109 if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
04110 regs->start[mcnt] = regs->end[mcnt] = -1;
04111 else
04112 {
04113 regs->start[mcnt]
04114 = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
04115 regs->end[mcnt]
04116 = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
04117 }
04118 }
04119
04120
04121
04122
04123
04124
04125 for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++)
04126 regs->start[mcnt] = regs->end[mcnt] = -1;
04127 }
04128
04129 DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
04130 nfailure_points_pushed, nfailure_points_popped,
04131 nfailure_points_pushed - nfailure_points_popped);
04132 DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
04133
04134 mcnt = d - pos - (MATCHING_IN_FIRST_STRING
04135 ? string1
04136 : string2 - size1);
04137
04138 DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
04139
04140 FREE_VARIABLES ();
04141 return mcnt;
04142 }
04143
04144
04145 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
04146 {
04147
04148
04149 case no_op:
04150 DEBUG_PRINT1 ("EXECUTING no_op.\n");
04151 break;
04152
04153 case succeed:
04154 DEBUG_PRINT1 ("EXECUTING succeed.\n");
04155 goto succeed_label;
04156
04157
04158
04159
04160 case exactn:
04161 mcnt = *p++;
04162 DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
04163
04164
04165
04166 if (translate)
04167 {
04168 do
04169 {
04170 PREFETCH ();
04171 if ((unsigned char) translate[(unsigned char) *d++]
04172 != (unsigned char) *p++)
04173 goto fail;
04174 }
04175 while (--mcnt);
04176 }
04177 else
04178 {
04179 do
04180 {
04181 PREFETCH ();
04182 if (*d++ != (char) *p++) goto fail;
04183 }
04184 while (--mcnt);
04185 }
04186 SET_REGS_MATCHED ();
04187 break;
04188
04189
04190
04191 case anychar:
04192 DEBUG_PRINT1 ("EXECUTING anychar.\n");
04193
04194 PREFETCH ();
04195
04196 if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
04197 || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
04198 goto fail;
04199
04200 SET_REGS_MATCHED ();
04201 DEBUG_PRINT2 (" Matched `%d'.\n", *d);
04202 d++;
04203 break;
04204
04205
04206 case charset:
04207 case charset_not:
04208 {
04209 register unsigned char c;
04210 boolean not = (re_opcode_t) *(p - 1) == charset_not;
04211
04212 DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
04213
04214 PREFETCH ();
04215 c = TRANSLATE (*d);
04216
04217
04218
04219 if (c < (unsigned) (*p * BYTEWIDTH)
04220 && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
04221 not = !not;
04222
04223 p += 1 + *p;
04224
04225 if (!not) goto fail;
04226
04227 SET_REGS_MATCHED ();
04228 d++;
04229 break;
04230 }
04231
04232
04233
04234
04235
04236
04237
04238 case start_memory:
04239 DEBUG_PRINT3 ("EXECUTING start_memory %d (%d):\n", *p, p[1]);
04240
04241
04242 p1 = p;
04243
04244 if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
04245 REG_MATCH_NULL_STRING_P (reg_info[*p])
04246 = group_match_null_string_p (&p1, pend, reg_info);
04247
04248
04249
04250
04251
04252
04253 old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
04254 ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
04255 : regstart[*p];
04256 DEBUG_PRINT2 (" old_regstart: %d\n",
04257 POINTER_TO_OFFSET (old_regstart[*p]));
04258
04259 regstart[*p] = d;
04260 DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
04261
04262 IS_ACTIVE (reg_info[*p]) = 1;
04263 MATCHED_SOMETHING (reg_info[*p]) = 0;
04264
04265
04266 set_regs_matched_done = 0;
04267
04268
04269 highest_active_reg = *p;
04270
04271
04272
04273 if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
04274 lowest_active_reg = *p;
04275
04276
04277 p += 2;
04278 just_past_start_mem = p;
04279
04280 break;
04281
04282
04283
04284
04285
04286 case stop_memory:
04287 DEBUG_PRINT3 ("EXECUTING stop_memory %d (%d):\n", *p, p[1]);
04288
04289
04290
04291
04292
04293
04294 old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
04295 ? REG_UNSET (regend[*p]) ? d : regend[*p]
04296 : regend[*p];
04297 DEBUG_PRINT2 (" old_regend: %d\n",
04298 POINTER_TO_OFFSET (old_regend[*p]));
04299
04300 regend[*p] = d;
04301 DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
04302
04303
04304 IS_ACTIVE (reg_info[*p]) = 0;
04305
04306
04307 set_regs_matched_done = 0;
04308
04309
04310
04311 if (lowest_active_reg == highest_active_reg)
04312 {
04313 lowest_active_reg = NO_LOWEST_ACTIVE_REG;
04314 highest_active_reg = NO_HIGHEST_ACTIVE_REG;
04315 }
04316 else
04317 {
04318
04319
04320
04321 unsigned char r = *p - 1;
04322 while (r > 0 && !IS_ACTIVE (reg_info[r]))
04323 r--;
04324
04325
04326
04327
04328
04329
04330
04331
04332 if (r == 0)
04333 {
04334 lowest_active_reg = NO_LOWEST_ACTIVE_REG;
04335 highest_active_reg = NO_HIGHEST_ACTIVE_REG;
04336 }
04337 else
04338 highest_active_reg = r;
04339 }
04340
04341
04342
04343
04344
04345
04346 if ((!MATCHED_SOMETHING (reg_info[*p])
04347 || just_past_start_mem == p - 1)
04348 && (p + 2) < pend)
04349 {
04350 boolean is_a_jump_n = false;
04351
04352 p1 = p + 2;
04353 mcnt = 0;
04354 switch ((re_opcode_t) *p1++)
04355 {
04356 case jump_n:
04357 is_a_jump_n = true;
04358 case pop_failure_jump:
04359 case maybe_pop_jump:
04360 case jump:
04361 case dummy_failure_jump:
04362 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
04363 if (is_a_jump_n)
04364 p1 += 2;
04365 break;
04366
04367 default:
04368 ;
04369 }
04370 p1 += mcnt;
04371
04372
04373
04374
04375
04376
04377 if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
04378 && (re_opcode_t) p1[3] == start_memory && p1[4] == *p)
04379 {
04380
04381
04382
04383
04384
04385
04386
04387
04388
04389
04390 if (EVER_MATCHED_SOMETHING (reg_info[*p]))
04391 {
04392 unsigned r;
04393
04394 EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
04395
04396
04397 for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1);
04398 r++)
04399 {
04400 regstart[r] = old_regstart[r];
04401
04402
04403 if (old_regend[r] >= regstart[r])
04404 regend[r] = old_regend[r];
04405 }
04406 }
04407 p1++;
04408 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
04409 PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
04410
04411 goto fail;
04412 }
04413 }
04414
04415
04416 p += 2;
04417 break;
04418
04419
04420
04421
04422 case duplicate:
04423 {
04424 register const char *d2, *dend2;
04425 int regno = *p++;
04426 DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
04427
04428
04429 if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
04430 goto fail;
04431
04432
04433 d2 = regstart[regno];
04434
04435
04436
04437
04438
04439
04440 dend2 = ((FIRST_STRING_P (regstart[regno])
04441 == FIRST_STRING_P (regend[regno]))
04442 ? regend[regno] : end_match_1);
04443 for (;;)
04444 {
04445
04446
04447 while (d2 == dend2)
04448 {
04449 if (dend2 == end_match_2) break;
04450 if (dend2 == regend[regno]) break;
04451
04452
04453 d2 = string2;
04454 dend2 = regend[regno];
04455 }
04456
04457 if (d2 == dend2) break;
04458
04459
04460 PREFETCH ();
04461
04462
04463 mcnt = dend - d;
04464
04465
04466
04467 if (mcnt > dend2 - d2)
04468 mcnt = dend2 - d2;
04469
04470
04471
04472 if (translate
04473 ? bcmp_translate (d, d2, mcnt, translate)
04474 : memcmp (d, d2, mcnt))
04475 goto fail;
04476 d += mcnt, d2 += mcnt;
04477
04478
04479 SET_REGS_MATCHED ();
04480 }
04481 }
04482 break;
04483
04484
04485
04486
04487
04488 case begline:
04489 DEBUG_PRINT1 ("EXECUTING begline.\n");
04490
04491 if (AT_STRINGS_BEG (d))
04492 {
04493 if (!bufp->not_bol) break;
04494 }
04495 else if (d[-1] == '\n' && bufp->newline_anchor)
04496 {
04497 break;
04498 }
04499
04500 goto fail;
04501
04502
04503
04504 case endline:
04505 DEBUG_PRINT1 ("EXECUTING endline.\n");
04506
04507 if (AT_STRINGS_END (d))
04508 {
04509 if (!bufp->not_eol) break;
04510 }
04511
04512
04513 else if ((d == end1 ? *string2 : *d) == '\n'
04514 && bufp->newline_anchor)
04515 {
04516 break;
04517 }
04518 goto fail;
04519
04520
04521
04522 case begbuf:
04523 DEBUG_PRINT1 ("EXECUTING begbuf.\n");
04524 if (AT_STRINGS_BEG (d))
04525 break;
04526 goto fail;
04527
04528
04529
04530 case endbuf:
04531 DEBUG_PRINT1 ("EXECUTING endbuf.\n");
04532 if (AT_STRINGS_END (d))
04533 break;
04534 goto fail;
04535
04536
04537
04538
04539
04540
04541
04542
04543
04544
04545
04546
04547
04548
04549
04550
04551
04552
04553 case on_failure_keep_string_jump:
04554 DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
04555
04556 EXTRACT_NUMBER_AND_INCR (mcnt, p);
04557 #ifdef _LIBC
04558 DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt);
04559 #else
04560 DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
04561 #endif
04562
04563 PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
04564 break;
04565
04566
04567
04568
04569
04570
04571
04572
04573
04574
04575
04576
04577
04578
04579 case on_failure_jump:
04580 on_failure:
04581 DEBUG_PRINT1 ("EXECUTING on_failure_jump");
04582
04583 EXTRACT_NUMBER_AND_INCR (mcnt, p);
04584 #ifdef _LIBC
04585 DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt);
04586 #else
04587 DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
04588 #endif
04589
04590
04591
04592
04593
04594
04595
04596
04597
04598
04599 p1 = p;
04600
04601
04602
04603
04604
04605 while (p1 < pend && (re_opcode_t) *p1 == no_op)
04606 p1++;
04607
04608 if (p1 < pend && (re_opcode_t) *p1 == start_memory)
04609 {
04610
04611
04612
04613
04614 highest_active_reg = *(p1 + 1) + *(p1 + 2);
04615 if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
04616 lowest_active_reg = *(p1 + 1);
04617 }
04618
04619 DEBUG_PRINT1 (":\n");
04620 PUSH_FAILURE_POINT (p + mcnt, d, -2);
04621 break;
04622
04623
04624
04625
04626 case maybe_pop_jump:
04627 EXTRACT_NUMBER_AND_INCR (mcnt, p);
04628 DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
04629 {
04630 register unsigned char *p2 = p;
04631
04632
04633
04634
04635
04636
04637
04638
04639
04640
04641
04642
04643
04644
04645
04646
04647
04648
04649 while (1)
04650 {
04651 if (p2 + 2 < pend
04652 && ((re_opcode_t) *p2 == stop_memory
04653 || (re_opcode_t) *p2 == start_memory))
04654 p2 += 3;
04655 else if (p2 + 6 < pend
04656 && (re_opcode_t) *p2 == dummy_failure_jump)
04657 p2 += 6;
04658 else
04659 break;
04660 }
04661
04662 p1 = p + mcnt;
04663
04664
04665
04666
04667
04668 if (p2 == pend)
04669 {
04670
04671
04672
04673 p[-3] = (unsigned char) pop_failure_jump;
04674 DEBUG_PRINT1
04675 (" End of pattern: change to `pop_failure_jump'.\n");
04676 }
04677
04678 else if ((re_opcode_t) *p2 == exactn
04679 || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
04680 {
04681 register unsigned char c
04682 = *p2 == (unsigned char) endline ? '\n' : p2[2];
04683
04684 if ((re_opcode_t) p1[3] == exactn && p1[5] != c)
04685 {
04686 p[-3] = (unsigned char) pop_failure_jump;
04687 DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
04688 c, p1[5]);
04689 }
04690
04691 else if ((re_opcode_t) p1[3] == charset
04692 || (re_opcode_t) p1[3] == charset_not)
04693 {
04694 int not = (re_opcode_t) p1[3] == charset_not;
04695
04696 if (c < (unsigned char) (p1[4] * BYTEWIDTH)
04697 && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
04698 not = !not;
04699
04700
04701
04702 if (!not)
04703 {
04704 p[-3] = (unsigned char) pop_failure_jump;
04705 DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
04706 }
04707 }
04708 }
04709 else if ((re_opcode_t) *p2 == charset)
04710 {
04711 #ifdef DEBUG
04712 register unsigned char c
04713 = *p2 == (unsigned char) endline ? '\n' : p2[2];
04714 #endif
04715
04716 #if 0
04717 if ((re_opcode_t) p1[3] == exactn
04718 && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
04719 && (p2[2 + p1[5] / BYTEWIDTH]
04720 & (1 << (p1[5] % BYTEWIDTH)))))
04721 #else
04722 if ((re_opcode_t) p1[3] == exactn
04723 && ! ((int) p2[1] * BYTEWIDTH > (int) p1[4]
04724 && (p2[2 + p1[4] / BYTEWIDTH]
04725 & (1 << (p1[4] % BYTEWIDTH)))))
04726 #endif
04727 {
04728 p[-3] = (unsigned char) pop_failure_jump;
04729 DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
04730 c, p1[5]);
04731 }
04732
04733 else if ((re_opcode_t) p1[3] == charset_not)
04734 {
04735 int idx;
04736
04737
04738 for (idx = 0; idx < (int) p2[1]; idx++)
04739 if (! (p2[2 + idx] == 0
04740 || (idx < (int) p1[4]
04741 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
04742 break;
04743
04744 if (idx == p2[1])
04745 {
04746 p[-3] = (unsigned char) pop_failure_jump;
04747 DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
04748 }
04749 }
04750 else if ((re_opcode_t) p1[3] == charset)
04751 {
04752 int idx;
04753
04754
04755 for (idx = 0;
04756 idx < (int) p2[1] && idx < (int) p1[4];
04757 idx++)
04758 if ((p2[2 + idx] & p1[5 + idx]) != 0)
04759 break;
04760
04761 if (idx == p2[1] || idx == p1[4])
04762 {
04763 p[-3] = (unsigned char) pop_failure_jump;
04764 DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
04765 }
04766 }
04767 }
04768 }
04769 p -= 2;
04770 if ((re_opcode_t) p[-1] != pop_failure_jump)
04771 {
04772 p[-1] = (unsigned char) jump;
04773 DEBUG_PRINT1 (" Match => jump.\n");
04774 goto unconditional_jump;
04775 }
04776
04777
04778
04779
04780
04781
04782
04783
04784
04785 case pop_failure_jump:
04786 {
04787
04788
04789
04790
04791
04792 active_reg_t dummy_low_reg, dummy_high_reg;
04793 unsigned char *pdummy;
04794 const char *sdummy;
04795
04796 DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
04797 POP_FAILURE_POINT (sdummy, pdummy,
04798 dummy_low_reg, dummy_high_reg,
04799 reg_dummy, reg_dummy, reg_info_dummy);
04800 }
04801
04802
04803 unconditional_jump:
04804 #ifdef _LIBC
04805 DEBUG_PRINT2 ("\n%p: ", p);
04806 #else
04807 DEBUG_PRINT2 ("\n0x%x: ", p);
04808 #endif
04809
04810
04811
04812 case jump:
04813 EXTRACT_NUMBER_AND_INCR (mcnt, p);
04814 DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
04815 p += mcnt;
04816 #ifdef _LIBC
04817 DEBUG_PRINT2 ("(to %p).\n", p);
04818 #else
04819 DEBUG_PRINT2 ("(to 0x%x).\n", p);
04820 #endif
04821 break;
04822
04823
04824
04825
04826 case jump_past_alt:
04827 DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
04828 goto unconditional_jump;
04829
04830
04831
04832
04833
04834
04835
04836 case dummy_failure_jump:
04837 DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
04838
04839
04840 PUSH_FAILURE_POINT (NULL, NULL, -2);
04841 goto unconditional_jump;
04842
04843
04844
04845
04846
04847
04848
04849 case push_dummy_failure:
04850 DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
04851
04852
04853 PUSH_FAILURE_POINT (NULL, NULL, -2);
04854 break;
04855
04856
04857
04858 case succeed_n:
04859 EXTRACT_NUMBER (mcnt, p + 2);
04860 DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
04861
04862 assert (mcnt >= 0);
04863
04864 if (mcnt > 0)
04865 {
04866 mcnt--;
04867 p += 2;
04868 STORE_NUMBER_AND_INCR (p, mcnt);
04869 #ifdef _LIBC
04870 DEBUG_PRINT3 (" Setting %p to %d.\n", p - 2, mcnt);
04871 #else
04872 DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p - 2, mcnt);
04873 #endif
04874 }
04875 else if (mcnt == 0)
04876 {
04877 #ifdef _LIBC
04878 DEBUG_PRINT2 (" Setting two bytes from %p to no_op.\n", p+2);
04879 #else
04880 DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n", p+2);
04881 #endif
04882 p[2] = (unsigned char) no_op;
04883 p[3] = (unsigned char) no_op;
04884 goto on_failure;
04885 }
04886 break;
04887
04888 case jump_n:
04889 EXTRACT_NUMBER (mcnt, p + 2);
04890 DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
04891
04892
04893 if (mcnt)
04894 {
04895 mcnt--;
04896 STORE_NUMBER (p + 2, mcnt);
04897 #ifdef _LIBC
04898 DEBUG_PRINT3 (" Setting %p to %d.\n", p + 2, mcnt);
04899 #else
04900 DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p + 2, mcnt);
04901 #endif
04902 goto unconditional_jump;
04903 }
04904
04905 else
04906 p += 4;
04907 break;
04908
04909 case set_number_at:
04910 {
04911 DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
04912
04913 EXTRACT_NUMBER_AND_INCR (mcnt, p);
04914 p1 = p + mcnt;
04915 EXTRACT_NUMBER_AND_INCR (mcnt, p);
04916 #ifdef _LIBC
04917 DEBUG_PRINT3 (" Setting %p to %d.\n", p1, mcnt);
04918 #else
04919 DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt);
04920 #endif
04921 STORE_NUMBER (p1, mcnt);
04922 break;
04923 }
04924
04925 #if 0
04926
04927
04928
04929
04930
04931 case wordbound:
04932 DEBUG_PRINT1 ("EXECUTING wordbound.\n");
04933 if (AT_WORD_BOUNDARY (d))
04934 break;
04935 goto fail;
04936
04937 case notwordbound:
04938 DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
04939 if (AT_WORD_BOUNDARY (d))
04940 goto fail;
04941 break;
04942 #else
04943 case wordbound:
04944 {
04945 boolean prevchar, thischar;
04946
04947 DEBUG_PRINT1 ("EXECUTING wordbound.\n");
04948 if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
04949 break;
04950
04951 prevchar = WORDCHAR_P (d - 1);
04952 thischar = WORDCHAR_P (d);
04953 if (prevchar != thischar)
04954 break;
04955 goto fail;
04956 }
04957
04958 case notwordbound:
04959 {
04960 boolean prevchar, thischar;
04961
04962 DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
04963 if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
04964 goto fail;
04965
04966 prevchar = WORDCHAR_P (d - 1);
04967 thischar = WORDCHAR_P (d);
04968 if (prevchar != thischar)
04969 goto fail;
04970 break;
04971 }
04972 #endif
04973
04974 case wordbeg:
04975 DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
04976 if (WORDCHAR_P (d) && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
04977 break;
04978 goto fail;
04979
04980 case wordend:
04981 DEBUG_PRINT1 ("EXECUTING wordend.\n");
04982 if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
04983 && (!WORDCHAR_P (d) || AT_STRINGS_END (d)))
04984 break;
04985 goto fail;
04986
04987 #ifdef emacs
04988 case before_dot:
04989 DEBUG_PRINT1 ("EXECUTING before_dot.\n");
04990 if (PTR_CHAR_POS ((unsigned char *) d) >= point)
04991 goto fail;
04992 break;
04993
04994 case at_dot:
04995 DEBUG_PRINT1 ("EXECUTING at_dot.\n");
04996 if (PTR_CHAR_POS ((unsigned char *) d) != point)
04997 goto fail;
04998 break;
04999
05000 case after_dot:
05001 DEBUG_PRINT1 ("EXECUTING after_dot.\n");
05002 if (PTR_CHAR_POS ((unsigned char *) d) <= point)
05003 goto fail;
05004 break;
05005
05006 case syntaxspec:
05007 DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
05008 mcnt = *p++;
05009 goto matchsyntax;
05010
05011 case wordchar:
05012 DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
05013 mcnt = (int) Sword;
05014 matchsyntax:
05015 PREFETCH ();
05016
05017 d++;
05018 if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
05019 goto fail;
05020 SET_REGS_MATCHED ();
05021 break;
05022
05023 case notsyntaxspec:
05024 DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
05025 mcnt = *p++;
05026 goto matchnotsyntax;
05027
05028 case notwordchar:
05029 DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
05030 mcnt = (int) Sword;
05031 matchnotsyntax:
05032 PREFETCH ();
05033
05034 d++;
05035 if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
05036 goto fail;
05037 SET_REGS_MATCHED ();
05038 break;
05039
05040 #else
05041 case wordchar:
05042 DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
05043 PREFETCH ();
05044 if (!WORDCHAR_P (d))
05045 goto fail;
05046 SET_REGS_MATCHED ();
05047 d++;
05048 break;
05049
05050 case notwordchar:
05051 DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
05052 PREFETCH ();
05053 if (WORDCHAR_P (d))
05054 goto fail;
05055 SET_REGS_MATCHED ();
05056 d++;
05057 break;
05058 #endif
05059
05060 default:
05061 abort ();
05062 }
05063 continue;
05064
05065
05066
05067 fail:
05068 if (!FAIL_STACK_EMPTY ())
05069 {
05070 DEBUG_PRINT1 ("\nFAIL:\n");
05071 POP_FAILURE_POINT (d, p,
05072 lowest_active_reg, highest_active_reg,
05073 regstart, regend, reg_info);
05074
05075
05076 if (!p)
05077 goto fail;
05078
05079
05080 assert (p <= pend);
05081 if (p < pend)
05082 {
05083 boolean is_a_jump_n = false;
05084
05085
05086
05087 switch ((re_opcode_t) *p)
05088 {
05089 case jump_n:
05090 is_a_jump_n = true;
05091 case maybe_pop_jump:
05092 case pop_failure_jump:
05093 case jump:
05094 p1 = p + 1;
05095 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
05096 p1 += mcnt;
05097
05098 if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
05099 || (!is_a_jump_n
05100 && (re_opcode_t) *p1 == on_failure_jump))
05101 goto fail;
05102 break;
05103 default:
05104 ;
05105 }
05106 }
05107
05108 if (d >= string1 && d <= end1)
05109 dend = end_match_1;
05110 }
05111 else
05112 break;
05113 }
05114
05115 if (best_regs_set)
05116 goto restore_best_regs;
05117
05118 FREE_VARIABLES ();
05119
05120 return -1;
05121 }
05122
05123
05124
05125
05126
05127
05128
05129
05130
05131
05132
05133
05134
05135
/* Decide whether a group in the compiled pattern can match the empty
   string.

   P points at a pointer into the compiled pattern; scanning starts two
   bytes past *P and stops before END.  REG_INFO is only forwarded to the
   helper routines.

   Returns true when a stop_memory opcode is reached; in that case *P is
   set to the address two bytes after that opcode byte.  Returns false as
   soon as a helper reports that some piece cannot match the empty string,
   or when END is reached without meeting stop_memory; *P is left
   untouched on those paths.  */
static boolean
group_match_null_string_p (p, end, reg_info)
    unsigned char **p, *end;
    register_info_type *reg_info;
{
  int mcnt;
  /* Skip two bytes at *P -- presumably the operands of the start_memory
     opcode that led here (the caller passes a pointer to the register
     number); TODO confirm against the compiler side.  */
  unsigned char *p1 = *p + 2;

  while (p1 < end)
    {
      /* Opcodes that need group-specific treatment are handled here;
         everything else is delegated to common_op_match_null_string_p.  */
      switch ((re_opcode_t) *p1)
        {
        /* Possible start of a list of alternatives.  */
        case on_failure_jump:
          p1++;
          /* Presumably reads the two-byte jump offset into MCNT and moves
             P1 past it (macro defined elsewhere in this file).  */
          EXTRACT_NUMBER_AND_INCR (mcnt, p1);

          /* Only non-negative offsets are analysed.  For a negative
             offset nothing more is done for this opcode and scanning
             resumes right after its operand.  */
          if (mcnt >= 0)
            {
              /* P1 + MCNT is the jump target.  If the opcode three bytes
                 before that target is jump_past_alt, the bytes from P1 up
                 to that opcode form one alternative that is followed by
                 at least one more.  */
              while ((re_opcode_t) p1[mcnt-3] == jump_past_alt)
                {
                  /* Check this alternative on its own, excluding the
                     trailing three bytes of the jump_past_alt.  */
                  if (!alt_match_null_string_p (p1, p1 + mcnt - 3,
                                                reg_info))
                    return false;

                  /* Move to the start of the next alternative.  */
                  p1 += mcnt;

                  /* Without another on_failure_jump here, what follows is
                     treated as the last alternative.  */
                  if ((re_opcode_t) *p1 != on_failure_jump)
                    break;

                  /* Step over the on_failure_jump and fetch its offset to
                     see whether yet another alternative follows.  */
                  p1++;
                  EXTRACT_NUMBER_AND_INCR (mcnt, p1);
                  if ((re_opcode_t) p1[mcnt-3] != jump_past_alt)
                    {
                      /* Not an alternative after all: undo the three
                         bytes just consumed (opcode plus operand) so P1
                         points at the on_failure_jump again.  */
                      p1 -= 3;
                      break;
                    }
                }

              /* Re-read the number stored in the two bytes just before
                 P1 and use it as the length of the final alternative.
                 NOTE(review): presumably these are the offset bytes of
                 the preceding jump_past_alt -- confirm.  */
              EXTRACT_NUMBER (mcnt, p1 - 2);

              if (!alt_match_null_string_p (p1, p1 + mcnt, reg_info))
                return false;

              /* Continue after the last alternative.  */
              p1 += mcnt;
            }
          break;


        case stop_memory:
          /* The byte after the opcode must equal the byte *P points at
             (the register number read by the caller).  */
          assert (p1[1] == **p);
          *p = p1 + 2;
          return true;


        default:
          /* The helper advances P1 itself when it returns true.  */
          if (!common_op_match_null_string_p (&p1, end, reg_info))
            return false;
        }
    }

  /* Reached END without seeing stop_memory.  */
  return false;
}
05239
05240
05241
05242
05243
05244
05245 static boolean
05246 alt_match_null_string_p (p, end, reg_info)
05247 unsigned char *p, *end;
05248 register_info_type *reg_info;
05249 {
05250 int mcnt;
05251 unsigned char *p1 = p;
05252
05253 while (p1 < end)
05254 {
05255
05256
05257
05258 switch ((re_opcode_t) *p1)
05259 {
05260
05261 case on_failure_jump:
05262 p1++;
05263 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
05264 p1 += mcnt;
05265 break;
05266
05267 default:
05268 if (!common_op_match_null_string_p (&p1, end, reg_info))
05269 return false;
05270 }
05271 }
05272
05273 return true;
05274 }
05275
05276
05277
05278
05279
05280
05281
05282 static boolean
05283 common_op_match_null_string_p (p, end, reg_info)
05284 unsigned char **p, *end;
05285 register_info_type *reg_info;
05286 {
05287 int mcnt;
05288 boolean ret;
05289 int reg_no;
05290 unsigned char *p1 = *p;
05291
05292 switch ((re_opcode_t) *p1++)
05293 {
05294 case no_op:
05295 case begline:
05296 case endline:
05297 case begbuf:
05298 case endbuf:
05299 case wordbeg:
05300 case wordend:
05301 case wordbound:
05302 case notwordbound:
05303 #ifdef emacs
05304 case before_dot:
05305 case at_dot:
05306 case after_dot:
05307 #endif
05308 break;
05309
05310 case start_memory:
05311 reg_no = *p1;
05312 assert (reg_no > 0 && reg_no <= MAX_REGNUM);
05313 ret = group_match_null_string_p (&p1, end, reg_info);
05314
05315
05316
05317
05318 if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
05319 REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
05320
05321 if (!ret)
05322 return false;
05323 break;
05324
05325
05326 case jump:
05327 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
05328 if (mcnt >= 0)
05329 p1 += mcnt;
05330 else
05331 return false;
05332 break;
05333
05334 case succeed_n:
05335
05336 p1 += 2;
05337 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
05338
05339 if (mcnt == 0)
05340 {
05341 p1 -= 4;
05342 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
05343 p1 += mcnt;
05344 }
05345 else
05346 return false;
05347 break;
05348
05349 case duplicate:
05350 if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
05351 return false;
05352 break;
05353
05354 case set_number_at:
05355 p1 += 4;
05356
05357 default:
05358
05359 return false;
05360 }
05361
05362 *p = p1;
05363 return true;
05364 }
05365
05366
05367
05368
05369
05370 static int
05371 bcmp_translate (s1, s2, len, translate)
05372 const char *s1, *s2;
05373 register int len;
05374 RE_TRANSLATE_TYPE translate;
05375 {
05376 register const unsigned char *p1 = (const unsigned char *) s1;
05377 register const unsigned char *p2 = (const unsigned char *) s2;
05378 while (len)
05379 {
05380 if (translate[*p1++] != translate[*p2++]) return 1;
05381 len--;
05382 }
05383 return 0;
05384 }
05385
05386
05387
05388
05389
05390
05391
05392
05393
05394
05395
05396
05397 const char *
05398 re_compile_pattern (pattern, length, bufp)
05399 const char *pattern;
05400 size_t length;
05401 struct re_pattern_buffer *bufp;
05402 {
05403 reg_errcode_t ret;
05404
05405
05406
05407 bufp->regs_allocated = REGS_UNALLOCATED;
05408
05409
05410
05411
05412 bufp->no_sub = 0;
05413
05414
05415 bufp->newline_anchor = 1;
05416
05417 ret = regex_compile (pattern, length, re_syntax_options, bufp);
05418
05419 if (!ret)
05420 return NULL;
05421 return gettext (re_error_msgid[(int) ret]);
05422 }
05423 #ifdef _LIBC
05424 weak_alias (__re_compile_pattern, re_compile_pattern)
05425 #endif
05426
05427
05428
05429
05430 #if defined _REGEX_RE_COMP || defined _LIBC
05431
05432
05433 static struct re_pattern_buffer re_comp_buf;
05434
05435 char *
05436 #ifdef _LIBC
05437
05438
05439
05440 weak_function
05441 #endif
05442 re_comp (s)
05443 const char *s;
05444 {
05445 reg_errcode_t ret;
05446
05447 if (!s)
05448 {
05449 if (!re_comp_buf.buffer)
05450 return gettext ("No previous regular expression");
05451 return 0;
05452 }
05453
05454 if (!re_comp_buf.buffer)
05455 {
05456 re_comp_buf.buffer = (unsigned char *) malloc (200);
05457 if (re_comp_buf.buffer == NULL)
05458 return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
05459 re_comp_buf.allocated = 200;
05460
05461 re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
05462 if (re_comp_buf.fastmap == NULL)
05463 return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
05464 }
05465
05466
05467
05468
05469
05470 re_comp_buf.newline_anchor = 1;
05471
05472 ret = regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
05473
05474 if (!ret)
05475 return NULL;
05476
05477
05478 return (char *) gettext (re_error_msgid[(int) ret]);
05479 }
05480
05481
05482 int
05483 #ifdef _LIBC
05484 weak_function
05485 #endif
05486 re_exec (s)
05487 const char *s;
05488 {
05489 const int len = strlen (s);
05490 return
05491 0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
05492 }
05493
05494 #endif
05495
05496
05497
05498 #ifndef emacs
05499
05500
05501
05502
05503
05504
05505
05506
05507
05508
05509
05510
05511
05512
05513
05514
05515
05516
05517
05518
05519
05520
05521
05522
05523
05524
05525
05526
05527
05528
05529
05530
05531
05532
05533
05534
05535 int
05536 regcomp (preg, pattern, cflags)
05537 regex_t *preg;
05538 const char *pattern;
05539 int cflags;
05540 {
05541 reg_errcode_t ret;
05542 reg_syntax_t syntax
05543 = (cflags & REG_EXTENDED) ?
05544 RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
05545
05546
05547 preg->buffer = 0;
05548 preg->allocated = 0;
05549 preg->used = 0;
05550
05551
05552 preg->fastmap = (char *) malloc (1 << BYTEWIDTH);
05553
05554 if (cflags & REG_ICASE)
05555 {
05556 unsigned i;
05557
05558 preg->translate
05559 = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
05560 * sizeof (*(RE_TRANSLATE_TYPE)0));
05561 if (preg->translate == NULL)
05562 return (int) REG_ESPACE;
05563
05564
05565 for (i = 0; i < CHAR_SET_SIZE; i++)
05566 preg->translate[i] = TOLOWER (i);
05567 }
05568 else
05569 preg->translate = NULL;
05570
05571
05572 if (cflags & REG_NEWLINE)
05573 {
05574 syntax &= ~RE_DOT_NEWLINE;
05575 syntax |= RE_HAT_LISTS_NOT_NEWLINE;
05576
05577 preg->newline_anchor = 1;
05578 }
05579 else
05580 preg->newline_anchor = 0;
05581
05582 preg->no_sub = !!(cflags & REG_NOSUB);
05583
05584
05585
05586 ret = regex_compile (pattern, strlen (pattern), syntax, preg);
05587
05588
05589
05590 if (ret == REG_ERPAREN) ret = REG_EPAREN;
05591
05592 if (ret == REG_NOERROR && preg->fastmap)
05593 {
05594
05595
05596 if (re_compile_fastmap (preg) == -2)
05597 {
05598
05599
05600 free (preg->fastmap);
05601 preg->fastmap = NULL;
05602 }
05603 }
05604
05605 return (int) ret;
05606 }
05607 #ifdef _LIBC
05608 weak_alias (__regcomp, regcomp)
05609 #endif
05610
05611
05612
05613
05614
05615
05616
05617
05618
05619
05620
05621
05622
05623
05624
05625
05626 int
05627 regexec (preg, string, nmatch, pmatch, eflags)
05628 const regex_t *preg;
05629 const char *string;
05630 size_t nmatch;
05631 regmatch_t pmatch[];
05632 int eflags;
05633 {
05634 int ret;
05635 struct re_registers regs;
05636 regex_t private_preg;
05637 int len = strlen (string);
05638 boolean want_reg_info = !preg->no_sub && nmatch > 0;
05639
05640 private_preg = *preg;
05641
05642 private_preg.not_bol = !!(eflags & REG_NOTBOL);
05643 private_preg.not_eol = !!(eflags & REG_NOTEOL);
05644
05645
05646
05647
05648 private_preg.regs_allocated = REGS_FIXED;
05649
05650 if (want_reg_info)
05651 {
05652 regs.num_regs = nmatch;
05653 regs.start = TALLOC (nmatch, regoff_t);
05654 regs.end = TALLOC (nmatch, regoff_t);
05655 if (regs.start == NULL || regs.end == NULL)
05656 return (int) REG_NOMATCH;
05657 }
05658
05659
05660 ret = re_search (&private_preg, string, len,
05661 0, len,
05662 want_reg_info ? ®s : (struct re_registers *) 0);
05663
05664
05665 if (want_reg_info)
05666 {
05667 if (ret >= 0)
05668 {
05669 unsigned r;
05670
05671 for (r = 0; r < nmatch; r++)
05672 {
05673 pmatch[r].rm_so = regs.start[r];
05674 pmatch[r].rm_eo = regs.end[r];
05675 }
05676 }
05677
05678
05679 free (regs.start);
05680 free (regs.end);
05681 }
05682
05683
05684 return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
05685 }
05686 #ifdef _LIBC
05687 weak_alias (__regexec, regexec)
05688 #endif
05689
05690
05691
05692
05693
05694 size_t
05695 regerror (errcode, preg, errbuf, errbuf_size)
05696 int errcode;
05697 const regex_t *preg;
05698 char *errbuf;
05699 size_t errbuf_size;
05700 {
05701 const char *msg;
05702 size_t msg_size;
05703 (void)preg;
05704
05705 if (errcode < 0
05706 || errcode >= (int) (sizeof (re_error_msgid)
05707 / sizeof (re_error_msgid[0])))
05708
05709
05710
05711
05712 abort ();
05713
05714 msg = gettext (re_error_msgid[errcode]);
05715
05716 msg_size = strlen (msg) + 1;
05717
05718 if (errbuf_size != 0)
05719 {
05720 if (msg_size > errbuf_size)
05721 {
05722 #if defined HAVE_MEMPCPY || defined _LIBC
05723 *((char *) __mempcpy (errbuf, msg, errbuf_size - 1)) = '\0';
05724 #else
05725 memcpy (errbuf, msg, errbuf_size - 1);
05726 errbuf[errbuf_size - 1] = 0;
05727 #endif
05728 }
05729 else
05730 memcpy (errbuf, msg, msg_size);
05731 }
05732
05733 return msg_size;
05734 }
05735 #ifdef _LIBC
05736 weak_alias (__regerror, regerror)
05737 #endif
05738
05739
05740
05741
05742 void
05743 regfree (preg)
05744 regex_t *preg;
05745 {
05746 if (preg->buffer != NULL)
05747 free (preg->buffer);
05748 preg->buffer = NULL;
05749
05750 preg->allocated = 0;
05751 preg->used = 0;
05752
05753 if (preg->fastmap != NULL)
05754 free (preg->fastmap);
05755 preg->fastmap = NULL;
05756 preg->fastmap_accurate = 0;
05757
05758 if (preg->translate != NULL)
05759 free (preg->translate);
05760 preg->translate = NULL;
05761 }
05762 #ifdef _LIBC
05763 weak_alias (__regfree, regfree)
05764 #endif
05765
05766 #endif