00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030 #if defined _AIX && !defined __GNUC__ && !defined REGEX_MALLOC
00031 #pragma alloca
00032 #endif
00033
00034 #undef _GNU_SOURCE
00035 #define _GNU_SOURCE
00036
00037 #ifndef INSIDE_RECURSION
00038 # ifdef HAVE_CONFIG_H
00039 # include <config.h>
00040 # endif
00041 #endif
00042
00043 #include <ansidecl.h>
00044
00045 #ifndef INSIDE_RECURSION
00046
00047 # if defined STDC_HEADERS && !defined emacs
00048 # include <stddef.h>
00049 # else
00050
00051 # include <sys/types.h>
00052 # endif
00053
00054 # define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC)
00055
00056
00057
00058 # if defined _LIBC || WIDE_CHAR_SUPPORT
00059
00060 # include <wchar.h>
00061 # include <wctype.h>
00062 # endif
00063
00064 # ifdef _LIBC
00065
00066 # define regfree(preg) __regfree (preg)
00067 # define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
00068 # define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
00069 # define regerror(errcode, preg, errbuf, errbuf_size) \
00070 __regerror(errcode, preg, errbuf, errbuf_size)
00071 # define re_set_registers(bu, re, nu, st, en) \
00072 __re_set_registers (bu, re, nu, st, en)
00073 # define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
00074 __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
00075 # define re_match(bufp, string, size, pos, regs) \
00076 __re_match (bufp, string, size, pos, regs)
00077 # define re_search(bufp, string, size, startpos, range, regs) \
00078 __re_search (bufp, string, size, startpos, range, regs)
00079 # define re_compile_pattern(pattern, length, bufp) \
00080 __re_compile_pattern (pattern, length, bufp)
00081 # define re_set_syntax(syntax) __re_set_syntax (syntax)
00082 # define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
00083 __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
00084 # define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
00085
00086 # define btowc __btowc
00087
00088
00089 # include <locale/localeinfo.h>
00090 # include <locale/elem-hash.h>
00091 # include <langinfo.h>
00092 # include <locale/coll-lookup.h>
00093 # endif
00094
00095
00096 # if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC
00097 # include <libintl.h>
00098 # ifdef _LIBC
00099 # undef gettext
00100 # define gettext(msgid) __dcgettext ("libc", msgid, LC_MESSAGES)
00101 # endif
00102 # else
00103 # define gettext(msgid) (msgid)
00104 # endif
00105
00106 # ifndef gettext_noop
00107
00108
00109 # define gettext_noop(String) String
00110 # endif
00111
00112
00113
00114 # ifdef emacs
00115
00116 # include "lisp.h"
00117 # include "buffer.h"
00118 # include "syntax.h"
00119
00120 # else
00121
00122
00123
00124
00125 # undef REL_ALLOC
00126
00127 # if defined STDC_HEADERS || defined _LIBC
00128 # include <stdlib.h>
00129 # else
00130 char *malloc ();
00131 char *realloc ();
00132 # endif
00133
00134
00135
00136 # ifdef INHIBIT_STRING_HEADER
00137 # if !(defined HAVE_BZERO && defined HAVE_BCOPY)
00138 # if !defined bzero && !defined bcopy
00139 # undef INHIBIT_STRING_HEADER
00140 # endif
00141 # endif
00142 # endif
00143
00144
00145
00146
00147 # ifndef INHIBIT_STRING_HEADER
00148 # if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
00149 # include <string.h>
00150 # ifndef bzero
00151 # ifndef _LIBC
00152 # define bzero(s, n) (memset (s, '\0', n), (s))
00153 # else
00154 # define bzero(s, n) __bzero (s, n)
00155 # endif
00156 # endif
00157 # else
00158 # include <strings.h>
00159 # ifndef memcmp
00160 # define memcmp(s1, s2, n) bcmp (s1, s2, n)
00161 # endif
00162 # ifndef memcpy
00163 # define memcpy(d, s, n) (bcopy (s, d, n), (d))
00164 # endif
00165 # endif
00166 # endif
00167
00168
00169
00170
00171
00172 # ifndef Sword
00173 # define Sword 1
00174 # endif
00175
00176 # ifdef SWITCH_ENUM_BUG
00177 # define SWITCH_ENUM_CAST(x) ((int)(x))
00178 # else
00179 # define SWITCH_ENUM_CAST(x) (x)
00180 # endif
00181
00182 # endif
00183
00184 # if defined _LIBC || HAVE_LIMITS_H
00185 # include <limits.h>
00186 # endif
00187
00188 # ifndef MB_LEN_MAX
00189 # define MB_LEN_MAX 1
00190 # endif
00191
00192
00193 # include "xregex.h"
00194
00195
00196 # include <ctype.h>
00197
00198
00199
00200
00201
00202
00203
00204
00205
00206
00207
00208
00209
00210 # undef ISASCII
00211 # if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
00212 # define ISASCII(c) 1
00213 # else
00214 # define ISASCII(c) isascii(c)
00215 # endif
00216
00217 # ifdef isblank
00218 # define ISBLANK(c) (ISASCII (c) && isblank (c))
00219 # else
00220 # define ISBLANK(c) ((c) == ' ' || (c) == '\t')
00221 # endif
00222 # ifdef isgraph
00223 # define ISGRAPH(c) (ISASCII (c) && isgraph (c))
00224 # else
00225 # define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
00226 # endif
00227
00228 # undef ISPRINT
00229 # define ISPRINT(c) (ISASCII (c) && isprint (c))
00230 # define ISDIGIT(c) (ISASCII (c) && isdigit (c))
00231 # define ISALNUM(c) (ISASCII (c) && isalnum (c))
00232 # define ISALPHA(c) (ISASCII (c) && isalpha (c))
00233 # define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
00234 # define ISLOWER(c) (ISASCII (c) && islower (c))
00235 # define ISPUNCT(c) (ISASCII (c) && ispunct (c))
00236 # define ISSPACE(c) (ISASCII (c) && isspace (c))
00237 # define ISUPPER(c) (ISASCII (c) && isupper (c))
00238 # define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
00239
00240 # ifdef _tolower
00241 # define TOLOWER(c) _tolower(c)
00242 # else
00243 # define TOLOWER(c) tolower(c)
00244 # endif
00245
00246 # ifndef NULL
00247 # define NULL (void *)0
00248 # endif
00249
00250
00251
00252
00253
00254 # undef SIGN_EXTEND_CHAR
00255 # if __STDC__
00256 # define SIGN_EXTEND_CHAR(c) ((signed char) (c))
00257 # else
00258
00259 # define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
00260 # endif
00261
00262 # ifndef emacs
00263
00264 # define CHAR_SET_SIZE 256
00265
00266 # ifdef SYNTAX_TABLE
00267
00268 extern char *re_syntax_table;
00269
00270 # else
00271
00272 static char re_syntax_table[CHAR_SET_SIZE];
00273
00274 static void init_syntax_once (void);
00275
00276 static void
00277 init_syntax_once (void)
00278 {
00279 register int c;
00280 static int done = 0;
00281
00282 if (done)
00283 return;
00284 bzero (re_syntax_table, sizeof re_syntax_table);
00285
00286 for (c = 0; c < CHAR_SET_SIZE; ++c)
00287 if (ISALNUM (c))
00288 re_syntax_table[c] = Sword;
00289
00290 re_syntax_table['_'] = Sword;
00291
00292 done = 1;
00293 }
00294
00295 # endif
00296
00297 # define SYNTAX(c) re_syntax_table[(unsigned char) (c)]
00298
00299 # endif
00300
00301
00302 # if !defined _LIBC && !defined HAVE_UINTPTR_T
00303 typedef unsigned long int uintptr_t;
00304 # endif
00305
00306
00307
00308
00309
00310
00311
00312
00313
00314
00315
00316 # ifdef REGEX_MALLOC
00317
00318 # define REGEX_ALLOCATE malloc
00319 # define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
00320 # define REGEX_FREE free
00321
00322 # else
00323
00324
00325 # ifndef alloca
00326
00327
00328 # ifdef __GNUC__
00329 # define alloca __builtin_alloca
00330 # else
00331 # if HAVE_ALLOCA_H
00332 # include <alloca.h>
00333 # endif
00334 # endif
00335
00336 # endif
00337
00338 # define REGEX_ALLOCATE alloca
00339
00340
00341 # define REGEX_REALLOCATE(source, osize, nsize) \
00342 (destination = (char *) alloca (nsize), \
00343 memcpy (destination, source, osize))
00344
00345
00346 # define REGEX_FREE(arg) ((void)0)
00347
00348 # endif
00349
00350
00351
00352 # if defined REL_ALLOC && defined REGEX_MALLOC
00353
00354 # define REGEX_ALLOCATE_STACK(size) \
00355 r_alloc (&failure_stack_ptr, (size))
00356 # define REGEX_REALLOCATE_STACK(source, osize, nsize) \
00357 r_re_alloc (&failure_stack_ptr, (nsize))
00358 # define REGEX_FREE_STACK(ptr) \
00359 r_alloc_free (&failure_stack_ptr)
00360
00361 # else
00362
00363 # ifdef REGEX_MALLOC
00364
00365 # define REGEX_ALLOCATE_STACK malloc
00366 # define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
00367 # define REGEX_FREE_STACK free
00368
00369 # else
00370
00371 # define REGEX_ALLOCATE_STACK alloca
00372
00373 # define REGEX_REALLOCATE_STACK(source, osize, nsize) \
00374 REGEX_REALLOCATE (source, osize, nsize)
00375
00376 # define REGEX_FREE_STACK(arg)
00377
00378 # endif
00379 # endif
00380
00381
00382
00383
00384
00385 # define FIRST_STRING_P(ptr) \
00386 (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
00387
00388
00389 # define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
00390 # define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
00391 # define RETALLOC_IF(addr, n, t) \
00392 if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
00393 # define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
00394
00395 # define BYTEWIDTH 8
00396
00397 # define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
00398
00399 # undef MAX
00400 # undef MIN
00401 # define MAX(a, b) ((a) > (b) ? (a) : (b))
00402 # define MIN(a, b) ((a) < (b) ? (a) : (b))
00403
00404 typedef char boolean;
00405 # define false 0
00406 # define true 1
00407
00408 static reg_errcode_t byte_regex_compile (const char *pattern, size_t size,
00409 reg_syntax_t syntax,
00410 struct re_pattern_buffer *bufp);
00411
00412 static int byte_re_match_2_internal (struct re_pattern_buffer *bufp,
00413 const char *string1, int size1,
00414 const char *string2, int size2,
00415 int pos,
00416 struct re_registers *regs,
00417 int stop);
00418 static int byte_re_search_2 (struct re_pattern_buffer *bufp,
00419 const char *string1, int size1,
00420 const char *string2, int size2,
00421 int startpos, int range,
00422 struct re_registers *regs, int stop);
00423 static int byte_re_compile_fastmap (struct re_pattern_buffer *bufp);
00424
00425 #ifdef MBS_SUPPORT
00426 static reg_errcode_t wcs_regex_compile (const char *pattern, size_t size,
00427 reg_syntax_t syntax,
00428 struct re_pattern_buffer *bufp);
00429
00430
00431 static int wcs_re_match_2_internal (struct re_pattern_buffer *bufp,
00432 const char *cstring1, int csize1,
00433 const char *cstring2, int csize2,
00434 int pos,
00435 struct re_registers *regs,
00436 int stop,
00437 wchar_t *string1, int size1,
00438 wchar_t *string2, int size2,
00439 int *mbs_offset1, int *mbs_offset2);
00440 static int wcs_re_search_2 (struct re_pattern_buffer *bufp,
00441 const char *string1, int size1,
00442 const char *string2, int size2,
00443 int startpos, int range,
00444 struct re_registers *regs, int stop);
00445 static int wcs_re_compile_fastmap (struct re_pattern_buffer *bufp);
00446 #endif
00447
00448
00449
00450
00451
00452
00453 typedef enum
00454 {
00455 no_op = 0,
00456
00457
00458 succeed,
00459
00460
00461 exactn,
00462
00463 # ifdef MBS_SUPPORT
00464
00465 exactn_bin,
00466 # endif
00467
00468
00469 anychar,
00470
00471
00472
00473
00474
00475
00476
00477
00478
00479
00480
00481
00482
00483
00484 charset,
00485
00486
00487
00488 charset_not,
00489
00490
00491
00492
00493
00494
00495
00496
00497 start_memory,
00498
00499
00500
00501
00502
00503
00504
00505
00506 stop_memory,
00507
00508
00509
00510 duplicate,
00511
00512
00513 begline,
00514
00515
00516 endline,
00517
00518
00519
00520 begbuf,
00521
00522
00523 endbuf,
00524
00525
00526 jump,
00527
00528
00529 jump_past_alt,
00530
00531
00532
00533
00534 on_failure_jump,
00535
00536
00537
00538 on_failure_keep_string_jump,
00539
00540
00541
00542
00543 pop_failure_jump,
00544
00545
00546
00547
00548
00549
00550
00551
00552
00553 maybe_pop_jump,
00554
00555
00556
00557
00558
00559
00560
00561 dummy_failure_jump,
00562
00563
00564
00565 push_dummy_failure,
00566
00567
00568
00569
00570 succeed_n,
00571
00572
00573
00574
00575 jump_n,
00576
00577
00578
00579
00580
00581 set_number_at,
00582
00583 wordchar,
00584 notwordchar,
00585
00586 wordbeg,
00587 wordend,
00588
00589 wordbound,
00590 notwordbound
00591
00592 # ifdef emacs
00593 ,before_dot,
00594 at_dot,
00595 after_dot,
00596
00597
00598
00599 syntaxspec,
00600
00601
00602 notsyntaxspec
00603 # endif
00604 } re_opcode_t;
00605 #endif
00606
00607
00608 #ifdef BYTE
00609 # define CHAR_T char
00610 # define UCHAR_T unsigned char
00611 # define COMPILED_BUFFER_VAR bufp->buffer
00612 # define OFFSET_ADDRESS_SIZE 2
00613 # define PREFIX(name) byte_##name
00614 # define ARG_PREFIX(name) name
00615 # define PUT_CHAR(c) putchar (c)
00616 #else
00617 # ifdef WCHAR
00618 # define CHAR_T wchar_t
00619 # define UCHAR_T wchar_t
00620 # define COMPILED_BUFFER_VAR wc_buffer
00621 # define OFFSET_ADDRESS_SIZE 1
00622 # define CHAR_CLASS_SIZE ((__alignof__(wctype_t)+sizeof(wctype_t))/sizeof(CHAR_T)+1)
00623 # define PREFIX(name) wcs_##name
00624 # define ARG_PREFIX(name) c##name
00625
00626 # define PUT_CHAR(c) printf ("%C", c);
00627 # define TRUE 1
00628 # define FALSE 0
00629 # else
00630 # ifdef MBS_SUPPORT
00631 # define WCHAR
00632 # define INSIDE_RECURSION
00633 # include "regex.c"
00634 # undef INSIDE_RECURSION
00635 # endif
00636 # define BYTE
00637 # define INSIDE_RECURSION
00638 # include "regex.c"
00639 # undef INSIDE_RECURSION
00640 # endif
00641 #endif
00642
00643 #ifdef INSIDE_RECURSION
00644
00645
00646
00647
00648
00649 # ifdef WCHAR
00650 # define STORE_NUMBER(destination, number) \
00651 do { \
00652 *(destination) = (UCHAR_T)(number); \
00653 } while (0)
00654 # else
00655 # define STORE_NUMBER(destination, number) \
00656 do { \
00657 (destination)[0] = (number) & 0377; \
00658 (destination)[1] = (number) >> 8; \
00659 } while (0)
00660 # endif
00661
00662
00663
00664
00665
00666
00667 # define STORE_NUMBER_AND_INCR(destination, number) \
00668 do { \
00669 STORE_NUMBER (destination, number); \
00670 (destination) += OFFSET_ADDRESS_SIZE; \
00671 } while (0)
00672
00673
00674
00675
00676
00677 # ifdef WCHAR
00678 # define EXTRACT_NUMBER(destination, source) \
00679 do { \
00680 (destination) = *(source); \
00681 } while (0)
00682 # else
00683 # define EXTRACT_NUMBER(destination, source) \
00684 do { \
00685 (destination) = *(source) & 0377; \
00686 (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) << 8; \
00687 } while (0)
00688 # endif
00689
00690 # ifdef DEBUG
00691 static void PREFIX(extract_number) (int *dest, UCHAR_T *source);
00692 static void
00693 PREFIX(extract_number) (int *dest, UCHAR_T *source)
00694 {
00695 # ifdef WCHAR
00696 *dest = *source;
00697 # else
00698 int temp = SIGN_EXTEND_CHAR (*(source + 1));
00699 *dest = *source & 0377;
00700 *dest += temp << 8;
00701 # endif
00702 }
00703
00704 # ifndef EXTRACT_MACROS
00705 # undef EXTRACT_NUMBER
00706 # define EXTRACT_NUMBER(dest, src) PREFIX(extract_number) (&dest, src)
00707 # endif
00708
00709 # endif
00710
00711
00712
00713
00714 # define EXTRACT_NUMBER_AND_INCR(destination, source) \
00715 do { \
00716 EXTRACT_NUMBER (destination, source); \
00717 (source) += OFFSET_ADDRESS_SIZE; \
00718 } while (0)
00719
00720 # ifdef DEBUG
00721 static void PREFIX(extract_number_and_incr) (int *destination,
00722 UCHAR_T **source);
00723 static void
00724 PREFIX(extract_number_and_incr) (int *destination, UCHAR_T **source)
00725 {
00726 PREFIX(extract_number) (destination, *source);
00727 *source += OFFSET_ADDRESS_SIZE;
00728 }
00729
00730 # ifndef EXTRACT_MACROS
00731 # undef EXTRACT_NUMBER_AND_INCR
00732 # define EXTRACT_NUMBER_AND_INCR(dest, src) \
00733 PREFIX(extract_number_and_incr) (&dest, &src)
00734 # endif
00735
00736 # endif
00737
00738
00739
00740
00741
00742
00743
00744
00745
00746 # ifdef DEBUG
00747
00748 # ifndef DEFINED_ONCE
00749
00750
00751 # include <stdio.h>
00752
00753
00754 # include <assert.h>
00755
00756 static int debug;
00757
00758 # define DEBUG_STATEMENT(e) e
00759 # define DEBUG_PRINT1(x) if (debug) printf (x)
00760 # define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
00761 # define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
00762 # define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
00763 # endif
00764
00765 # define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
00766 if (debug) PREFIX(print_partial_compiled_pattern) (s, e)
00767 # define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
00768 if (debug) PREFIX(print_double_string) (w, s1, sz1, s2, sz2)
00769
00770
00771
00772
00773 # ifndef DEFINED_ONCE
00774 void
00775 print_fastmap (char *fastmap)
00776 {
00777 unsigned was_a_range = 0;
00778 unsigned i = 0;
00779
00780 while (i < (1 << BYTEWIDTH))
00781 {
00782 if (fastmap[i++])
00783 {
00784 was_a_range = 0;
00785 putchar (i - 1);
00786 while (i < (1 << BYTEWIDTH) && fastmap[i])
00787 {
00788 was_a_range = 1;
00789 i++;
00790 }
00791 if (was_a_range)
00792 {
00793 printf ("-");
00794 putchar (i - 1);
00795 }
00796 }
00797 }
00798 putchar ('\n');
00799 }
00800 # endif
00801
00802
00803
00804
00805
00806 void
00807 PREFIX(print_partial_compiled_pattern) (UCHAR_T *start, UCHAR_T *end)
00808 {
00809 int mcnt, mcnt2;
00810 UCHAR_T *p1;
00811 UCHAR_T *p = start;
00812 UCHAR_T *pend = end;
00813
00814 if (start == NULL)
00815 {
00816 printf ("(null)\n");
00817 return;
00818 }
00819
00820
00821 while (p < pend)
00822 {
00823 # ifdef _LIBC
00824 printf ("%td:\t", p - start);
00825 # else
00826 printf ("%ld:\t", (long int) (p - start));
00827 # endif
00828
00829 switch ((re_opcode_t) *p++)
00830 {
00831 case no_op:
00832 printf ("/no_op");
00833 break;
00834
00835 case exactn:
00836 mcnt = *p++;
00837 printf ("/exactn/%d", mcnt);
00838 do
00839 {
00840 putchar ('/');
00841 PUT_CHAR (*p++);
00842 }
00843 while (--mcnt);
00844 break;
00845
00846 # ifdef MBS_SUPPORT
00847 case exactn_bin:
00848 mcnt = *p++;
00849 printf ("/exactn_bin/%d", mcnt);
00850 do
00851 {
00852 printf("/%lx", (long int) *p++);
00853 }
00854 while (--mcnt);
00855 break;
00856 # endif
00857
00858 case start_memory:
00859 mcnt = *p++;
00860 printf ("/start_memory/%d/%ld", mcnt, (long int) *p++);
00861 break;
00862
00863 case stop_memory:
00864 mcnt = *p++;
00865 printf ("/stop_memory/%d/%ld", mcnt, (long int) *p++);
00866 break;
00867
00868 case duplicate:
00869 printf ("/duplicate/%ld", (long int) *p++);
00870 break;
00871
00872 case anychar:
00873 printf ("/anychar");
00874 break;
00875
00876 case charset:
00877 case charset_not:
00878 {
00879 # ifdef WCHAR
00880 int i, length;
00881 wchar_t *workp = p;
00882 printf ("/charset [%s",
00883 (re_opcode_t) *(workp - 1) == charset_not ? "^" : "");
00884 p += 5;
00885 length = *workp++;
00886 for (i=0 ; i<length ; i++)
00887 printf("[:%lx:]", (long int) *p++);
00888 length = *workp++;
00889 for (i=0 ; i<length ;)
00890 {
00891 printf("[.");
00892 while(*p != 0)
00893 PUT_CHAR((i++,*p++));
00894 i++,p++;
00895 printf(".]");
00896 }
00897 length = *workp++;
00898 for (i=0 ; i<length ;)
00899 {
00900 printf("[=");
00901 while(*p != 0)
00902 PUT_CHAR((i++,*p++));
00903 i++,p++;
00904 printf("=]");
00905 }
00906 length = *workp++;
00907 for (i=0 ; i<length ; i++)
00908 {
00909 wchar_t range_start = *p++;
00910 wchar_t range_end = *p++;
00911 printf("%C-%C", range_start, range_end);
00912 }
00913 length = *workp++;
00914 for (i=0 ; i<length ; i++)
00915 printf("%C", *p++);
00916 putchar (']');
00917 # else
00918 register int c, last = -100;
00919 register int in_range = 0;
00920
00921 printf ("/charset [%s",
00922 (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
00923
00924 assert (p + *p < pend);
00925
00926 for (c = 0; c < 256; c++)
00927 if (c / 8 < *p
00928 && (p[1 + (c/8)] & (1 << (c % 8))))
00929 {
00930
00931 if (last + 1 == c && ! in_range)
00932 {
00933 putchar ('-');
00934 in_range = 1;
00935 }
00936
00937 else if (last + 1 != c && in_range)
00938 {
00939 putchar (last);
00940 in_range = 0;
00941 }
00942
00943 if (! in_range)
00944 putchar (c);
00945
00946 last = c;
00947 }
00948
00949 if (in_range)
00950 putchar (last);
00951
00952 putchar (']');
00953
00954 p += 1 + *p;
00955 # endif
00956 }
00957 break;
00958
00959 case begline:
00960 printf ("/begline");
00961 break;
00962
00963 case endline:
00964 printf ("/endline");
00965 break;
00966
00967 case on_failure_jump:
00968 PREFIX(extract_number_and_incr) (&mcnt, &p);
00969 # ifdef _LIBC
00970 printf ("/on_failure_jump to %td", p + mcnt - start);
00971 # else
00972 printf ("/on_failure_jump to %ld", (long int) (p + mcnt - start));
00973 # endif
00974 break;
00975
00976 case on_failure_keep_string_jump:
00977 PREFIX(extract_number_and_incr) (&mcnt, &p);
00978 # ifdef _LIBC
00979 printf ("/on_failure_keep_string_jump to %td", p + mcnt - start);
00980 # else
00981 printf ("/on_failure_keep_string_jump to %ld",
00982 (long int) (p + mcnt - start));
00983 # endif
00984 break;
00985
00986 case dummy_failure_jump:
00987 PREFIX(extract_number_and_incr) (&mcnt, &p);
00988 # ifdef _LIBC
00989 printf ("/dummy_failure_jump to %td", p + mcnt - start);
00990 # else
00991 printf ("/dummy_failure_jump to %ld", (long int) (p + mcnt - start));
00992 # endif
00993 break;
00994
00995 case push_dummy_failure:
00996 printf ("/push_dummy_failure");
00997 break;
00998
00999 case maybe_pop_jump:
01000 PREFIX(extract_number_and_incr) (&mcnt, &p);
01001 # ifdef _LIBC
01002 printf ("/maybe_pop_jump to %td", p + mcnt - start);
01003 # else
01004 printf ("/maybe_pop_jump to %ld", (long int) (p + mcnt - start));
01005 # endif
01006 break;
01007
01008 case pop_failure_jump:
01009 PREFIX(extract_number_and_incr) (&mcnt, &p);
01010 # ifdef _LIBC
01011 printf ("/pop_failure_jump to %td", p + mcnt - start);
01012 # else
01013 printf ("/pop_failure_jump to %ld", (long int) (p + mcnt - start));
01014 # endif
01015 break;
01016
01017 case jump_past_alt:
01018 PREFIX(extract_number_and_incr) (&mcnt, &p);
01019 # ifdef _LIBC
01020 printf ("/jump_past_alt to %td", p + mcnt - start);
01021 # else
01022 printf ("/jump_past_alt to %ld", (long int) (p + mcnt - start));
01023 # endif
01024 break;
01025
01026 case jump:
01027 PREFIX(extract_number_and_incr) (&mcnt, &p);
01028 # ifdef _LIBC
01029 printf ("/jump to %td", p + mcnt - start);
01030 # else
01031 printf ("/jump to %ld", (long int) (p + mcnt - start));
01032 # endif
01033 break;
01034
01035 case succeed_n:
01036 PREFIX(extract_number_and_incr) (&mcnt, &p);
01037 p1 = p + mcnt;
01038 PREFIX(extract_number_and_incr) (&mcnt2, &p);
01039 # ifdef _LIBC
01040 printf ("/succeed_n to %td, %d times", p1 - start, mcnt2);
01041 # else
01042 printf ("/succeed_n to %ld, %d times",
01043 (long int) (p1 - start), mcnt2);
01044 # endif
01045 break;
01046
01047 case jump_n:
01048 PREFIX(extract_number_and_incr) (&mcnt, &p);
01049 p1 = p + mcnt;
01050 PREFIX(extract_number_and_incr) (&mcnt2, &p);
01051 printf ("/jump_n to %d, %d times", p1 - start, mcnt2);
01052 break;
01053
01054 case set_number_at:
01055 PREFIX(extract_number_and_incr) (&mcnt, &p);
01056 p1 = p + mcnt;
01057 PREFIX(extract_number_and_incr) (&mcnt2, &p);
01058 # ifdef _LIBC
01059 printf ("/set_number_at location %td to %d", p1 - start, mcnt2);
01060 # else
01061 printf ("/set_number_at location %ld to %d",
01062 (long int) (p1 - start), mcnt2);
01063 # endif
01064 break;
01065
01066 case wordbound:
01067 printf ("/wordbound");
01068 break;
01069
01070 case notwordbound:
01071 printf ("/notwordbound");
01072 break;
01073
01074 case wordbeg:
01075 printf ("/wordbeg");
01076 break;
01077
01078 case wordend:
01079 printf ("/wordend");
01080 break;
01081
01082 # ifdef emacs
01083 case before_dot:
01084 printf ("/before_dot");
01085 break;
01086
01087 case at_dot:
01088 printf ("/at_dot");
01089 break;
01090
01091 case after_dot:
01092 printf ("/after_dot");
01093 break;
01094
01095 case syntaxspec:
01096 printf ("/syntaxspec");
01097 mcnt = *p++;
01098 printf ("/%d", mcnt);
01099 break;
01100
01101 case notsyntaxspec:
01102 printf ("/notsyntaxspec");
01103 mcnt = *p++;
01104 printf ("/%d", mcnt);
01105 break;
01106 # endif
01107
01108 case wordchar:
01109 printf ("/wordchar");
01110 break;
01111
01112 case notwordchar:
01113 printf ("/notwordchar");
01114 break;
01115
01116 case begbuf:
01117 printf ("/begbuf");
01118 break;
01119
01120 case endbuf:
01121 printf ("/endbuf");
01122 break;
01123
01124 default:
01125 printf ("?%ld", (long int) *(p-1));
01126 }
01127
01128 putchar ('\n');
01129 }
01130
01131 # ifdef _LIBC
01132 printf ("%td:\tend of pattern.\n", p - start);
01133 # else
01134 printf ("%ld:\tend of pattern.\n", (long int) (p - start));
01135 # endif
01136 }
01137
01138
01139 void
01140 PREFIX(print_compiled_pattern) (struct re_pattern_buffer *bufp)
01141 {
01142 UCHAR_T *buffer = (UCHAR_T*) bufp->buffer;
01143
01144 PREFIX(print_partial_compiled_pattern) (buffer, buffer
01145 + bufp->used / sizeof(UCHAR_T));
01146 printf ("%ld bytes used/%ld bytes allocated.\n",
01147 bufp->used, bufp->allocated);
01148
01149 if (bufp->fastmap_accurate && bufp->fastmap)
01150 {
01151 printf ("fastmap: ");
01152 print_fastmap (bufp->fastmap);
01153 }
01154
01155 # ifdef _LIBC
01156 printf ("re_nsub: %Zd\t", bufp->re_nsub);
01157 # else
01158 printf ("re_nsub: %ld\t", (long int) bufp->re_nsub);
01159 # endif
01160 printf ("regs_alloc: %d\t", bufp->regs_allocated);
01161 printf ("can_be_null: %d\t", bufp->can_be_null);
01162 printf ("newline_anchor: %d\n", bufp->newline_anchor);
01163 printf ("no_sub: %d\t", bufp->no_sub);
01164 printf ("not_bol: %d\t", bufp->not_bol);
01165 printf ("not_eol: %d\t", bufp->not_eol);
01166 printf ("syntax: %lx\n", bufp->syntax);
01167
01168 }
01169
01170
01171 void
01172 PREFIX(print_double_string) (const CHAR_T *where, const CHAR_T *string1,
01173 int size1, const CHAR_T *string2, int size2)
01174 {
01175 int this_char;
01176
01177 if (where == NULL)
01178 printf ("(null)");
01179 else
01180 {
01181 int cnt;
01182
01183 if (FIRST_STRING_P (where))
01184 {
01185 for (this_char = where - string1; this_char < size1; this_char++)
01186 PUT_CHAR (string1[this_char]);
01187
01188 where = string2;
01189 }
01190
01191 cnt = 0;
01192 for (this_char = where - string2; this_char < size2; this_char++)
01193 {
01194 PUT_CHAR (string2[this_char]);
01195 if (++cnt > 100)
01196 {
01197 fputs ("...", stdout);
01198 break;
01199 }
01200 }
01201 }
01202 }
01203
01204 # ifndef DEFINED_ONCE
01205 void
01206 printchar (int c)
01207 {
01208 putc (c, stderr);
01209 }
01210 # endif
01211
01212 # else
01213
01214 # ifndef DEFINED_ONCE
01215 # undef assert
01216 # define assert(e)
01217
01218 # define DEBUG_STATEMENT(e)
01219 # define DEBUG_PRINT1(x)
01220 # define DEBUG_PRINT2(x1, x2)
01221 # define DEBUG_PRINT3(x1, x2, x3)
01222 # define DEBUG_PRINT4(x1, x2, x3, x4)
01223 # endif
01224 # define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
01225 # define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
01226
01227 # endif
01228
01229
01230
01231 # ifdef WCHAR
01232
01233
01234
01235
01236
01237
01238
01239 static size_t convert_mbs_to_wcs (CHAR_T *dest, const unsigned char* src,
01240 size_t len, int *offset_buffer,
01241 char *is_binary);
01242 static size_t
01243 convert_mbs_to_wcs (CHAR_T *dest, const unsigned char*src, size_t len,
01244 int *offset_buffer, char *is_binary)
01245
01246
01247
01248
01249
01250
01251
01252
01253
01254 {
01255 wchar_t *pdest = dest;
01256 const unsigned char *psrc = src;
01257 size_t wc_count = 0;
01258
01259 mbstate_t mbs;
01260 int i, consumed;
01261 size_t mb_remain = len;
01262 size_t mb_count = 0;
01263
01264
01265 memset (&mbs, 0, sizeof (mbstate_t));
01266
01267 offset_buffer[0] = 0;
01268 for( ; mb_remain > 0 ; ++wc_count, ++pdest, mb_remain -= consumed,
01269 psrc += consumed)
01270 {
01271 #ifdef _LIBC
01272 consumed = __mbrtowc (pdest, psrc, mb_remain, &mbs);
01273 #else
01274 consumed = mbrtowc (pdest, psrc, mb_remain, &mbs);
01275 #endif
01276
01277 if (consumed <= 0)
01278
01279
01280 {
01281 *pdest = *psrc;
01282 consumed = 1;
01283 is_binary[wc_count] = TRUE;
01284 }
01285 else
01286 is_binary[wc_count] = FALSE;
01287
01288
01289
01290
01291 if (consumed == 1 && (int) *psrc == 0x5c && (int) *pdest == 0xa5)
01292 *pdest = (wchar_t) *psrc;
01293
01294 offset_buffer[wc_count + 1] = mb_count += consumed;
01295 }
01296
01297
01298 for (i = wc_count + 1 ; i <= len ; i++)
01299 offset_buffer[i] = mb_count + 1;
01300
01301 return wc_count;
01302 }
01303
01304 # endif
01305
01306 #else
01307
01308
01309
01310
01311
01312
01313 reg_syntax_t re_syntax_options;
01314
01315
01316
01317
01318
01319
01320
01321
01322
01323 reg_syntax_t
01324 re_set_syntax (reg_syntax_t syntax)
01325 {
01326 reg_syntax_t ret = re_syntax_options;
01327
01328 re_syntax_options = syntax;
01329 # ifdef DEBUG
01330 if (syntax & RE_DEBUG)
01331 debug = 1;
01332 else if (debug)
01333 debug = 0;
01334 # endif
01335 return ret;
01336 }
01337 # ifdef _LIBC
01338 weak_alias (__re_set_syntax, re_set_syntax)
01339 # endif
01340
01341
01342
01343
01344
01345
01346 static const char *re_error_msgid[] =
01347 {
01348 gettext_noop ("Success"),
01349 gettext_noop ("No match"),
01350 gettext_noop ("Invalid regular expression"),
01351 gettext_noop ("Invalid collation character"),
01352 gettext_noop ("Invalid character class name"),
01353 gettext_noop ("Trailing backslash"),
01354 gettext_noop ("Invalid back reference"),
01355 gettext_noop ("Unmatched [ or [^"),
01356 gettext_noop ("Unmatched ( or \\("),
01357 gettext_noop ("Unmatched \\{"),
01358 gettext_noop ("Invalid content of \\{\\}"),
01359 gettext_noop ("Invalid range end"),
01360 gettext_noop ("Memory exhausted"),
01361 gettext_noop ("Invalid preceding regular expression"),
01362 gettext_noop ("Premature end of regular expression"),
01363 gettext_noop ("Regular expression too big"),
01364 gettext_noop ("Unmatched ) or \\)")
01365 };
01366
01367 #endif
01368
01369 #ifndef DEFINED_ONCE
01370
01371
01372
01373
01374
01375
01376
01377
01378
01379
01380
01381
01382
01383
01384
01385
01386
01387
01388
01389
01390 # define MATCH_MAY_ALLOCATE
01391
01392
01393
01394 # ifdef __GNUC__
01395 # undef C_ALLOCA
01396 # endif
01397
01398
01399
01400
01401
01402
01403 # if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs
01404 # undef MATCH_MAY_ALLOCATE
01405 # endif
01406 #endif
01407
01408 #ifdef INSIDE_RECURSION
01409
01410
01411
01412
01413
01414
01415
01416
01417 # ifndef INIT_FAILURE_ALLOC
01418 # define INIT_FAILURE_ALLOC 5
01419 # endif
01420
01421
01422
01423
01424
01425
01426 # ifdef INT_IS_16BIT
01427
01428 # ifndef DEFINED_ONCE
01429 # if defined MATCH_MAY_ALLOCATE
01430
01431
01432 long int re_max_failures = 4000;
01433 # else
01434 long int re_max_failures = 2000;
01435 # endif
01436 # endif
01437
01438 union PREFIX(fail_stack_elt)
01439 {
01440 UCHAR_T *pointer;
01441 long int integer;
01442 };
01443
01444 typedef union PREFIX(fail_stack_elt) PREFIX(fail_stack_elt_t);
01445
01446 typedef struct
01447 {
01448 PREFIX(fail_stack_elt_t) *stack;
01449 unsigned long int size;
01450 unsigned long int avail;
01451 } PREFIX(fail_stack_type);
01452
01453 # else
01454
01455 # ifndef DEFINED_ONCE
01456 # if defined MATCH_MAY_ALLOCATE
01457
01458
01459 int re_max_failures = 4000;
01460 # else
01461 int re_max_failures = 2000;
01462 # endif
01463 # endif
01464
01465 union PREFIX(fail_stack_elt)
01466 {
01467 UCHAR_T *pointer;
01468 int integer;
01469 };
01470
01471 typedef union PREFIX(fail_stack_elt) PREFIX(fail_stack_elt_t);
01472
01473 typedef struct
01474 {
01475 PREFIX(fail_stack_elt_t) *stack;
01476 unsigned size;
01477 unsigned avail;
01478 } PREFIX(fail_stack_type);
01479
01480 # endif
01481
01482 # ifndef DEFINED_ONCE
01483 # define FAIL_STACK_EMPTY() (fail_stack.avail == 0)
01484 # define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
01485 # define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size)
01486 # endif
01487
01488
01489
01490
01491
01492 # ifdef MATCH_MAY_ALLOCATE
01493 # define INIT_FAIL_STACK() \
01494 do { \
01495 fail_stack.stack = (PREFIX(fail_stack_elt_t) *) \
01496 REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (PREFIX(fail_stack_elt_t))); \
01497 \
01498 if (fail_stack.stack == NULL) \
01499 return -2; \
01500 \
01501 fail_stack.size = INIT_FAILURE_ALLOC; \
01502 fail_stack.avail = 0; \
01503 } while (0)
01504
01505 # define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack)
01506 # else
01507 # define INIT_FAIL_STACK() \
01508 do { \
01509 fail_stack.avail = 0; \
01510 } while (0)
01511
01512 # define RESET_FAIL_STACK()
01513 # endif
01514
01515
01516
01517
01518
01519
01520
01521
01522
01523 # define DOUBLE_FAIL_STACK(fail_stack) \
01524 ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS) \
01525 ? 0 \
01526 : ((fail_stack).stack = (PREFIX(fail_stack_elt_t) *) \
01527 REGEX_REALLOCATE_STACK ((fail_stack).stack, \
01528 (fail_stack).size * sizeof (PREFIX(fail_stack_elt_t)), \
01529 ((fail_stack).size << 1) * sizeof (PREFIX(fail_stack_elt_t))),\
01530 \
01531 (fail_stack).stack == NULL \
01532 ? 0 \
01533 : ((fail_stack).size <<= 1, \
01534 1)))
01535
01536
01537
01538
01539
01540 # define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \
01541 ((FAIL_STACK_FULL () \
01542 && !DOUBLE_FAIL_STACK (FAIL_STACK)) \
01543 ? 0 \
01544 : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER, \
01545 1))
01546
01547
01548
01549
01550 # define PUSH_FAILURE_POINTER(item) \
01551 fail_stack.stack[fail_stack.avail++].pointer = (UCHAR_T *) (item)
01552
01553
01554
01555
01556 # define PUSH_FAILURE_INT(item) \
01557 fail_stack.stack[fail_stack.avail++].integer = (item)
01558
01559
01560
01561
01562 # define PUSH_FAILURE_ELT(item) \
01563 fail_stack.stack[fail_stack.avail++] = (item)
01564
01565
01566
01567 # define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
01568 # define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
01569 # define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
01570
01571
01572 # ifdef DEBUG
01573 # define DEBUG_PUSH PUSH_FAILURE_INT
01574 # define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
01575 # else
01576 # define DEBUG_PUSH(item)
01577 # define DEBUG_POP(item_addr)
01578 # endif
01579
01580
01581
01582
01583
01584
01585
01586
01587
01588
01589
01590 # define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \
01591 do { \
01592 char *destination; \
01593
01594 \
01595
01596
01597 \
01598 active_reg_t this_reg; \
01599 \
01600 DEBUG_STATEMENT (failure_id++); \
01601 DEBUG_STATEMENT (nfailure_points_pushed++); \
01602 DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
01603 DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\
01604 DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
01605 \
01606 DEBUG_PRINT2 (" slots needed: %ld\n", NUM_FAILURE_ITEMS); \
01607 DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \
01608 \
01609 \
01610 while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \
01611 { \
01612 if (!DOUBLE_FAIL_STACK (fail_stack)) \
01613 return failure_code; \
01614 \
01615 DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \
01616 (fail_stack).size); \
01617 DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\
01618 } \
01619 \
01620 \
01621 DEBUG_PRINT1 ("\n"); \
01622 \
01623 if (1) \
01624 for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
01625 this_reg++) \
01626 { \
01627 DEBUG_PRINT2 (" Pushing reg: %lu\n", this_reg); \
01628 DEBUG_STATEMENT (num_regs_pushed++); \
01629 \
01630 DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \
01631 PUSH_FAILURE_POINTER (regstart[this_reg]); \
01632 \
01633 DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \
01634 PUSH_FAILURE_POINTER (regend[this_reg]); \
01635 \
01636 DEBUG_PRINT2 (" info: %p\n ", \
01637 reg_info[this_reg].word.pointer); \
01638 DEBUG_PRINT2 (" match_null=%d", \
01639 REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \
01640 DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \
01641 DEBUG_PRINT2 (" matched_something=%d", \
01642 MATCHED_SOMETHING (reg_info[this_reg])); \
01643 DEBUG_PRINT2 (" ever_matched=%d", \
01644 EVER_MATCHED_SOMETHING (reg_info[this_reg])); \
01645 DEBUG_PRINT1 ("\n"); \
01646 PUSH_FAILURE_ELT (reg_info[this_reg].word); \
01647 } \
01648 \
01649 DEBUG_PRINT2 (" Pushing low active reg: %ld\n", lowest_active_reg);\
01650 PUSH_FAILURE_INT (lowest_active_reg); \
01651 \
01652 DEBUG_PRINT2 (" Pushing high active reg: %ld\n", highest_active_reg);\
01653 PUSH_FAILURE_INT (highest_active_reg); \
01654 \
01655 DEBUG_PRINT2 (" Pushing pattern %p:\n", pattern_place); \
01656 DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \
01657 PUSH_FAILURE_POINTER (pattern_place); \
01658 \
01659 DEBUG_PRINT2 (" Pushing string %p: `", string_place); \
01660 DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \
01661 size2); \
01662 DEBUG_PRINT1 ("'\n"); \
01663 PUSH_FAILURE_POINTER (string_place); \
01664 \
01665 DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \
01666 DEBUG_PUSH (failure_id); \
01667 } while (0)
01668
01669 # ifndef DEFINED_ONCE
01670
01671
01672 # define NUM_REG_ITEMS 3
01673
01674
01675 # ifdef DEBUG
01676 # define NUM_NONREG_ITEMS 5
01677 # else
01678 # define NUM_NONREG_ITEMS 4
01679 # endif
01680
01681
01682
01683
01684
01685 # define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
01686
01687
01688 # define NUM_FAILURE_ITEMS \
01689 (((0 \
01690 ? 0 : highest_active_reg - lowest_active_reg + 1) \
01691 * NUM_REG_ITEMS) \
01692 + NUM_NONREG_ITEMS)
01693
01694
01695 # define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
01696 # endif
01697
01698
01699
01700
01701
01702
01703
01704
01705
01706
01707
01708
01709
01710 # define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
01711 { \
01712 DEBUG_STATEMENT (unsigned failure_id;) \
01713 active_reg_t this_reg; \
01714 const UCHAR_T *string_temp; \
01715 \
01716 assert (!FAIL_STACK_EMPTY ()); \
01717 \
01718 \
01719 DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \
01720 DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \
01721 DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \
01722 \
01723 assert (fail_stack.avail >= NUM_NONREG_ITEMS); \
01724 \
01725 DEBUG_POP (&failure_id); \
01726 DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \
01727 \
01728
01729
01730 \
01731 string_temp = POP_FAILURE_POINTER (); \
01732 if (string_temp != NULL) \
01733 str = (const CHAR_T *) string_temp; \
01734 \
01735 DEBUG_PRINT2 (" Popping string %p: `", str); \
01736 DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
01737 DEBUG_PRINT1 ("'\n"); \
01738 \
01739 pat = (UCHAR_T *) POP_FAILURE_POINTER (); \
01740 DEBUG_PRINT2 (" Popping pattern %p:\n", pat); \
01741 DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
01742 \
01743 \
01744 high_reg = (active_reg_t) POP_FAILURE_INT (); \
01745 DEBUG_PRINT2 (" Popping high active reg: %ld\n", high_reg); \
01746 \
01747 low_reg = (active_reg_t) POP_FAILURE_INT (); \
01748 DEBUG_PRINT2 (" Popping low active reg: %ld\n", low_reg); \
01749 \
01750 if (1) \
01751 for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \
01752 { \
01753 DEBUG_PRINT2 (" Popping reg: %ld\n", this_reg); \
01754 \
01755 reg_info[this_reg].word = POP_FAILURE_ELT (); \
01756 DEBUG_PRINT2 (" info: %p\n", \
01757 reg_info[this_reg].word.pointer); \
01758 \
01759 regend[this_reg] = (const CHAR_T *) POP_FAILURE_POINTER (); \
01760 DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \
01761 \
01762 regstart[this_reg] = (const CHAR_T *) POP_FAILURE_POINTER (); \
01763 DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \
01764 } \
01765 else \
01766 { \
01767 for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
01768 { \
01769 reg_info[this_reg].word.integer = 0; \
01770 regend[this_reg] = 0; \
01771 regstart[this_reg] = 0; \
01772 } \
01773 highest_active_reg = high_reg; \
01774 } \
01775 \
01776 set_regs_matched_done = 0; \
01777 DEBUG_STATEMENT (nfailure_points_popped++); \
01778 }
01779
01780
01781
01782
01783
01784
01785
01786
01787
01788
01789
01790
01791
01792
01793
01794 typedef union
01795 {
01796 PREFIX(fail_stack_elt_t) word;
01797 struct
01798 {
01799
01800
01801 # define MATCH_NULL_UNSET_VALUE 3
01802 unsigned match_null_string_p : 2;
01803 unsigned is_active : 1;
01804 unsigned matched_something : 1;
01805 unsigned ever_matched_something : 1;
01806 } bits;
01807 } PREFIX(register_info_type);
01808
01809 # ifndef DEFINED_ONCE
01810 # define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p)
01811 # define IS_ACTIVE(R) ((R).bits.is_active)
01812 # define MATCHED_SOMETHING(R) ((R).bits.matched_something)
01813 # define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something)
01814
01815
01816
01817
01818
01819 # define SET_REGS_MATCHED() \
01820 do \
01821 { \
01822 if (!set_regs_matched_done) \
01823 { \
01824 active_reg_t r; \
01825 set_regs_matched_done = 1; \
01826 for (r = lowest_active_reg; r <= highest_active_reg; r++) \
01827 { \
01828 MATCHED_SOMETHING (reg_info[r]) \
01829 = EVER_MATCHED_SOMETHING (reg_info[r]) \
01830 = 1; \
01831 } \
01832 } \
01833 } \
01834 while (0)
01835 # endif
01836
01837
01838 static CHAR_T PREFIX(reg_unset_dummy);
01839 # define REG_UNSET_VALUE (&PREFIX(reg_unset_dummy))
01840 # define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
01841
01842
01843 static void PREFIX(store_op1) (re_opcode_t op, UCHAR_T *loc, int arg);
01844 static void PREFIX(store_op2) (re_opcode_t op, UCHAR_T *loc,
01845 int arg1, int arg2);
01846 static void PREFIX(insert_op1) (re_opcode_t op, UCHAR_T *loc,
01847 int arg, UCHAR_T *end);
01848 static void PREFIX(insert_op2) (re_opcode_t op, UCHAR_T *loc,
01849 int arg1, int arg2, UCHAR_T *end);
01850 static boolean PREFIX(at_begline_loc_p) (const CHAR_T *pattern,
01851 const CHAR_T *p,
01852 reg_syntax_t syntax);
01853 static boolean PREFIX(at_endline_loc_p) (const CHAR_T *p,
01854 const CHAR_T *pend,
01855 reg_syntax_t syntax);
01856 # ifdef WCHAR
01857 static reg_errcode_t wcs_compile_range (CHAR_T range_start,
01858 const CHAR_T **p_ptr,
01859 const CHAR_T *pend,
01860 char *translate,
01861 reg_syntax_t syntax,
01862 UCHAR_T *b,
01863 CHAR_T *char_set);
01864 static void insert_space (int num, CHAR_T *loc, CHAR_T *end);
01865 # else
01866 static reg_errcode_t byte_compile_range (unsigned int range_start,
01867 const char **p_ptr,
01868 const char *pend,
01869 char *translate,
01870 reg_syntax_t syntax,
01871 unsigned char *b);
01872 # endif
01873
01874
01875
01876
01877
01878
01879
01880
01881 # ifndef PATFETCH
01882 # ifdef WCHAR
01883 # define PATFETCH(c) \
01884 do {if (p == pend) return REG_EEND; \
01885 c = (UCHAR_T) *p++; \
01886 if (translate && (c <= 0xff)) c = (UCHAR_T) translate[c]; \
01887 } while (0)
01888 # else
01889 # define PATFETCH(c) \
01890 do {if (p == pend) return REG_EEND; \
01891 c = (unsigned char) *p++; \
01892 if (translate) c = (unsigned char) translate[c]; \
01893 } while (0)
01894 # endif
01895 # endif
01896
01897
01898
01899 # define PATFETCH_RAW(c) \
01900 do {if (p == pend) return REG_EEND; \
01901 c = (UCHAR_T) *p++; \
01902 } while (0)
01903
01904
01905 # define PATUNFETCH p--
01906
01907
01908
01909
01910
01911
01912
01913
01914
01915
01916 # ifndef TRANSLATE
01917 # ifdef WCHAR
01918 # define TRANSLATE(d) \
01919 ((translate && ((UCHAR_T) (d)) <= 0xff) \
01920 ? (char) translate[(unsigned char) (d)] : (d))
01921 # else
01922 # define TRANSLATE(d) \
01923 (translate ? (char) translate[(unsigned char) (d)] : (char) (d))
01924 # endif
01925 # endif
01926
01927
01928
01929
01930
01931 # define INIT_BUF_SIZE (32 * sizeof(UCHAR_T))
01932
01933
01934 # ifdef WCHAR
01935 # define GET_BUFFER_SPACE(n) \
01936 while (((unsigned long)b - (unsigned long)COMPILED_BUFFER_VAR \
01937 + (n)*sizeof(CHAR_T)) > bufp->allocated) \
01938 EXTEND_BUFFER ()
01939 # else
01940 # define GET_BUFFER_SPACE(n) \
01941 while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated) \
01942 EXTEND_BUFFER ()
01943 # endif
01944
01945
01946 # define BUF_PUSH(c) \
01947 do { \
01948 GET_BUFFER_SPACE (1); \
01949 *b++ = (UCHAR_T) (c); \
01950 } while (0)
01951
01952
01953
01954 # define BUF_PUSH_2(c1, c2) \
01955 do { \
01956 GET_BUFFER_SPACE (2); \
01957 *b++ = (UCHAR_T) (c1); \
01958 *b++ = (UCHAR_T) (c2); \
01959 } while (0)
01960
01961
01962
01963 # define BUF_PUSH_3(c1, c2, c3) \
01964 do { \
01965 GET_BUFFER_SPACE (3); \
01966 *b++ = (UCHAR_T) (c1); \
01967 *b++ = (UCHAR_T) (c2); \
01968 *b++ = (UCHAR_T) (c3); \
01969 } while (0)
01970
01971
01972
01973 # define STORE_JUMP(op, loc, to) \
01974 PREFIX(store_op1) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)))
01975
01976
01977 # define STORE_JUMP2(op, loc, to, arg) \
01978 PREFIX(store_op2) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)), arg)
01979
01980
01981 # define INSERT_JUMP(op, loc, to) \
01982 PREFIX(insert_op1) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)), b)
01983
01984
01985 # define INSERT_JUMP2(op, loc, to, arg) \
01986 PREFIX(insert_op2) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)),\
01987 arg, b)
01988
01989
01990
01991
01992
01993
01994
01995
01996
01997 # ifndef DEFINED_ONCE
01998 # if defined _MSC_VER && !defined WIN32
01999
02000
02001
02002 # define MAX_BUF_SIZE 65500L
02003 # define REALLOC(p,s) realloc ((p), (size_t) (s))
02004 # else
02005 # define MAX_BUF_SIZE (1L << 16)
02006 # define REALLOC(p,s) realloc ((p), (s))
02007 # endif
02008
02009
02010
02011
02012
02013 # if __BOUNDED_POINTERS__
02014 # define SET_HIGH_BOUND(P) (__ptrhigh (P) = __ptrlow (P) + bufp->allocated)
02015 # define MOVE_BUFFER_POINTER(P) \
02016 (__ptrlow (P) += incr, SET_HIGH_BOUND (P), __ptrvalue (P) += incr)
02017 # define ELSE_EXTEND_BUFFER_HIGH_BOUND \
02018 else \
02019 { \
02020 SET_HIGH_BOUND (b); \
02021 SET_HIGH_BOUND (begalt); \
02022 if (fixup_alt_jump) \
02023 SET_HIGH_BOUND (fixup_alt_jump); \
02024 if (laststart) \
02025 SET_HIGH_BOUND (laststart); \
02026 if (pending_exact) \
02027 SET_HIGH_BOUND (pending_exact); \
02028 }
02029 # else
02030 # define MOVE_BUFFER_POINTER(P) (P) += incr
02031 # define ELSE_EXTEND_BUFFER_HIGH_BOUND
02032 # endif
02033 # endif
02034
02035 # ifdef WCHAR
02036 # define EXTEND_BUFFER() \
02037 do { \
02038 UCHAR_T *old_buffer = COMPILED_BUFFER_VAR; \
02039 int wchar_count; \
02040 if (bufp->allocated + sizeof(UCHAR_T) > MAX_BUF_SIZE) \
02041 return REG_ESIZE; \
02042 bufp->allocated <<= 1; \
02043 if (bufp->allocated > MAX_BUF_SIZE) \
02044 bufp->allocated = MAX_BUF_SIZE; \
02045 \
02046 wchar_count = bufp->allocated / sizeof(UCHAR_T); \
02047 if (wchar_count == 0) wchar_count = 1; \
02048 \
02049 bufp->allocated = wchar_count * sizeof(UCHAR_T); \
02050 RETALLOC (COMPILED_BUFFER_VAR, wchar_count, UCHAR_T); \
02051 bufp->buffer = (char*)COMPILED_BUFFER_VAR; \
02052 if (COMPILED_BUFFER_VAR == NULL) \
02053 return REG_ESPACE; \
02054 \
02055 if (old_buffer != COMPILED_BUFFER_VAR) \
02056 { \
02057 int incr = COMPILED_BUFFER_VAR - old_buffer; \
02058 MOVE_BUFFER_POINTER (b); \
02059 MOVE_BUFFER_POINTER (begalt); \
02060 if (fixup_alt_jump) \
02061 MOVE_BUFFER_POINTER (fixup_alt_jump); \
02062 if (laststart) \
02063 MOVE_BUFFER_POINTER (laststart); \
02064 if (pending_exact) \
02065 MOVE_BUFFER_POINTER (pending_exact); \
02066 } \
02067 ELSE_EXTEND_BUFFER_HIGH_BOUND \
02068 } while (0)
02069 # else
02070 # define EXTEND_BUFFER() \
02071 do { \
02072 UCHAR_T *old_buffer = COMPILED_BUFFER_VAR; \
02073 if (bufp->allocated == MAX_BUF_SIZE) \
02074 return REG_ESIZE; \
02075 bufp->allocated <<= 1; \
02076 if (bufp->allocated > MAX_BUF_SIZE) \
02077 bufp->allocated = MAX_BUF_SIZE; \
02078 bufp->buffer = (UCHAR_T *) REALLOC (COMPILED_BUFFER_VAR, \
02079 bufp->allocated); \
02080 if (COMPILED_BUFFER_VAR == NULL) \
02081 return REG_ESPACE; \
02082 \
02083 if (old_buffer != COMPILED_BUFFER_VAR) \
02084 { \
02085 int incr = COMPILED_BUFFER_VAR - old_buffer; \
02086 MOVE_BUFFER_POINTER (b); \
02087 MOVE_BUFFER_POINTER (begalt); \
02088 if (fixup_alt_jump) \
02089 MOVE_BUFFER_POINTER (fixup_alt_jump); \
02090 if (laststart) \
02091 MOVE_BUFFER_POINTER (laststart); \
02092 if (pending_exact) \
02093 MOVE_BUFFER_POINTER (pending_exact); \
02094 } \
02095 ELSE_EXTEND_BUFFER_HIGH_BOUND \
02096 } while (0)
02097 # endif
02098
02099 # ifndef DEFINED_ONCE
02100
02101
02102
02103 # define MAX_REGNUM 255
02104
02105
02106
02107 typedef unsigned regnum_t;
02108
02109
02110
02111
02112
02113
02114
02115 typedef long pattern_offset_t;
02116
02117 typedef struct
02118 {
02119 pattern_offset_t begalt_offset;
02120 pattern_offset_t fixup_alt_jump;
02121 pattern_offset_t inner_group_offset;
02122 pattern_offset_t laststart_offset;
02123 regnum_t regnum;
02124 } compile_stack_elt_t;
02125
02126
02127 typedef struct
02128 {
02129 compile_stack_elt_t *stack;
02130 unsigned size;
02131 unsigned avail;
02132 } compile_stack_type;
02133
02134
02135 # define INIT_COMPILE_STACK_SIZE 32
02136
02137 # define COMPILE_STACK_EMPTY (compile_stack.avail == 0)
02138 # define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size)
02139
02140
02141 # define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
02142
02143 # endif
02144
02145
02146 # ifndef DEFINED_ONCE
02147 # define SET_LIST_BIT(c) \
02148 (b[((unsigned char) (c)) / BYTEWIDTH] \
02149 |= 1 << (((unsigned char) c) % BYTEWIDTH))
02150 # endif
02151
02152
02153 # define GET_UNSIGNED_NUMBER(num) \
02154 { \
02155 while (p != pend) \
02156 { \
02157 PATFETCH (c); \
02158 if (c < '0' || c > '9') \
02159 break; \
02160 if (num <= RE_DUP_MAX) \
02161 { \
02162 if (num < 0) \
02163 num = 0; \
02164 num = num * 10 + c - '0'; \
02165 } \
02166 } \
02167 }
02168
02169 # ifndef DEFINED_ONCE
02170 # if defined _LIBC || WIDE_CHAR_SUPPORT
02171
02172
02173 # ifdef CHARCLASS_NAME_MAX
02174 # define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
02175 # else
02176
02177
02178 # define CHAR_CLASS_MAX_LENGTH 256
02179 # endif
02180
02181 # ifdef _LIBC
02182 # define IS_CHAR_CLASS(string) __wctype (string)
02183 # else
02184 # define IS_CHAR_CLASS(string) wctype (string)
02185 # endif
02186 # else
02187 # define CHAR_CLASS_MAX_LENGTH 6
02188
02189 # define IS_CHAR_CLASS(string) \
02190 (STREQ (string, "alpha") || STREQ (string, "upper") \
02191 || STREQ (string, "lower") || STREQ (string, "digit") \
02192 || STREQ (string, "alnum") || STREQ (string, "xdigit") \
02193 || STREQ (string, "space") || STREQ (string, "print") \
02194 || STREQ (string, "punct") || STREQ (string, "graph") \
02195 || STREQ (string, "cntrl") || STREQ (string, "blank"))
02196 # endif
02197 # endif
02198
02199 # ifndef MATCH_MAY_ALLOCATE
02200
02201
02202
02203
02204
02205
02206
02207
02208 static PREFIX(fail_stack_type) fail_stack;
02209
02210
02211
02212
02213 # ifdef DEFINED_ONCE
02214 static int regs_allocated_size;
02215
02216 static const char ** regstart, ** regend;
02217 static const char ** old_regstart, ** old_regend;
02218 static const char **best_regstart, **best_regend;
02219 static const char **reg_dummy;
02220 # endif
02221
02222 static PREFIX(register_info_type) *PREFIX(reg_info);
02223 static PREFIX(register_info_type) *PREFIX(reg_info_dummy);
02224
02225
02226
02227
02228 static void
02229 PREFIX(regex_grow_registers) (int num_regs)
02230 {
02231 if (num_regs > regs_allocated_size)
02232 {
02233 RETALLOC_IF (regstart, num_regs, const char *);
02234 RETALLOC_IF (regend, num_regs, const char *);
02235 RETALLOC_IF (old_regstart, num_regs, const char *);
02236 RETALLOC_IF (old_regend, num_regs, const char *);
02237 RETALLOC_IF (best_regstart, num_regs, const char *);
02238 RETALLOC_IF (best_regend, num_regs, const char *);
02239 RETALLOC_IF (PREFIX(reg_info), num_regs, PREFIX(register_info_type));
02240 RETALLOC_IF (reg_dummy, num_regs, const char *);
02241 RETALLOC_IF (PREFIX(reg_info_dummy), num_regs, PREFIX(register_info_type));
02242
02243 regs_allocated_size = num_regs;
02244 }
02245 }
02246
02247 # endif
02248
02249 # ifndef DEFINED_ONCE
02250 static boolean group_in_compile_stack (compile_stack_type compile_stack,
02251 regnum_t regnum);
02252 # endif
02253
02254
02255
02256
02257
02258
02259
02260
02261
02262
02263
02264
02265
02266
02267
02268
02269
02270
02271
02272
02273 # ifdef WCHAR
02274 # define FREE_STACK_RETURN(value) \
02275 return (free(pattern), free(mbs_offset), free(is_binary), free (compile_stack.stack), value)
02276 # else
02277 # define FREE_STACK_RETURN(value) \
02278 return (free (compile_stack.stack), value)
02279 # endif
02280
02281 static reg_errcode_t
02282 PREFIX(regex_compile) (const char *ARG_PREFIX(pattern),
02283 size_t ARG_PREFIX(size), reg_syntax_t syntax,
02284 struct re_pattern_buffer *bufp)
02285 {
02286
02287
02288
02289 register UCHAR_T c, c1;
02290
02291 #ifdef WCHAR
02292
02293 CHAR_T *pattern, *COMPILED_BUFFER_VAR;
02294 size_t size;
02295
02296 int *mbs_offset = NULL;
02297
02298 char *is_binary = NULL;
02299
02300 char is_exactn_bin = FALSE;
02301 #endif
02302
02303
02304 const CHAR_T *p1;
02305
02306
02307 register UCHAR_T *b;
02308
02309
02310 compile_stack_type compile_stack;
02311
02312
02313 #ifdef WCHAR
02314 const CHAR_T *p;
02315 const CHAR_T *pend;
02316 #else
02317 const CHAR_T *p = pattern;
02318 const CHAR_T *pend = pattern + size;
02319 #endif
02320
02321
02322 RE_TRANSLATE_TYPE translate = bufp->translate;
02323
02324
02325
02326
02327
02328 UCHAR_T *pending_exact = 0;
02329
02330
02331
02332
02333 UCHAR_T *laststart = 0;
02334
02335
02336 UCHAR_T *begalt;
02337
02338
02339
02340
02341 UCHAR_T *fixup_alt_jump = 0;
02342
02343
02344
02345
02346 regnum_t regnum = 0;
02347
02348 #ifdef WCHAR
02349
02350 p = pend = pattern = TALLOC(csize + 1, CHAR_T);
02351 mbs_offset = TALLOC(csize + 1, int);
02352 is_binary = TALLOC(csize + 1, char);
02353 if (pattern == NULL || mbs_offset == NULL || is_binary == NULL)
02354 {
02355 free(pattern);
02356 free(mbs_offset);
02357 free(is_binary);
02358 return REG_ESPACE;
02359 }
02360 pattern[csize] = L'\0';
02361 size = convert_mbs_to_wcs(pattern, cpattern, csize, mbs_offset, is_binary);
02362 pend = p + size;
02363 if (size < 0)
02364 {
02365 free(pattern);
02366 free(mbs_offset);
02367 free(is_binary);
02368 return REG_BADPAT;
02369 }
02370 #endif
02371
02372 #ifdef DEBUG
02373 DEBUG_PRINT1 ("\nCompiling pattern: ");
02374 if (debug)
02375 {
02376 unsigned debug_count;
02377
02378 for (debug_count = 0; debug_count < size; debug_count++)
02379 PUT_CHAR (pattern[debug_count]);
02380 putchar ('\n');
02381 }
02382 #endif
02383
02384
02385 compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
02386 if (compile_stack.stack == NULL)
02387 {
02388 #ifdef WCHAR
02389 free(pattern);
02390 free(mbs_offset);
02391 free(is_binary);
02392 #endif
02393 return REG_ESPACE;
02394 }
02395
02396 compile_stack.size = INIT_COMPILE_STACK_SIZE;
02397 compile_stack.avail = 0;
02398
02399
02400 bufp->syntax = syntax;
02401 bufp->fastmap_accurate = 0;
02402 bufp->not_bol = bufp->not_eol = 0;
02403
02404
02405
02406
02407 bufp->used = 0;
02408
02409
02410 bufp->re_nsub = 0;
02411
02412 #if !defined emacs && !defined SYNTAX_TABLE
02413
02414 init_syntax_once ();
02415 #endif
02416
02417 if (bufp->allocated == 0)
02418 {
02419 if (bufp->buffer)
02420 {
02421
02422
02423 #ifdef WCHAR
02424
02425
02426 free(bufp->buffer);
02427 COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE/sizeof(UCHAR_T),
02428 UCHAR_T);
02429 #else
02430 RETALLOC (COMPILED_BUFFER_VAR, INIT_BUF_SIZE, UCHAR_T);
02431 #endif
02432 }
02433 else
02434 {
02435 COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE / sizeof(UCHAR_T),
02436 UCHAR_T);
02437 }
02438
02439 if (!COMPILED_BUFFER_VAR) FREE_STACK_RETURN (REG_ESPACE);
02440 #ifdef WCHAR
02441 bufp->buffer = (char*)COMPILED_BUFFER_VAR;
02442 #endif
02443 bufp->allocated = INIT_BUF_SIZE;
02444 }
02445 #ifdef WCHAR
02446 else
02447 COMPILED_BUFFER_VAR = (UCHAR_T*) bufp->buffer;
02448 #endif
02449
02450 begalt = b = COMPILED_BUFFER_VAR;
02451
02452
02453 while (p != pend)
02454 {
02455 PATFETCH (c);
02456
02457 switch (c)
02458 {
02459 case '^':
02460 {
02461 if (
02462 p == pattern + 1
02463
02464 || syntax & RE_CONTEXT_INDEP_ANCHORS
02465
02466 || PREFIX(at_begline_loc_p) (pattern, p, syntax))
02467 BUF_PUSH (begline);
02468 else
02469 goto normal_char;
02470 }
02471 break;
02472
02473
02474 case '$':
02475 {
02476 if (
02477 p == pend
02478
02479 || syntax & RE_CONTEXT_INDEP_ANCHORS
02480
02481 || PREFIX(at_endline_loc_p) (p, pend, syntax))
02482 BUF_PUSH (endline);
02483 else
02484 goto normal_char;
02485 }
02486 break;
02487
02488
02489 case '+':
02490 case '?':
02491 if ((syntax & RE_BK_PLUS_QM)
02492 || (syntax & RE_LIMITED_OPS))
02493 goto normal_char;
02494 handle_plus:
02495 case '*':
02496
02497 if (!laststart)
02498 {
02499 if (syntax & RE_CONTEXT_INVALID_OPS)
02500 FREE_STACK_RETURN (REG_BADRPT);
02501 else if (!(syntax & RE_CONTEXT_INDEP_OPS))
02502 goto normal_char;
02503 }
02504
02505 {
02506
02507 boolean keep_string_p = false;
02508
02509
02510 char zero_times_ok = 0, many_times_ok = 0;
02511
02512
02513
02514
02515
02516
02517 for (;;)
02518 {
02519 zero_times_ok |= c != '+';
02520 many_times_ok |= c != '?';
02521
02522 if (p == pend)
02523 break;
02524
02525 PATFETCH (c);
02526
02527 if (c == '*'
02528 || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
02529 ;
02530
02531 else if (syntax & RE_BK_PLUS_QM && c == '\\')
02532 {
02533 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
02534
02535 PATFETCH (c1);
02536 if (!(c1 == '+' || c1 == '?'))
02537 {
02538 PATUNFETCH;
02539 PATUNFETCH;
02540 break;
02541 }
02542
02543 c = c1;
02544 }
02545 else
02546 {
02547 PATUNFETCH;
02548 break;
02549 }
02550
02551
02552 }
02553
02554
02555
02556 if (!laststart)
02557 break;
02558
02559
02560
02561 if (many_times_ok)
02562 {
02563
02564
02565
02566
02567
02568
02569
02570
02571
02572 assert (p - 1 > pattern);
02573
02574
02575 GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
02576
02577
02578
02579
02580
02581
02582 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
02583 && zero_times_ok
02584 && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
02585 && !(syntax & RE_DOT_NEWLINE))
02586 {
02587 STORE_JUMP (jump, b, laststart);
02588 keep_string_p = true;
02589 }
02590 else
02591
02592 STORE_JUMP (maybe_pop_jump, b, laststart -
02593 (1 + OFFSET_ADDRESS_SIZE));
02594
02595
02596 b += 1 + OFFSET_ADDRESS_SIZE;
02597 }
02598
02599
02600
02601
02602
02603 GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
02604 INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
02605 : on_failure_jump,
02606 laststart, b + 1 + OFFSET_ADDRESS_SIZE);
02607 pending_exact = 0;
02608 b += 1 + OFFSET_ADDRESS_SIZE;
02609
02610 if (!zero_times_ok)
02611 {
02612
02613
02614
02615
02616
02617 GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
02618 INSERT_JUMP (dummy_failure_jump, laststart, laststart +
02619 2 + 2 * OFFSET_ADDRESS_SIZE);
02620 b += 1 + OFFSET_ADDRESS_SIZE;
02621 }
02622 }
02623 break;
02624
02625
02626 case '.':
02627 laststart = b;
02628 BUF_PUSH (anychar);
02629 break;
02630
02631
02632 case '[':
02633 {
02634 boolean had_char_class = false;
02635 #ifdef WCHAR
02636 CHAR_T range_start = 0xffffffff;
02637 #else
02638 unsigned int range_start = 0xffffffff;
02639 #endif
02640 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
02641
02642 #ifdef WCHAR
02643
02644
02645
02646
02647
02648
02649
02650
02651
02652
02653
02654
02655
02656
02657
02658
02659
02660
02661
02662
02663
02664
02665
02666
02667
02668
02669
02670
02671
02672
02673
02674
02675
02676
02677
02678
02679
02680
02681
02682
02683
02684
02685
02686
02687
02688
02689 GET_BUFFER_SPACE (6);
02690
02691
02692
02693
02694 laststart = b;
02695
02696
02697
02698 BUF_PUSH (*p == '^' ? charset_not : charset);
02699 if (*p == '^')
02700 p++;
02701
02702
02703
02704
02705 BUF_PUSH_3 (0, 0, 0);
02706 BUF_PUSH_2 (0, 0);
02707
02708
02709 p1 = p;
02710
02711
02712 if ((re_opcode_t) b[-6] == charset_not
02713 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
02714 {
02715 BUF_PUSH('\n');
02716 laststart[5]++;
02717 }
02718
02719
02720 for (;;)
02721 {
02722 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
02723
02724 PATFETCH (c);
02725
02726
02727 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
02728 {
02729 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
02730
02731 PATFETCH (c1);
02732 BUF_PUSH(c1);
02733 laststart[5]++;
02734 range_start = c1;
02735 continue;
02736 }
02737
02738
02739
02740
02741 if (c == ']' && p != p1 + 1)
02742 break;
02743
02744
02745
02746 if (had_char_class && c == '-' && *p != ']')
02747 FREE_STACK_RETURN (REG_ERANGE);
02748
02749
02750
02751
02752
02753 if (c == '-'
02754 && !(p - 2 >= pattern && p[-2] == '[')
02755 && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
02756 && *p != ']')
02757 {
02758 reg_errcode_t ret;
02759
02760 GET_BUFFER_SPACE (2);
02761
02762 b += 2;
02763 ret = wcs_compile_range (range_start, &p, pend, translate,
02764 syntax, b, laststart);
02765 if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
02766 range_start = 0xffffffff;
02767 }
02768 else if (p[0] == '-' && p[1] != ']')
02769 {
02770 reg_errcode_t ret;
02771
02772
02773 PATFETCH (c1);
02774
02775 GET_BUFFER_SPACE (2);
02776
02777 b += 2;
02778 ret = wcs_compile_range (c, &p, pend, translate, syntax, b,
02779 laststart);
02780 if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
02781 range_start = 0xffffffff;
02782 }
02783
02784
02785
02786 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
02787 {
02788 char str[CHAR_CLASS_MAX_LENGTH + 1];
02789
02790 PATFETCH (c);
02791 c1 = 0;
02792
02793
02794 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
02795
02796 for (;;)
02797 {
02798 PATFETCH (c);
02799 if ((c == ':' && *p == ']') || p == pend)
02800 break;
02801 if (c1 < CHAR_CLASS_MAX_LENGTH)
02802 str[c1++] = c;
02803 else
02804
02805 str[0] = '\0';
02806 }
02807 str[c1] = '\0';
02808
02809
02810
02811
02812 if (c == ':' && *p == ']')
02813 {
02814 wctype_t wt;
02815 uintptr_t alignedp;
02816
02817
02818 wt = IS_CHAR_CLASS (str);
02819 if (wt == 0)
02820 FREE_STACK_RETURN (REG_ECTYPE);
02821
02822
02823
02824 PATFETCH (c);
02825
02826 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
02827
02828
02829 GET_BUFFER_SPACE(CHAR_CLASS_SIZE);
02830
02831 b += CHAR_CLASS_SIZE;
02832
02833
02834 insert_space(CHAR_CLASS_SIZE,
02835 laststart + 6 + laststart[1],
02836 b - 1);
02837 alignedp = ((uintptr_t)(laststart + 6 + laststart[1])
02838 + __alignof__(wctype_t) - 1)
02839 & ~(uintptr_t)(__alignof__(wctype_t) - 1);
02840
02841 *((wctype_t*)alignedp) = wt;
02842
02843 laststart[1] += CHAR_CLASS_SIZE;
02844
02845 had_char_class = true;
02846 }
02847 else
02848 {
02849 c1++;
02850 while (c1--)
02851 PATUNFETCH;
02852 BUF_PUSH ('[');
02853 BUF_PUSH (':');
02854 laststart[5] += 2;
02855 range_start = ':';
02856 had_char_class = false;
02857 }
02858 }
02859 else if (syntax & RE_CHAR_CLASSES && c == '[' && (*p == '='
02860 || *p == '.'))
02861 {
02862 CHAR_T str[128];
02863 CHAR_T delim = *p;
02864 # ifdef _LIBC
02865 uint32_t nrules =
02866 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
02867 # endif
02868 PATFETCH (c);
02869 c1 = 0;
02870
02871
02872 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
02873
02874 for (;;)
02875 {
02876 PATFETCH (c);
02877 if ((c == delim && *p == ']') || p == pend)
02878 break;
02879 if (c1 < sizeof (str) - 1)
02880 str[c1++] = c;
02881 else
02882
02883 str[0] = '\0';
02884 }
02885 str[c1] = '\0';
02886
02887 if (c == delim && *p == ']' && str[0] != '\0')
02888 {
02889 unsigned int i, offset;
02890
02891
02892
02893
02894
02895
02896
02897
02898
02899 int datasize = c1 + 1;
02900
02901 # ifdef _LIBC
02902 int32_t idx = 0;
02903 if (nrules == 0)
02904 # endif
02905 {
02906 if (c1 != 1)
02907 FREE_STACK_RETURN (REG_ECOLLATE);
02908 }
02909 # ifdef _LIBC
02910 else
02911 {
02912 const int32_t *table;
02913 const int32_t *weights;
02914 const int32_t *extra;
02915 const int32_t *indirect;
02916 wint_t *cp;
02917
02918
02919 # include <locale/weightwc.h>
02920
02921 if(delim == '=')
02922 {
02923
02924 cp = (wint_t*)str;
02925
02926 table = (const int32_t *)
02927 _NL_CURRENT (LC_COLLATE,
02928 _NL_COLLATE_TABLEWC);
02929 weights = (const int32_t *)
02930 _NL_CURRENT (LC_COLLATE,
02931 _NL_COLLATE_WEIGHTWC);
02932 extra = (const int32_t *)
02933 _NL_CURRENT (LC_COLLATE,
02934 _NL_COLLATE_EXTRAWC);
02935 indirect = (const int32_t *)
02936 _NL_CURRENT (LC_COLLATE,
02937 _NL_COLLATE_INDIRECTWC);
02938
02939 idx = findidx ((const wint_t**)&cp);
02940 if (idx == 0 || cp < (wint_t*) str + c1)
02941
02942 FREE_STACK_RETURN (REG_ECOLLATE);
02943
02944 str[0] = (wchar_t)idx;
02945 }
02946 else
02947 {
02948
02949
02950 int32_t table_size;
02951 const int32_t *symb_table;
02952 const unsigned char *extra;
02953 int32_t idx;
02954 int32_t elem;
02955 int32_t second;
02956 int32_t hash;
02957 char char_str[c1];
02958
02959
02960
02961
02962
02963 for (i = 0; i < c1; ++i)
02964 char_str[i] = str[i];
02965
02966 table_size =
02967 _NL_CURRENT_WORD (LC_COLLATE,
02968 _NL_COLLATE_SYMB_HASH_SIZEMB);
02969 symb_table = (const int32_t *)
02970 _NL_CURRENT (LC_COLLATE,
02971 _NL_COLLATE_SYMB_TABLEMB);
02972 extra = (const unsigned char *)
02973 _NL_CURRENT (LC_COLLATE,
02974 _NL_COLLATE_SYMB_EXTRAMB);
02975
02976
02977 hash = elem_hash (char_str, c1);
02978
02979 idx = 0;
02980 elem = hash % table_size;
02981 second = hash % (table_size - 2);
02982 while (symb_table[2 * elem] != 0)
02983 {
02984
02985 if (symb_table[2 * elem] == hash
02986 && c1 == extra[symb_table[2 * elem + 1]]
02987 && memcmp (char_str,
02988 &extra[symb_table[2 * elem + 1]
02989 + 1], c1) == 0)
02990 {
02991
02992 idx = symb_table[2 * elem + 1];
02993 idx += 1 + extra[idx];
02994 break;
02995 }
02996
02997
02998 elem += second;
02999 }
03000
03001 if (symb_table[2 * elem] != 0)
03002 {
03003
03004
03005 idx += 1 + extra[idx];
03006
03007 idx = (idx + 3) & ~3;
03008
03009 str[0] = (wchar_t) idx + 4;
03010 }
03011 else if (symb_table[2 * elem] == 0 && c1 == 1)
03012 {
03013
03014
03015 had_char_class = false;
03016 BUF_PUSH(str[0]);
03017
03018 laststart[5]++;
03019 range_start = str[0];
03020
03021
03022
03023 PATFETCH (c);
03024
03025 continue;
03026 }
03027 else
03028 FREE_STACK_RETURN (REG_ECOLLATE);
03029 }
03030 datasize = 1;
03031 }
03032 # endif
03033
03034
03035 PATFETCH (c);
03036
03037
03038
03039 GET_BUFFER_SPACE(datasize);
03040
03041 b += datasize;
03042
03043 if (delim == '=')
03044 {
03045
03046
03047 offset = laststart[1] + laststart[2]
03048 + laststart[3] +6;
03049
03050 insert_space(datasize, laststart + offset, b - 1);
03051
03052
03053 for (i = 0 ; i < datasize ; i++)
03054 laststart[offset + i] = str[i];
03055
03056
03057 laststart[3] += datasize;
03058 had_char_class = true;
03059 }
03060 else
03061 {
03062
03063
03064 offset = laststart[1] + laststart[2] + 6;
03065
03066
03067 insert_space(datasize, laststart + offset, b-1);
03068 for (i = 0 ; i < datasize ; i++)
03069 laststart[offset + i] = str[i];
03070
03071
03072
03073
03074
03075
03076
03077 range_start = -(laststart[1] + laststart[2] + 6);
03078
03079 laststart[2] += datasize;
03080 had_char_class = false;
03081 }
03082 }
03083 else
03084 {
03085 c1++;
03086 while (c1--)
03087 PATUNFETCH;
03088 BUF_PUSH ('[');
03089 BUF_PUSH (delim);
03090 laststart[5] += 2;
03091 range_start = delim;
03092 had_char_class = false;
03093 }
03094 }
03095 else
03096 {
03097 had_char_class = false;
03098 BUF_PUSH(c);
03099 laststart[5]++;
03100 range_start = c;
03101 }
03102 }
03103
03104 #else
03105
03106
03107 GET_BUFFER_SPACE (34);
03108
03109 laststart = b;
03110
03111
03112
03113 BUF_PUSH (*p == '^' ? charset_not : charset);
03114 if (*p == '^')
03115 p++;
03116
03117
03118 p1 = p;
03119
03120
03121 BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
03122
03123
03124 bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
03125
03126
03127 if ((re_opcode_t) b[-2] == charset_not
03128 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
03129 SET_LIST_BIT ('\n');
03130
03131
03132 for (;;)
03133 {
03134 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
03135
03136 PATFETCH (c);
03137
03138
03139 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
03140 {
03141 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
03142
03143 PATFETCH (c1);
03144 SET_LIST_BIT (c1);
03145 range_start = c1;
03146 continue;
03147 }
03148
03149
03150
03151
03152 if (c == ']' && p != p1 + 1)
03153 break;
03154
03155
03156
03157 if (had_char_class && c == '-' && *p != ']')
03158 FREE_STACK_RETURN (REG_ERANGE);
03159
03160
03161
03162
03163
03164 if (c == '-'
03165 && !(p - 2 >= pattern && p[-2] == '[')
03166 && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
03167 && *p != ']')
03168 {
03169 reg_errcode_t ret
03170 = byte_compile_range (range_start, &p, pend, translate,
03171 syntax, b);
03172 if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
03173 range_start = 0xffffffff;
03174 }
03175
03176 else if (p[0] == '-' && p[1] != ']')
03177 {
03178 reg_errcode_t ret;
03179
03180
03181 PATFETCH (c1);
03182
03183 ret = byte_compile_range (c, &p, pend, translate, syntax, b);
03184 if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
03185 range_start = 0xffffffff;
03186 }
03187
03188
03189
03190
03191 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
03192 {
03193 char str[CHAR_CLASS_MAX_LENGTH + 1];
03194
03195 PATFETCH (c);
03196 c1 = 0;
03197
03198
03199 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
03200
03201 for (;;)
03202 {
03203 PATFETCH (c);
03204 if ((c == ':' && *p == ']') || p == pend)
03205 break;
03206 if (c1 < CHAR_CLASS_MAX_LENGTH)
03207 str[c1++] = c;
03208 else
03209
03210 str[0] = '\0';
03211 }
03212 str[c1] = '\0';
03213
03214
03215
03216
03217 if (c == ':' && *p == ']')
03218 {
03219 # if defined _LIBC || WIDE_CHAR_SUPPORT
03220 boolean is_lower = STREQ (str, "lower");
03221 boolean is_upper = STREQ (str, "upper");
03222 wctype_t wt;
03223 int ch;
03224
03225 wt = IS_CHAR_CLASS (str);
03226 if (wt == 0)
03227 FREE_STACK_RETURN (REG_ECTYPE);
03228
03229
03230
03231 PATFETCH (c);
03232
03233 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
03234
03235 for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
03236 {
03237 # ifdef _LIBC
03238 if (__iswctype (__btowc (ch), wt))
03239 SET_LIST_BIT (ch);
03240 # else
03241 if (iswctype (btowc (ch), wt))
03242 SET_LIST_BIT (ch);
03243 # endif
03244
03245 if (translate && (is_upper || is_lower)
03246 && (ISUPPER (ch) || ISLOWER (ch)))
03247 SET_LIST_BIT (ch);
03248 }
03249
03250 had_char_class = true;
03251 # else
03252 int ch;
03253 boolean is_alnum = STREQ (str, "alnum");
03254 boolean is_alpha = STREQ (str, "alpha");
03255 boolean is_blank = STREQ (str, "blank");
03256 boolean is_cntrl = STREQ (str, "cntrl");
03257 boolean is_digit = STREQ (str, "digit");
03258 boolean is_graph = STREQ (str, "graph");
03259 boolean is_lower = STREQ (str, "lower");
03260 boolean is_print = STREQ (str, "print");
03261 boolean is_punct = STREQ (str, "punct");
03262 boolean is_space = STREQ (str, "space");
03263 boolean is_upper = STREQ (str, "upper");
03264 boolean is_xdigit = STREQ (str, "xdigit");
03265
03266 if (!IS_CHAR_CLASS (str))
03267 FREE_STACK_RETURN (REG_ECTYPE);
03268
03269
03270
03271 PATFETCH (c);
03272
03273 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
03274
03275 for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
03276 {
03277
03278
03279 if ( (is_alnum && ISALNUM (ch))
03280 || (is_alpha && ISALPHA (ch))
03281 || (is_blank && ISBLANK (ch))
03282 || (is_cntrl && ISCNTRL (ch)))
03283 SET_LIST_BIT (ch);
03284 if ( (is_digit && ISDIGIT (ch))
03285 || (is_graph && ISGRAPH (ch))
03286 || (is_lower && ISLOWER (ch))
03287 || (is_print && ISPRINT (ch)))
03288 SET_LIST_BIT (ch);
03289 if ( (is_punct && ISPUNCT (ch))
03290 || (is_space && ISSPACE (ch))
03291 || (is_upper && ISUPPER (ch))
03292 || (is_xdigit && ISXDIGIT (ch)))
03293 SET_LIST_BIT (ch);
03294 if ( translate && (is_upper || is_lower)
03295 && (ISUPPER (ch) || ISLOWER (ch)))
03296 SET_LIST_BIT (ch);
03297 }
03298 had_char_class = true;
03299 # endif
03300 }
03301 else
03302 {
03303 c1++;
03304 while (c1--)
03305 PATUNFETCH;
03306 SET_LIST_BIT ('[');
03307 SET_LIST_BIT (':');
03308 range_start = ':';
03309 had_char_class = false;
03310 }
03311 }
03312 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '=')
03313 {
03314 unsigned char str[MB_LEN_MAX + 1];
03315 # ifdef _LIBC
03316 uint32_t nrules =
03317 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
03318 # endif
03319
03320 PATFETCH (c);
03321 c1 = 0;
03322
03323
03324 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
03325
03326 for (;;)
03327 {
03328 PATFETCH (c);
03329 if ((c == '=' && *p == ']') || p == pend)
03330 break;
03331 if (c1 < MB_LEN_MAX)
03332 str[c1++] = c;
03333 else
03334
03335 str[0] = '\0';
03336 }
03337 str[c1] = '\0';
03338
03339 if (c == '=' && *p == ']' && str[0] != '\0')
03340 {
03341
03342
03343
03344
03345
03346
03347 # ifdef _LIBC
03348 if (nrules == 0)
03349 # endif
03350 {
03351 if (c1 != 1)
03352 FREE_STACK_RETURN (REG_ECOLLATE);
03353
03354
03355
03356 PATFETCH (c);
03357
03358
03359 SET_LIST_BIT (str[0]);
03360 }
03361 # ifdef _LIBC
03362 else
03363 {
03364
03365
03366
03367
03368 const int32_t *table;
03369 const unsigned char *weights;
03370 const unsigned char *extra;
03371 const int32_t *indirect;
03372 int32_t idx;
03373 const unsigned char *cp = str;
03374 int ch;
03375
03376
03377 # include <locale/weight.h>
03378
03379 table = (const int32_t *)
03380 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
03381 weights = (const unsigned char *)
03382 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
03383 extra = (const unsigned char *)
03384 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
03385 indirect = (const int32_t *)
03386 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
03387
03388 idx = findidx (&cp);
03389 if (idx == 0 || cp < str + c1)
03390
03391 FREE_STACK_RETURN (REG_ECOLLATE);
03392
03393
03394
03395 PATFETCH (c);
03396
03397
03398
03399
03400
03401
03402
03403
03404
03405 for (ch = 1; ch < 256; ++ch)
03406
03407
03408 if (table[ch] > 0)
03409 {
03410 int32_t idx2 = table[ch];
03411 size_t len = weights[idx2];
03412
03413
03414 if (weights[idx] == len)
03415 {
03416
03417
03418 size_t cnt = 0;
03419
03420 while (cnt < len
03421 && (weights[idx + 1 + cnt]
03422 == weights[idx2 + 1 + cnt]))
03423 ++cnt;
03424
03425 if (cnt == len)
03426
03427
03428 SET_LIST_BIT (ch);
03429 }
03430 }
03431 }
03432 # endif
03433 had_char_class = true;
03434 }
03435 else
03436 {
03437 c1++;
03438 while (c1--)
03439 PATUNFETCH;
03440 SET_LIST_BIT ('[');
03441 SET_LIST_BIT ('=');
03442 range_start = '=';
03443 had_char_class = false;
03444 }
03445 }
03446 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '.')
03447 {
03448 unsigned char str[128];
03449 # ifdef _LIBC
03450 uint32_t nrules =
03451 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
03452 # endif
03453
03454 PATFETCH (c);
03455 c1 = 0;
03456
03457
03458 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
03459
03460 for (;;)
03461 {
03462 PATFETCH (c);
03463 if ((c == '.' && *p == ']') || p == pend)
03464 break;
03465 if (c1 < sizeof (str))
03466 str[c1++] = c;
03467 else
03468
03469 str[0] = '\0';
03470 }
03471 str[c1] = '\0';
03472
03473 if (c == '.' && *p == ']' && str[0] != '\0')
03474 {
03475
03476
03477
03478
03479
03480
03481
03482 # ifdef _LIBC
03483 if (nrules == 0)
03484 # endif
03485 {
03486 if (c1 != 1)
03487 FREE_STACK_RETURN (REG_ECOLLATE);
03488
03489
03490
03491 PATFETCH (c);
03492
03493
03494 SET_LIST_BIT (str[0]);
03495 range_start = ((const unsigned char *) str)[0];
03496 }
03497 # ifdef _LIBC
03498 else
03499 {
03500
03501
03502
03503
03504 int32_t table_size;
03505 const int32_t *symb_table;
03506 const unsigned char *extra;
03507 int32_t idx;
03508 int32_t elem;
03509 int32_t second;
03510 int32_t hash;
03511
03512 table_size =
03513 _NL_CURRENT_WORD (LC_COLLATE,
03514 _NL_COLLATE_SYMB_HASH_SIZEMB);
03515 symb_table = (const int32_t *)
03516 _NL_CURRENT (LC_COLLATE,
03517 _NL_COLLATE_SYMB_TABLEMB);
03518 extra = (const unsigned char *)
03519 _NL_CURRENT (LC_COLLATE,
03520 _NL_COLLATE_SYMB_EXTRAMB);
03521
03522
03523 hash = elem_hash (str, c1);
03524
03525 idx = 0;
03526 elem = hash % table_size;
03527 second = hash % (table_size - 2);
03528 while (symb_table[2 * elem] != 0)
03529 {
03530
03531 if (symb_table[2 * elem] == hash
03532 && c1 == extra[symb_table[2 * elem + 1]]
03533 && memcmp (str,
03534 &extra[symb_table[2 * elem + 1]
03535 + 1],
03536 c1) == 0)
03537 {
03538
03539 idx = symb_table[2 * elem + 1];
03540 idx += 1 + extra[idx];
03541 break;
03542 }
03543
03544
03545 elem += second;
03546 }
03547
03548 if (symb_table[2 * elem] == 0)
03549
03550 FREE_STACK_RETURN (REG_ECOLLATE);
03551
03552
03553
03554 PATFETCH (c);
03555
03556
03557
03558
03559
03560
03561
03562
03563
03564
03565
03566 c1 = extra[idx++];
03567 if (c1 == 1)
03568 range_start = extra[idx];
03569 while (c1-- > 0)
03570 {
03571 SET_LIST_BIT (extra[idx]);
03572 ++idx;
03573 }
03574 }
03575 # endif
03576 had_char_class = false;
03577 }
03578 else
03579 {
03580 c1++;
03581 while (c1--)
03582 PATUNFETCH;
03583 SET_LIST_BIT ('[');
03584 SET_LIST_BIT ('.');
03585 range_start = '.';
03586 had_char_class = false;
03587 }
03588 }
03589 else
03590 {
03591 had_char_class = false;
03592 SET_LIST_BIT (c);
03593 range_start = c;
03594 }
03595 }
03596
03597
03598
03599 while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
03600 b[-1]--;
03601 b += b[-1];
03602 #endif
03603 }
03604 break;
03605
03606
03607 case '(':
03608 if (syntax & RE_NO_BK_PARENS)
03609 goto handle_open;
03610 else
03611 goto normal_char;
03612
03613
03614 case ')':
03615 if (syntax & RE_NO_BK_PARENS)
03616 goto handle_close;
03617 else
03618 goto normal_char;
03619
03620
03621 case '\n':
03622 if (syntax & RE_NEWLINE_ALT)
03623 goto handle_alt;
03624 else
03625 goto normal_char;
03626
03627
03628 case '|':
03629 if (syntax & RE_NO_BK_VBAR)
03630 goto handle_alt;
03631 else
03632 goto normal_char;
03633
03634
03635 case '{':
03636 if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
03637 goto handle_interval;
03638 else
03639 goto normal_char;
03640
03641
03642 case '\\':
03643 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
03644
03645
03646
03647
03648 PATFETCH_RAW (c);
03649
03650 switch (c)
03651 {
03652 case '(':
03653 if (syntax & RE_NO_BK_PARENS)
03654 goto normal_backslash;
03655
03656 handle_open:
03657 bufp->re_nsub++;
03658 regnum++;
03659
03660 if (COMPILE_STACK_FULL)
03661 {
03662 RETALLOC (compile_stack.stack, compile_stack.size << 1,
03663 compile_stack_elt_t);
03664 if (compile_stack.stack == NULL) return REG_ESPACE;
03665
03666 compile_stack.size <<= 1;
03667 }
03668
03669
03670
03671
03672
03673 COMPILE_STACK_TOP.begalt_offset = begalt - COMPILED_BUFFER_VAR;
03674 COMPILE_STACK_TOP.fixup_alt_jump
03675 = fixup_alt_jump ? fixup_alt_jump - COMPILED_BUFFER_VAR + 1 : 0;
03676 COMPILE_STACK_TOP.laststart_offset = b - COMPILED_BUFFER_VAR;
03677 COMPILE_STACK_TOP.regnum = regnum;
03678
03679
03680
03681
03682
03683 if (regnum <= MAX_REGNUM)
03684 {
03685 COMPILE_STACK_TOP.inner_group_offset = b
03686 - COMPILED_BUFFER_VAR + 2;
03687 BUF_PUSH_3 (start_memory, regnum, 0);
03688 }
03689
03690 compile_stack.avail++;
03691
03692 fixup_alt_jump = 0;
03693 laststart = 0;
03694 begalt = b;
03695
03696
03697
03698 pending_exact = 0;
03699 break;
03700
03701
03702 case ')':
03703 if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
03704
03705 if (COMPILE_STACK_EMPTY)
03706 {
03707 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
03708 goto normal_backslash;
03709 else
03710 FREE_STACK_RETURN (REG_ERPAREN);
03711 }
03712
03713 handle_close:
03714 if (fixup_alt_jump)
03715 {
03716
03717
03718
03719 BUF_PUSH (push_dummy_failure);
03720
03721
03722
03723 STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
03724 }
03725
03726
03727 if (COMPILE_STACK_EMPTY)
03728 {
03729 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
03730 goto normal_char;
03731 else
03732 FREE_STACK_RETURN (REG_ERPAREN);
03733 }
03734
03735
03736
03737 assert (compile_stack.avail != 0);
03738 {
03739
03740
03741
03742 regnum_t this_group_regnum;
03743
03744 compile_stack.avail--;
03745 begalt = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.begalt_offset;
03746 fixup_alt_jump
03747 = COMPILE_STACK_TOP.fixup_alt_jump
03748 ? COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.fixup_alt_jump - 1
03749 : 0;
03750 laststart = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.laststart_offset;
03751 this_group_regnum = COMPILE_STACK_TOP.regnum;
03752
03753
03754
03755 pending_exact = 0;
03756
03757
03758
03759 if (this_group_regnum <= MAX_REGNUM)
03760 {
03761 UCHAR_T *inner_group_loc
03762 = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.inner_group_offset;
03763
03764 *inner_group_loc = regnum - this_group_regnum;
03765 BUF_PUSH_3 (stop_memory, this_group_regnum,
03766 regnum - this_group_regnum);
03767 }
03768 }
03769 break;
03770
03771
03772 case '|':
03773 if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
03774 goto normal_backslash;
03775 handle_alt:
03776 if (syntax & RE_LIMITED_OPS)
03777 goto normal_char;
03778
03779
03780
03781 GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
03782 INSERT_JUMP (on_failure_jump, begalt,
03783 b + 2 + 2 * OFFSET_ADDRESS_SIZE);
03784 pending_exact = 0;
03785 b += 1 + OFFSET_ADDRESS_SIZE;
03786
03787
03788
03789
03790
03791
03792
03793
03794
03795
03796
03797
03798
03799
03800
03801
03802
03803 if (fixup_alt_jump)
03804 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
03805
03806
03807
03808
03809 fixup_alt_jump = b;
03810 GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
03811 b += 1 + OFFSET_ADDRESS_SIZE;
03812
03813 laststart = 0;
03814 begalt = b;
03815 break;
03816
03817
03818 case '{':
03819
03820 if (!(syntax & RE_INTERVALS)
03821
03822
03823 || (syntax & RE_NO_BK_BRACES))
03824 goto normal_backslash;
03825
03826 handle_interval:
03827 {
03828
03829
03830
03831 int lower_bound = -1, upper_bound = -1;
03832
03833
03834
03835 const CHAR_T *beg_interval = p;
03836
03837 if (p == pend)
03838 goto invalid_interval;
03839
03840 GET_UNSIGNED_NUMBER (lower_bound);
03841
03842 if (c == ',')
03843 {
03844 GET_UNSIGNED_NUMBER (upper_bound);
03845 if (upper_bound < 0)
03846 upper_bound = RE_DUP_MAX;
03847 }
03848 else
03849
03850 upper_bound = lower_bound;
03851
03852 if (! (0 <= lower_bound && lower_bound <= upper_bound))
03853 goto invalid_interval;
03854
03855 if (!(syntax & RE_NO_BK_BRACES))
03856 {
03857 if (c != '\\' || p == pend)
03858 goto invalid_interval;
03859 PATFETCH (c);
03860 }
03861
03862 if (c != '}')
03863 goto invalid_interval;
03864
03865
03866 if (!laststart)
03867 {
03868 if (syntax & RE_CONTEXT_INVALID_OPS
03869 && !(syntax & RE_INVALID_INTERVAL_ORD))
03870 FREE_STACK_RETURN (REG_BADRPT);
03871 else if (syntax & RE_CONTEXT_INDEP_OPS)
03872 laststart = b;
03873 else
03874 goto unfetch_interval;
03875 }
03876
03877
03878
03879 if (RE_DUP_MAX < upper_bound)
03880 FREE_STACK_RETURN (REG_BADBR);
03881
03882
03883
03884
03885
03886
03887 if (upper_bound == 0)
03888 {
03889 GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
03890 INSERT_JUMP (jump, laststart, b + 1
03891 + OFFSET_ADDRESS_SIZE);
03892 b += 1 + OFFSET_ADDRESS_SIZE;
03893 }
03894
03895
03896
03897
03898
03899
03900
03901
03902
03903
03904 else
03905 {
03906
03907 unsigned nbytes = 2 + 4 * OFFSET_ADDRESS_SIZE +
03908 (upper_bound > 1) * (2 + 4 * OFFSET_ADDRESS_SIZE);
03909
03910 GET_BUFFER_SPACE (nbytes);
03911
03912
03913
03914
03915
03916
03917 INSERT_JUMP2 (succeed_n, laststart,
03918 b + 1 + 2 * OFFSET_ADDRESS_SIZE
03919 + (upper_bound > 1) * (1 + 2 * OFFSET_ADDRESS_SIZE)
03920 , lower_bound);
03921 b += 1 + 2 * OFFSET_ADDRESS_SIZE;
03922
03923
03924
03925
03926
03927
03928
03929
03930 PREFIX(insert_op2) (set_number_at, laststart, 1
03931 + 2 * OFFSET_ADDRESS_SIZE, lower_bound, b);
03932 b += 1 + 2 * OFFSET_ADDRESS_SIZE;
03933
03934 if (upper_bound > 1)
03935 {
03936
03937
03938
03939
03940
03941
03942 STORE_JUMP2 (jump_n, b, laststart
03943 + 2 * OFFSET_ADDRESS_SIZE + 1,
03944 upper_bound - 1);
03945 b += 1 + 2 * OFFSET_ADDRESS_SIZE;
03946
03947
03948
03949
03950
03951
03952
03953
03954
03955
03956
03957
03958
03959
03960
03961 PREFIX(insert_op2) (set_number_at, laststart,
03962 b - laststart,
03963 upper_bound - 1, b);
03964 b += 1 + 2 * OFFSET_ADDRESS_SIZE;
03965 }
03966 }
03967 pending_exact = 0;
03968 break;
03969
03970 invalid_interval:
03971 if (!(syntax & RE_INVALID_INTERVAL_ORD))
03972 FREE_STACK_RETURN (p == pend ? REG_EBRACE : REG_BADBR);
03973 unfetch_interval:
03974
03975 p = beg_interval;
03976 c = '{';
03977 if (syntax & RE_NO_BK_BRACES)
03978 goto normal_char;
03979 else
03980 goto normal_backslash;
03981 }
03982
03983 #ifdef emacs
03984
03985
03986 case '=':
03987 BUF_PUSH (at_dot);
03988 break;
03989
03990 case 's':
03991 laststart = b;
03992 PATFETCH (c);
03993 BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
03994 break;
03995
03996 case 'S':
03997 laststart = b;
03998 PATFETCH (c);
03999 BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
04000 break;
04001 #endif
04002
04003
04004 case 'w':
04005 if (syntax & RE_NO_GNU_OPS)
04006 goto normal_char;
04007 laststart = b;
04008 BUF_PUSH (wordchar);
04009 break;
04010
04011
04012 case 'W':
04013 if (syntax & RE_NO_GNU_OPS)
04014 goto normal_char;
04015 laststart = b;
04016 BUF_PUSH (notwordchar);
04017 break;
04018
04019
04020 case '<':
04021 if (syntax & RE_NO_GNU_OPS)
04022 goto normal_char;
04023 BUF_PUSH (wordbeg);
04024 break;
04025
04026 case '>':
04027 if (syntax & RE_NO_GNU_OPS)
04028 goto normal_char;
04029 BUF_PUSH (wordend);
04030 break;
04031
04032 case 'b':
04033 if (syntax & RE_NO_GNU_OPS)
04034 goto normal_char;
04035 BUF_PUSH (wordbound);
04036 break;
04037
04038 case 'B':
04039 if (syntax & RE_NO_GNU_OPS)
04040 goto normal_char;
04041 BUF_PUSH (notwordbound);
04042 break;
04043
04044 case '`':
04045 if (syntax & RE_NO_GNU_OPS)
04046 goto normal_char;
04047 BUF_PUSH (begbuf);
04048 break;
04049
04050 case '\'':
04051 if (syntax & RE_NO_GNU_OPS)
04052 goto normal_char;
04053 BUF_PUSH (endbuf);
04054 break;
04055
04056 case '1': case '2': case '3': case '4': case '5':
04057 case '6': case '7': case '8': case '9':
04058 if (syntax & RE_NO_BK_REFS)
04059 goto normal_char;
04060
04061 c1 = c - '0';
04062
04063 if (c1 > regnum)
04064 FREE_STACK_RETURN (REG_ESUBREG);
04065
04066
04067 if (group_in_compile_stack (compile_stack, (regnum_t) c1))
04068 goto normal_char;
04069
04070 laststart = b;
04071 BUF_PUSH_2 (duplicate, c1);
04072 break;
04073
04074
04075 case '+':
04076 case '?':
04077 if (syntax & RE_BK_PLUS_QM)
04078 goto handle_plus;
04079 else
04080 goto normal_backslash;
04081
04082 default:
04083 normal_backslash:
04084
04085
04086
04087 c = TRANSLATE (c);
04088 goto normal_char;
04089 }
04090 break;
04091
04092
04093 default:
04094
04095 normal_char:
04096
04097 if (!pending_exact
04098 #ifdef WCHAR
04099
04100
04101 || is_exactn_bin != is_binary[p - 1 - pattern]
04102 #endif
04103
04104
04105 || pending_exact + *pending_exact + 1 != b
04106
04107
04108 || *pending_exact == (1 << BYTEWIDTH) - 1
04109
04110
04111 || *p == '*' || *p == '^'
04112 || ((syntax & RE_BK_PLUS_QM)
04113 ? *p == '\\' && (p[1] == '+' || p[1] == '?')
04114 : (*p == '+' || *p == '?'))
04115 || ((syntax & RE_INTERVALS)
04116 && ((syntax & RE_NO_BK_BRACES)
04117 ? *p == '{'
04118 : (p[0] == '\\' && p[1] == '{'))))
04119 {
04120
04121
04122 laststart = b;
04123
04124 #ifdef WCHAR
04125
04126 is_exactn_bin = is_binary[p - 1 - pattern];
04127 if (is_exactn_bin)
04128 BUF_PUSH_2 (exactn_bin, 0);
04129 else
04130 BUF_PUSH_2 (exactn, 0);
04131 #else
04132 BUF_PUSH_2 (exactn, 0);
04133 #endif
04134 pending_exact = b - 1;
04135 }
04136
04137 BUF_PUSH (c);
04138 (*pending_exact)++;
04139 break;
04140 }
04141 }
04142
04143
04144
04145
04146 if (fixup_alt_jump)
04147 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
04148
04149 if (!COMPILE_STACK_EMPTY)
04150 FREE_STACK_RETURN (REG_EPAREN);
04151
04152
04153
04154 if (syntax & RE_NO_POSIX_BACKTRACKING)
04155 BUF_PUSH (succeed);
04156
04157 #ifdef WCHAR
04158 free (pattern);
04159 free (mbs_offset);
04160 free (is_binary);
04161 #endif
04162 free (compile_stack.stack);
04163
04164
04165 #ifdef WCHAR
04166 bufp->used = (uintptr_t) b - (uintptr_t) COMPILED_BUFFER_VAR;
04167 #else
04168 bufp->used = b - bufp->buffer;
04169 #endif
04170
04171 #ifdef DEBUG
04172 if (debug)
04173 {
04174 DEBUG_PRINT1 ("\nCompiled pattern: \n");
04175 PREFIX(print_compiled_pattern) (bufp);
04176 }
04177 #endif
04178
04179 #ifndef MATCH_MAY_ALLOCATE
04180
04181
04182
04183 {
04184 int num_regs = bufp->re_nsub + 1;
04185
04186
04187
04188
04189 if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
04190 {
04191 fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
04192
04193 # ifdef emacs
04194 if (! fail_stack.stack)
04195 fail_stack.stack
04196 = (PREFIX(fail_stack_elt_t) *) xmalloc (fail_stack.size
04197 * sizeof (PREFIX(fail_stack_elt_t)));
04198 else
04199 fail_stack.stack
04200 = (PREFIX(fail_stack_elt_t) *) xrealloc (fail_stack.stack,
04201 (fail_stack.size
04202 * sizeof (PREFIX(fail_stack_elt_t))));
04203 # else
04204 if (! fail_stack.stack)
04205 fail_stack.stack
04206 = (PREFIX(fail_stack_elt_t) *) malloc (fail_stack.size
04207 * sizeof (PREFIX(fail_stack_elt_t)));
04208 else
04209 fail_stack.stack
04210 = (PREFIX(fail_stack_elt_t) *) realloc (fail_stack.stack,
04211 (fail_stack.size
04212 * sizeof (PREFIX(fail_stack_elt_t))));
04213 # endif
04214 }
04215
04216 PREFIX(regex_grow_registers) (num_regs);
04217 }
04218 #endif
04219
04220 return REG_NOERROR;
04221 }
04222
04223
04224
04225
04226
04227
04228 static void
04229 PREFIX(store_op1) (re_opcode_t op, UCHAR_T *loc, int arg)
04230 {
04231 *loc = (UCHAR_T) op;
04232 STORE_NUMBER (loc + 1, arg);
04233 }
04234
04235
04236
04237
04238
04239 static void
04240 PREFIX(store_op2) (re_opcode_t op, UCHAR_T *loc, int arg1, int arg2)
04241 {
04242 *loc = (UCHAR_T) op;
04243 STORE_NUMBER (loc + 1, arg1);
04244 STORE_NUMBER (loc + 1 + OFFSET_ADDRESS_SIZE, arg2);
04245 }
04246
04247
04248
04249
04250
04251
04252 static void
04253 PREFIX(insert_op1) (re_opcode_t op, UCHAR_T *loc, int arg, UCHAR_T *end)
04254 {
04255 register UCHAR_T *pfrom = end;
04256 register UCHAR_T *pto = end + 1 + OFFSET_ADDRESS_SIZE;
04257
04258 while (pfrom != loc)
04259 *--pto = *--pfrom;
04260
04261 PREFIX(store_op1) (op, loc, arg);
04262 }
04263
04264
04265
04266
04267
04268 static void
04269 PREFIX(insert_op2) (re_opcode_t op, UCHAR_T *loc, int arg1,
04270 int arg2, UCHAR_T *end)
04271 {
04272 register UCHAR_T *pfrom = end;
04273 register UCHAR_T *pto = end + 1 + 2 * OFFSET_ADDRESS_SIZE;
04274
04275 while (pfrom != loc)
04276 *--pto = *--pfrom;
04277
04278 PREFIX(store_op2) (op, loc, arg1, arg2);
04279 }
04280
04281
04282
04283
04284
04285
04286 static boolean
04287 PREFIX(at_begline_loc_p) (const CHAR_T *pattern, const CHAR_T *p,
04288 reg_syntax_t syntax)
04289 {
04290 const CHAR_T *prev = p - 2;
04291 boolean prev_prev_backslash = prev > pattern && prev[-1] == '\\';
04292
04293 return
04294
04295 (*prev == '(' && (syntax & RE_NO_BK_PARENS || prev_prev_backslash))
04296
04297 || (*prev == '|' && (syntax & RE_NO_BK_VBAR || prev_prev_backslash));
04298 }
04299
04300
04301
04302
04303
04304 static boolean
04305 PREFIX(at_endline_loc_p) (const CHAR_T *p, const CHAR_T *pend,
04306 reg_syntax_t syntax)
04307 {
04308 const CHAR_T *next = p;
04309 boolean next_backslash = *next == '\\';
04310 const CHAR_T *next_next = p + 1 < pend ? p + 1 : 0;
04311
04312 return
04313
04314 (syntax & RE_NO_BK_PARENS ? *next == ')'
04315 : next_backslash && next_next && *next_next == ')')
04316
04317 || (syntax & RE_NO_BK_VBAR ? *next == '|'
04318 : next_backslash && next_next && *next_next == '|');
04319 }
04320
04321 #else
04322
04323
04324
04325
04326 static boolean
04327 group_in_compile_stack (compile_stack_type compile_stack, regnum_t regnum)
04328 {
04329 int this_element;
04330
04331 for (this_element = compile_stack.avail - 1;
04332 this_element >= 0;
04333 this_element--)
04334 if (compile_stack.stack[this_element].regnum == regnum)
04335 return true;
04336
04337 return false;
04338 }
04339 #endif
04340
04341 #ifdef INSIDE_RECURSION
04342
04343 #ifdef WCHAR
04344
04345
04346 static void
04347 insert_space (int num, CHAR_T *loc, CHAR_T *end)
04348 {
04349 register CHAR_T *pto = end;
04350 register CHAR_T *pfrom = end - num;
04351
04352 while (pfrom >= loc)
04353 *pto-- = *pfrom--;
04354 }
04355 #endif
04356
04357 #ifdef WCHAR
04358 static reg_errcode_t
04359 wcs_compile_range (CHAR_T range_start_char, const CHAR_T **p_ptr,
04360 const CHAR_T *pend, RE_TRANSLATE_TYPE translate,
04361 reg_syntax_t syntax, CHAR_T *b, CHAR_T *char_set)
04362 {
04363 const CHAR_T *p = *p_ptr;
04364 CHAR_T range_start, range_end;
04365 reg_errcode_t ret;
04366 # ifdef _LIBC
04367 uint32_t nrules;
04368 uint32_t start_val, end_val;
04369 # endif
04370 if (p == pend)
04371 return REG_ERANGE;
04372
04373 # ifdef _LIBC
04374 nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
04375 if (nrules != 0)
04376 {
04377 const char *collseq = (const char *) _NL_CURRENT(LC_COLLATE,
04378 _NL_COLLATE_COLLSEQWC);
04379 const unsigned char *extra = (const unsigned char *)
04380 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
04381
04382 if (range_start_char < -1)
04383 {
04384
04385 int32_t *wextra;
04386
04387 wextra = (int32_t*)(extra + char_set[-range_start_char]);
04388 start_val = wextra[1 + *wextra];
04389 }
04390 else
04391 start_val = collseq_table_lookup(collseq, TRANSLATE(range_start_char));
04392
04393 end_val = collseq_table_lookup (collseq, TRANSLATE (p[0]));
04394
04395
04396
04397 ret = ((syntax & RE_NO_EMPTY_RANGES)
04398 && (start_val > end_val))? REG_ERANGE : REG_NOERROR;
04399
04400
04401 insert_space(2, b - char_set[5] - 2, b - 1);
04402 *(b - char_set[5] - 2) = (wchar_t)start_val;
04403 *(b - char_set[5] - 1) = (wchar_t)end_val;
04404 char_set[4]++;
04405 }
04406 else
04407 # endif
04408 {
04409 range_start = (range_start_char >= 0)? TRANSLATE (range_start_char):
04410 range_start_char;
04411 range_end = TRANSLATE (p[0]);
04412
04413
04414 ret = ((syntax & RE_NO_EMPTY_RANGES)
04415 && (range_start > range_end))? REG_ERANGE : REG_NOERROR;
04416
04417
04418 insert_space(2, b - char_set[5] - 2, b - 1);
04419 *(b - char_set[5] - 2) = range_start;
04420 *(b - char_set[5] - 1) = range_end;
04421 char_set[4]++;
04422 }
04423
04424
04425 (*p_ptr)++;
04426
04427 return ret;
04428 }
04429 #else
04430
04431
04432
04433
04434
04435
04436
04437
04438
04439
04440
04441 static reg_errcode_t
04442 byte_compile_range (unsigned int range_start_char, const char **p_ptr,
04443 const char *pend, RE_TRANSLATE_TYPE translate,
04444 reg_syntax_t syntax, unsigned char *b)
04445 {
04446 unsigned this_char;
04447 const char *p = *p_ptr;
04448 reg_errcode_t ret;
04449 # if _LIBC
04450 const unsigned char *collseq;
04451 unsigned int start_colseq;
04452 unsigned int end_colseq;
04453 # else
04454 unsigned end_char;
04455 # endif
04456
04457 if (p == pend)
04458 return REG_ERANGE;
04459
04460
04461
04462 (*p_ptr)++;
04463
04464
04465 ret = syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
04466
04467 # if _LIBC
04468 collseq = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
04469 _NL_COLLATE_COLLSEQMB);
04470
04471 start_colseq = collseq[(unsigned char) TRANSLATE (range_start_char)];
04472 end_colseq = collseq[(unsigned char) TRANSLATE (p[0])];
04473 for (this_char = 0; this_char <= (unsigned char) -1; ++this_char)
04474 {
04475 unsigned int this_colseq = collseq[(unsigned char) TRANSLATE (this_char)];
04476
04477 if (start_colseq <= this_colseq && this_colseq <= end_colseq)
04478 {
04479 SET_LIST_BIT (TRANSLATE (this_char));
04480 ret = REG_NOERROR;
04481 }
04482 }
04483 # else
04484
04485
04486
04487 range_start_char = TRANSLATE (range_start_char);
04488
04489
04490
04491
04492
04493 end_char = ((unsigned)TRANSLATE(p[0]) & ((1 << BYTEWIDTH) - 1));
04494
04495 for (this_char = range_start_char; this_char <= end_char; ++this_char)
04496 {
04497 SET_LIST_BIT (TRANSLATE (this_char));
04498 ret = REG_NOERROR;
04499 }
04500 # endif
04501
04502 return ret;
04503 }
04504 #endif
04505
04506
04507
04508
04509
04510
04511
04512
04513
04514
04515
04516
04517
04518
04519 #ifdef WCHAR
04520
04521
04522 static unsigned char truncate_wchar (CHAR_T c);
04523
04524 static unsigned char
04525 truncate_wchar (CHAR_T c)
04526 {
04527 unsigned char buf[MB_CUR_MAX];
04528 mbstate_t state;
04529 int retval;
04530 memset (&state, '\0', sizeof (state));
04531 # ifdef _LIBC
04532 retval = __wcrtomb (buf, c, &state);
04533 # else
04534 retval = wcrtomb (buf, c, &state);
04535 # endif
04536 return retval > 0 ? buf[0] : (unsigned char) c;
04537 }
04538 #endif
04539
04540 static int
04541 PREFIX(re_compile_fastmap) (struct re_pattern_buffer *bufp)
04542 {
04543 int j, k;
04544 #ifdef MATCH_MAY_ALLOCATE
04545 PREFIX(fail_stack_type) fail_stack;
04546 #endif
04547 #ifndef REGEX_MALLOC
04548 char *destination;
04549 #endif
04550
04551 register char *fastmap = bufp->fastmap;
04552
04553 #ifdef WCHAR
04554
04555
04556 UCHAR_T *pattern = (UCHAR_T*)bufp->buffer;
04557 register UCHAR_T *pend = (UCHAR_T*) (bufp->buffer + bufp->used);
04558 #else
04559 UCHAR_T *pattern = bufp->buffer;
04560 register UCHAR_T *pend = pattern + bufp->used;
04561 #endif
04562 UCHAR_T *p = pattern;
04563
04564 #ifdef REL_ALLOC
04565
04566
04567 fail_stack_elt_t *failure_stack_ptr;
04568 #endif
04569
04570
04571
04572
04573
04574 boolean path_can_be_null = true;
04575
04576
04577 boolean succeed_n_p = false;
04578
04579 assert (fastmap != NULL && p != NULL);
04580
04581 INIT_FAIL_STACK ();
04582 bzero (fastmap, 1 << BYTEWIDTH);
04583 bufp->fastmap_accurate = 1;
04584 bufp->can_be_null = 0;
04585
04586 while (1)
04587 {
04588 if (p == pend || *p == (UCHAR_T) succeed)
04589 {
04590
04591 if (!FAIL_STACK_EMPTY ())
04592 {
04593 bufp->can_be_null |= path_can_be_null;
04594
04595
04596 path_can_be_null = true;
04597
04598 p = fail_stack.stack[--fail_stack.avail].pointer;
04599
04600 continue;
04601 }
04602 else
04603 break;
04604 }
04605
04606
04607 assert (p < pend);
04608
04609 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
04610 {
04611
04612
04613
04614
04615
04616
04617 case duplicate:
04618 bufp->can_be_null = 1;
04619 goto done;
04620
04621
04622
04623
04624
04625 #ifdef WCHAR
04626 case exactn:
04627 fastmap[truncate_wchar(p[1])] = 1;
04628 break;
04629 #else
04630 case exactn:
04631 fastmap[p[1]] = 1;
04632 break;
04633 #endif
04634 #ifdef MBS_SUPPORT
04635 case exactn_bin:
04636 fastmap[p[1]] = 1;
04637 break;
04638 #endif
04639
04640 #ifdef WCHAR
04641
04642
04643 case charset:
04644 case charset_not:
04645 case wordchar:
04646 case notwordchar:
04647 bufp->can_be_null = 1;
04648 goto done;
04649 #else
04650 case charset:
04651 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
04652 if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
04653 fastmap[j] = 1;
04654 break;
04655
04656
04657 case charset_not:
04658
04659 for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
04660 fastmap[j] = 1;
04661
04662 for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
04663 if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
04664 fastmap[j] = 1;
04665 break;
04666
04667
04668 case wordchar:
04669 for (j = 0; j < (1 << BYTEWIDTH); j++)
04670 if (SYNTAX (j) == Sword)
04671 fastmap[j] = 1;
04672 break;
04673
04674
04675 case notwordchar:
04676 for (j = 0; j < (1 << BYTEWIDTH); j++)
04677 if (SYNTAX (j) != Sword)
04678 fastmap[j] = 1;
04679 break;
04680 #endif
04681
04682 case anychar:
04683 {
04684 int fastmap_newline = fastmap['\n'];
04685
04686
04687 for (j = 0; j < (1 << BYTEWIDTH); j++)
04688 fastmap[j] = 1;
04689
04690
04691 if (!(bufp->syntax & RE_DOT_NEWLINE))
04692 fastmap['\n'] = fastmap_newline;
04693
04694
04695
04696 else if (bufp->can_be_null)
04697 goto done;
04698
04699
04700 break;
04701 }
04702
04703 #ifdef emacs
04704 case syntaxspec:
04705 k = *p++;
04706 for (j = 0; j < (1 << BYTEWIDTH); j++)
04707 if (SYNTAX (j) == (enum syntaxcode) k)
04708 fastmap[j] = 1;
04709 break;
04710
04711
04712 case notsyntaxspec:
04713 k = *p++;
04714 for (j = 0; j < (1 << BYTEWIDTH); j++)
04715 if (SYNTAX (j) != (enum syntaxcode) k)
04716 fastmap[j] = 1;
04717 break;
04718
04719
04720
04721
04722
04723
04724 case before_dot:
04725 case at_dot:
04726 case after_dot:
04727 continue;
04728 #endif
04729
04730
04731 case no_op:
04732 case begline:
04733 case endline:
04734 case begbuf:
04735 case endbuf:
04736 case wordbound:
04737 case notwordbound:
04738 case wordbeg:
04739 case wordend:
04740 case push_dummy_failure:
04741 continue;
04742
04743
04744 case jump_n:
04745 case pop_failure_jump:
04746 case maybe_pop_jump:
04747 case jump:
04748 case jump_past_alt:
04749 case dummy_failure_jump:
04750 EXTRACT_NUMBER_AND_INCR (j, p);
04751 p += j;
04752 if (j > 0)
04753 continue;
04754
04755
04756
04757
04758
04759
04760 if ((re_opcode_t) *p != on_failure_jump
04761 && (re_opcode_t) *p != succeed_n)
04762 continue;
04763
04764 p++;
04765 EXTRACT_NUMBER_AND_INCR (j, p);
04766 p += j;
04767
04768
04769 if (!FAIL_STACK_EMPTY ()
04770 && fail_stack.stack[fail_stack.avail - 1].pointer == p)
04771 fail_stack.avail--;
04772
04773 continue;
04774
04775
04776 case on_failure_jump:
04777 case on_failure_keep_string_jump:
04778 handle_on_failure_jump:
04779 EXTRACT_NUMBER_AND_INCR (j, p);
04780
04781
04782
04783
04784
04785
04786
04787
04788 if (p + j < pend)
04789 {
04790 if (!PUSH_PATTERN_OP (p + j, fail_stack))
04791 {
04792 RESET_FAIL_STACK ();
04793 return -2;
04794 }
04795 }
04796 else
04797 bufp->can_be_null = 1;
04798
04799 if (succeed_n_p)
04800 {
04801 EXTRACT_NUMBER_AND_INCR (k, p);
04802 succeed_n_p = false;
04803 }
04804
04805 continue;
04806
04807
04808 case succeed_n:
04809
04810 p += OFFSET_ADDRESS_SIZE;
04811
04812
04813 EXTRACT_NUMBER_AND_INCR (k, p);
04814 if (k == 0)
04815 {
04816 p -= 2 * OFFSET_ADDRESS_SIZE;
04817 succeed_n_p = true;
04818 goto handle_on_failure_jump;
04819 }
04820 continue;
04821
04822
04823 case set_number_at:
04824 p += 2 * OFFSET_ADDRESS_SIZE;
04825 continue;
04826
04827
04828 case start_memory:
04829 case stop_memory:
04830 p += 2;
04831 continue;
04832
04833
04834 default:
04835 abort ();
04836 }
04837
04838
04839
04840
04841
04842
04843
04844 path_can_be_null = false;
04845 p = pend;
04846 }
04847
04848
04849
04850 bufp->can_be_null |= path_can_be_null;
04851
04852 done:
04853 RESET_FAIL_STACK ();
04854 return 0;
04855 }
04856
04857 #else
04858
04859 int
04860 re_compile_fastmap (struct re_pattern_buffer *bufp)
04861 {
04862 # ifdef MBS_SUPPORT
04863 if (MB_CUR_MAX != 1)
04864 return wcs_re_compile_fastmap(bufp);
04865 else
04866 # endif
04867 return byte_re_compile_fastmap(bufp);
04868 }
04869 #ifdef _LIBC
04870 weak_alias (__re_compile_fastmap, re_compile_fastmap)
04871 #endif
04872
04873
04874
04875
04876
04877
04878
04879
04880
04881
04882
04883
04884
04885
04886
04887 void
04888 re_set_registers (struct re_pattern_buffer *bufp,
04889 struct re_registers *regs, unsigned num_regs,
04890 regoff_t *starts, regoff_t *ends)
04891 {
04892 if (num_regs)
04893 {
04894 bufp->regs_allocated = REGS_REALLOCATE;
04895 regs->num_regs = num_regs;
04896 regs->start = starts;
04897 regs->end = ends;
04898 }
04899 else
04900 {
04901 bufp->regs_allocated = REGS_UNALLOCATED;
04902 regs->num_regs = 0;
04903 regs->start = regs->end = (regoff_t *) 0;
04904 }
04905 }
04906 #ifdef _LIBC
04907 weak_alias (__re_set_registers, re_set_registers)
04908 #endif
04909
04910
04911
04912
04913
04914
04915 int
04916 re_search (struct re_pattern_buffer *bufp, const char *string, int size,
04917 int startpos, int range, struct re_registers *regs)
04918 {
04919 return re_search_2 (bufp, NULL, 0, string, size, startpos, range,
04920 regs, size);
04921 }
04922 #ifdef _LIBC
04923 weak_alias (__re_search, re_search)
04924 #endif
04925
04926
04927
04928
04929
04930
04931
04932
04933
04934
04935
04936
04937
04938
04939
04940
04941
04942
04943
04944
04945
04946
04947
04948 int
04949 re_search_2 (struct re_pattern_buffer *bufp, const char *string1, int size1,
04950 const char *string2, int size2, int startpos, int range,
04951 struct re_registers *regs, int stop)
04952 {
04953 # ifdef MBS_SUPPORT
04954 if (MB_CUR_MAX != 1)
04955 return wcs_re_search_2 (bufp, string1, size1, string2, size2, startpos,
04956 range, regs, stop);
04957 else
04958 # endif
04959 return byte_re_search_2 (bufp, string1, size1, string2, size2, startpos,
04960 range, regs, stop);
04961 }
04962 #ifdef _LIBC
04963 weak_alias (__re_search_2, re_search_2)
04964 #endif
04965
04966 #endif
04967
04968 #ifdef INSIDE_RECURSION
04969
04970 #ifdef MATCH_MAY_ALLOCATE
04971 # define FREE_VAR(var) if (var) REGEX_FREE (var); var = NULL
04972 #else
04973 # define FREE_VAR(var) if (var) free (var); var = NULL
04974 #endif
04975
04976 #ifdef WCHAR
04977 # define MAX_ALLOCA_SIZE 2000
04978
04979 # define FREE_WCS_BUFFERS() \
04980 do { \
04981 if (size1 > MAX_ALLOCA_SIZE) \
04982 { \
04983 free (wcs_string1); \
04984 free (mbs_offset1); \
04985 } \
04986 else \
04987 { \
04988 FREE_VAR (wcs_string1); \
04989 FREE_VAR (mbs_offset1); \
04990 } \
04991 if (size2 > MAX_ALLOCA_SIZE) \
04992 { \
04993 free (wcs_string2); \
04994 free (mbs_offset2); \
04995 } \
04996 else \
04997 { \
04998 FREE_VAR (wcs_string2); \
04999 FREE_VAR (mbs_offset2); \
05000 } \
05001 } while (0)
05002
05003 #endif
05004
05005
05006 static int
05007 PREFIX(re_search_2) (struct re_pattern_buffer *bufp, const char *string1,
05008 int size1, const char *string2, int size2,
05009 int startpos, int range,
05010 struct re_registers *regs, int stop)
05011 {
05012 int val;
05013 register char *fastmap = bufp->fastmap;
05014 register RE_TRANSLATE_TYPE translate = bufp->translate;
05015 int total_size = size1 + size2;
05016 int endpos = startpos + range;
05017 #ifdef WCHAR
05018
05019 wchar_t *wcs_string1 = NULL, *wcs_string2 = NULL;
05020
05021 int wcs_size1 = 0, wcs_size2 = 0;
05022
05023 int *mbs_offset1 = NULL, *mbs_offset2 = NULL;
05024
05025 char *is_binary = NULL;
05026 #endif
05027
05028
05029 if (startpos < 0 || startpos > total_size)
05030 return -1;
05031
05032
05033
05034
05035 if (endpos < 0)
05036 range = 0 - startpos;
05037 else if (endpos > total_size)
05038 range = total_size - startpos;
05039
05040
05041
05042 if (bufp->used > 0 && range > 0
05043 && ((re_opcode_t) bufp->buffer[0] == begbuf
05044
05045 || ((re_opcode_t) bufp->buffer[0] == begline
05046 && !bufp->newline_anchor)))
05047 {
05048 if (startpos > 0)
05049 return -1;
05050 else
05051 range = 1;
05052 }
05053
05054 #ifdef emacs
05055
05056
05057 if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
05058 {
05059 range = PT - startpos;
05060 if (range <= 0)
05061 return -1;
05062 }
05063 #endif
05064
05065
05066 if (fastmap && !bufp->fastmap_accurate)
05067 if (re_compile_fastmap (bufp) == -2)
05068 return -2;
05069
05070 #ifdef WCHAR
05071
05072
05073 if (size1 != 0)
05074 {
05075 if (size1 > MAX_ALLOCA_SIZE)
05076 {
05077 wcs_string1 = TALLOC (size1 + 1, CHAR_T);
05078 mbs_offset1 = TALLOC (size1 + 1, int);
05079 is_binary = TALLOC (size1 + 1, char);
05080 }
05081 else
05082 {
05083 wcs_string1 = REGEX_TALLOC (size1 + 1, CHAR_T);
05084 mbs_offset1 = REGEX_TALLOC (size1 + 1, int);
05085 is_binary = REGEX_TALLOC (size1 + 1, char);
05086 }
05087 if (!wcs_string1 || !mbs_offset1 || !is_binary)
05088 {
05089 if (size1 > MAX_ALLOCA_SIZE)
05090 {
05091 free (wcs_string1);
05092 free (mbs_offset1);
05093 free (is_binary);
05094 }
05095 else
05096 {
05097 FREE_VAR (wcs_string1);
05098 FREE_VAR (mbs_offset1);
05099 FREE_VAR (is_binary);
05100 }
05101 return -2;
05102 }
05103 wcs_size1 = convert_mbs_to_wcs(wcs_string1, string1, size1,
05104 mbs_offset1, is_binary);
05105 wcs_string1[wcs_size1] = L'\0';
05106 if (size1 > MAX_ALLOCA_SIZE)
05107 free (is_binary);
05108 else
05109 FREE_VAR (is_binary);
05110 }
05111 if (size2 != 0)
05112 {
05113 if (size2 > MAX_ALLOCA_SIZE)
05114 {
05115 wcs_string2 = TALLOC (size2 + 1, CHAR_T);
05116 mbs_offset2 = TALLOC (size2 + 1, int);
05117 is_binary = TALLOC (size2 + 1, char);
05118 }
05119 else
05120 {
05121 wcs_string2 = REGEX_TALLOC (size2 + 1, CHAR_T);
05122 mbs_offset2 = REGEX_TALLOC (size2 + 1, int);
05123 is_binary = REGEX_TALLOC (size2 + 1, char);
05124 }
05125 if (!wcs_string2 || !mbs_offset2 || !is_binary)
05126 {
05127 FREE_WCS_BUFFERS ();
05128 if (size2 > MAX_ALLOCA_SIZE)
05129 free (is_binary);
05130 else
05131 FREE_VAR (is_binary);
05132 return -2;
05133 }
05134 wcs_size2 = convert_mbs_to_wcs(wcs_string2, string2, size2,
05135 mbs_offset2, is_binary);
05136 wcs_string2[wcs_size2] = L'\0';
05137 if (size2 > MAX_ALLOCA_SIZE)
05138 free (is_binary);
05139 else
05140 FREE_VAR (is_binary);
05141 }
05142 #endif
05143
05144
05145
05146 for (;;)
05147 {
05148
05149
05150
05151
05152 if (fastmap && startpos < total_size && !bufp->can_be_null)
05153 {
05154 if (range > 0)
05155 {
05156 register const char *d;
05157 register int lim = 0;
05158 int irange = range;
05159
05160 if (startpos < size1 && startpos + range >= size1)
05161 lim = range - (size1 - startpos);
05162
05163 d = (startpos >= size1 ? string2 - size1 : string1) + startpos;
05164
05165
05166
05167 if (translate)
05168 while (range > lim
05169 && !fastmap[(unsigned char)
05170 translate[(unsigned char) *d++]])
05171 range--;
05172 else
05173 while (range > lim && !fastmap[(unsigned char) *d++])
05174 range--;
05175
05176 startpos += irange - range;
05177 }
05178 else
05179 {
05180 register CHAR_T c = (size1 == 0 || startpos >= size1
05181 ? string2[startpos - size1]
05182 : string1[startpos]);
05183
05184 if (!fastmap[(unsigned char) TRANSLATE (c)])
05185 goto advance;
05186 }
05187 }
05188
05189
05190 if (range >= 0 && startpos == total_size && fastmap
05191 && !bufp->can_be_null)
05192 {
05193 #ifdef WCHAR
05194 FREE_WCS_BUFFERS ();
05195 #endif
05196 return -1;
05197 }
05198
05199 #ifdef WCHAR
05200 val = wcs_re_match_2_internal (bufp, string1, size1, string2,
05201 size2, startpos, regs, stop,
05202 wcs_string1, wcs_size1,
05203 wcs_string2, wcs_size2,
05204 mbs_offset1, mbs_offset2);
05205 #else
05206 val = byte_re_match_2_internal (bufp, string1, size1, string2,
05207 size2, startpos, regs, stop);
05208 #endif
05209
05210 #ifndef REGEX_MALLOC
05211 # ifdef C_ALLOCA
05212 alloca (0);
05213 # endif
05214 #endif
05215
05216 if (val >= 0)
05217 {
05218 #ifdef WCHAR
05219 FREE_WCS_BUFFERS ();
05220 #endif
05221 return startpos;
05222 }
05223
05224 if (val == -2)
05225 {
05226 #ifdef WCHAR
05227 FREE_WCS_BUFFERS ();
05228 #endif
05229 return -2;
05230 }
05231
05232 advance:
05233 if (!range)
05234 break;
05235 else if (range > 0)
05236 {
05237 range--;
05238 startpos++;
05239 }
05240 else
05241 {
05242 range++;
05243 startpos--;
05244 }
05245 }
05246 #ifdef WCHAR
05247 FREE_WCS_BUFFERS ();
05248 #endif
05249 return -1;
05250 }
05251
05252 #ifdef WCHAR
05253
05254
05255
05256
05257 # define POINTER_TO_OFFSET(ptr) \
05258 (FIRST_STRING_P (ptr) \
05259 ? ((regoff_t)(mbs_offset1 != NULL? mbs_offset1[(ptr)-string1] : 0)) \
05260 : ((regoff_t)((mbs_offset2 != NULL? mbs_offset2[(ptr)-string2] : 0) \
05261 + csize1)))
05262 #else
05263
05264
05265 # define POINTER_TO_OFFSET(ptr) \
05266 (FIRST_STRING_P (ptr) \
05267 ? ((regoff_t) ((ptr) - string1)) \
05268 : ((regoff_t) ((ptr) - string2 + size1)))
05269 #endif
05270
05271
05272
05273 #define MATCHING_IN_FIRST_STRING (dend == end_match_1)
05274
05275
05276
05277 #define PREFETCH() \
05278 while (d == dend) \
05279 { \
05280 \
05281 if (dend == end_match_2) \
05282 goto fail; \
05283 \
05284 d = string2; \
05285 dend = end_match_2; \
05286 }
05287
05288
05289
05290 #define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
05291 #define AT_STRINGS_END(d) ((d) == end2)
05292
05293
05294
05295
05296
05297
05298 #ifdef WCHAR
05299
05300 # define WORDCHAR_P(d) \
05301 (iswalnum ((wint_t)((d) == end1 ? *string2 \
05302 : (d) == string2 - 1 ? *(end1 - 1) : *(d))) != 0 \
05303 || ((d) == end1 ? *string2 \
05304 : (d) == string2 - 1 ? *(end1 - 1) : *(d)) == L'_')
05305 #else
05306 # define WORDCHAR_P(d) \
05307 (SYNTAX ((d) == end1 ? *string2 \
05308 : (d) == string2 - 1 ? *(end1 - 1) : *(d)) \
05309 == Sword)
05310 #endif
05311
05312
05313 #if 0
05314
05315
05316 #define AT_WORD_BOUNDARY(d) \
05317 (AT_STRINGS_BEG (d) || AT_STRINGS_END (d) \
05318 || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
05319 #endif
05320
05321
05322 #ifdef MATCH_MAY_ALLOCATE
05323 # ifdef WCHAR
05324 # define FREE_VARIABLES() \
05325 do { \
05326 REGEX_FREE_STACK (fail_stack.stack); \
05327 FREE_VAR (regstart); \
05328 FREE_VAR (regend); \
05329 FREE_VAR (old_regstart); \
05330 FREE_VAR (old_regend); \
05331 FREE_VAR (best_regstart); \
05332 FREE_VAR (best_regend); \
05333 FREE_VAR (reg_info); \
05334 FREE_VAR (reg_dummy); \
05335 FREE_VAR (reg_info_dummy); \
05336 if (!cant_free_wcs_buf) \
05337 { \
05338 FREE_VAR (string1); \
05339 FREE_VAR (string2); \
05340 FREE_VAR (mbs_offset1); \
05341 FREE_VAR (mbs_offset2); \
05342 } \
05343 } while (0)
05344 # else
05345 # define FREE_VARIABLES() \
05346 do { \
05347 REGEX_FREE_STACK (fail_stack.stack); \
05348 FREE_VAR (regstart); \
05349 FREE_VAR (regend); \
05350 FREE_VAR (old_regstart); \
05351 FREE_VAR (old_regend); \
05352 FREE_VAR (best_regstart); \
05353 FREE_VAR (best_regend); \
05354 FREE_VAR (reg_info); \
05355 FREE_VAR (reg_dummy); \
05356 FREE_VAR (reg_info_dummy); \
05357 } while (0)
05358 # endif
05359 #else
05360 # ifdef WCHAR
05361 # define FREE_VARIABLES() \
05362 do { \
05363 if (!cant_free_wcs_buf) \
05364 { \
05365 FREE_VAR (string1); \
05366 FREE_VAR (string2); \
05367 FREE_VAR (mbs_offset1); \
05368 FREE_VAR (mbs_offset2); \
05369 } \
05370 } while (0)
05371 # else
05372 # define FREE_VARIABLES() ((void)0)
05373 # endif
05374 #endif
05375
05376
05377
05378
05379
05380
05381
05382
05383 #define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
05384 #define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
05385
05386 #else
05387
05388
05389 #ifndef emacs
05390
05391
05392 int
05393 re_match (struct re_pattern_buffer *bufp, const char *string,
05394 int size, int pos, struct re_registers *regs)
05395 {
05396 int result;
05397 # ifdef MBS_SUPPORT
05398 if (MB_CUR_MAX != 1)
05399 result = wcs_re_match_2_internal (bufp, NULL, 0, string, size,
05400 pos, regs, size,
05401 NULL, 0, NULL, 0, NULL, NULL);
05402 else
05403 # endif
05404 result = byte_re_match_2_internal (bufp, NULL, 0, string, size,
05405 pos, regs, size);
05406 # ifndef REGEX_MALLOC
05407 # ifdef C_ALLOCA
05408 alloca (0);
05409 # endif
05410 # endif
05411 return result;
05412 }
05413 # ifdef _LIBC
05414 weak_alias (__re_match, re_match)
05415 # endif
05416 #endif
05417
05418 #endif
05419
05420 #ifdef INSIDE_RECURSION
05421 static boolean PREFIX(group_match_null_string_p) (UCHAR_T **p,
05422 UCHAR_T *end,
05423 PREFIX(register_info_type) *reg_info);
05424 static boolean PREFIX(alt_match_null_string_p) (UCHAR_T *p,
05425 UCHAR_T *end,
05426 PREFIX(register_info_type) *reg_info);
05427 static boolean PREFIX(common_op_match_null_string_p) (UCHAR_T **p,
05428 UCHAR_T *end,
05429 PREFIX(register_info_type) *reg_info);
05430 static int PREFIX(bcmp_translate) (const CHAR_T *s1, const CHAR_T *s2,
05431 int len, char *translate);
05432 #else
05433
05434
05435
05436
05437
05438
05439
05440
05441
05442
05443
05444
05445
05446
05447 int
05448 re_match_2 (struct re_pattern_buffer *bufp, const char *string1, int size1,
05449 const char *string2, int size2, int pos,
05450 struct re_registers *regs, int stop)
05451 {
05452 int result;
05453 # ifdef MBS_SUPPORT
05454 if (MB_CUR_MAX != 1)
05455 result = wcs_re_match_2_internal (bufp, string1, size1, string2, size2,
05456 pos, regs, stop,
05457 NULL, 0, NULL, 0, NULL, NULL);
05458 else
05459 # endif
05460 result = byte_re_match_2_internal (bufp, string1, size1, string2, size2,
05461 pos, regs, stop);
05462
05463 #ifndef REGEX_MALLOC
05464 # ifdef C_ALLOCA
05465 alloca (0);
05466 # endif
05467 #endif
05468 return result;
05469 }
05470 #ifdef _LIBC
05471 weak_alias (__re_match_2, re_match_2)
05472 #endif
05473
05474 #endif
05475
05476 #ifdef INSIDE_RECURSION
05477
05478 #ifdef WCHAR
05479 static int count_mbs_length (int *, int);
05480
05481
05482
05483
05484
05485
05486 static int
05487 count_mbs_length(int *offset_buffer, int length)
05488 {
05489 int upper, lower;
05490
05491
05492 if (length < 0)
05493 return -1;
05494
05495 if (offset_buffer == NULL)
05496 return 0;
05497
05498
05499
05500 if (offset_buffer[length] == length)
05501 return length;
05502
05503
05504 upper = length;
05505 lower = 0;
05506
05507 while (true)
05508 {
05509 int middle = (lower + upper) / 2;
05510 if (middle == lower || middle == upper)
05511 break;
05512 if (offset_buffer[middle] > length)
05513 upper = middle;
05514 else if (offset_buffer[middle] < length)
05515 lower = middle;
05516 else
05517 return middle;
05518 }
05519
05520 return -1;
05521 }
05522 #endif
05523
05524
05525
05526 #ifdef WCHAR
05527 static int
05528 wcs_re_match_2_internal (struct re_pattern_buffer *bufp,
05529 const char *cstring1, int csize1,
05530 const char *cstring2, int csize2,
05531 int pos,
05532 struct re_registers *regs,
05533 int stop,
05534
05535
05536
05537 wchar_t *string1, int size1,
05538 wchar_t *string2, int size2,
05539
05540 int *mbs_offset1, int *mbs_offset2)
05541 #else
05542 static int
05543 byte_re_match_2_internal (struct re_pattern_buffer *bufp,
05544 const char *string1, int size1,
05545 const char *string2, int size2,
05546 int pos,
05547 struct re_registers *regs, int stop)
05548 #endif
05549 {
05550
05551 int mcnt;
05552 UCHAR_T *p1;
05553 #ifdef WCHAR
05554
05555 char *is_binary = NULL;
05556
05557 int cant_free_wcs_buf = 1;
05558 #endif
05559
05560
05561 const CHAR_T *end1, *end2;
05562
05563
05564
05565 const CHAR_T *end_match_1, *end_match_2;
05566
05567
05568 const CHAR_T *d, *dend;
05569
05570
05571 #ifdef WCHAR
05572 UCHAR_T *pattern, *p;
05573 register UCHAR_T *pend;
05574 #else
05575 UCHAR_T *p = bufp->buffer;
05576 register UCHAR_T *pend = p + bufp->used;
05577 #endif
05578
05579
05580
05581 UCHAR_T *just_past_start_mem = 0;
05582
05583
05584 RE_TRANSLATE_TYPE translate = bufp->translate;
05585
05586
05587
05588
05589
05590
05591
05592
05593
05594
05595 #ifdef MATCH_MAY_ALLOCATE
05596 PREFIX(fail_stack_type) fail_stack;
05597 #endif
05598 #ifdef DEBUG
05599 static unsigned failure_id;
05600 unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
05601 #endif
05602
05603 #ifdef REL_ALLOC
05604
05605
05606 fail_stack_elt_t *failure_stack_ptr;
05607 #endif
05608
05609
05610
05611
05612 size_t num_regs = bufp->re_nsub + 1;
05613
05614
05615 active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
05616 active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
05617
05618
05619
05620
05621
05622
05623
05624
05625 #ifdef MATCH_MAY_ALLOCATE
05626 const CHAR_T **regstart, **regend;
05627 #endif
05628
05629
05630
05631
05632
05633
05634 #ifdef MATCH_MAY_ALLOCATE
05635 const CHAR_T **old_regstart, **old_regend;
05636 #endif
05637
05638
05639
05640
05641
05642
05643
05644 #ifdef MATCH_MAY_ALLOCATE
05645 PREFIX(register_info_type) *reg_info;
05646 #endif
05647
05648
05649
05650
05651
05652 unsigned best_regs_set = false;
05653 #ifdef MATCH_MAY_ALLOCATE
05654 const CHAR_T **best_regstart, **best_regend;
05655 #endif
05656
05657
05658
05659
05660
05661
05662
05663
05664
05665 const CHAR_T *match_end = NULL;
05666
05667
05668 int set_regs_matched_done = 0;
05669
05670
05671 #ifdef MATCH_MAY_ALLOCATE
05672 const CHAR_T **reg_dummy;
05673 PREFIX(register_info_type) *reg_info_dummy;
05674 #endif
05675
05676 #ifdef DEBUG
05677
05678 unsigned num_regs_pushed = 0;
05679 #endif
05680
05681 DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
05682
05683 INIT_FAIL_STACK ();
05684
05685 #ifdef MATCH_MAY_ALLOCATE
05686
05687
05688
05689
05690
05691 if (bufp->re_nsub)
05692 {
05693 regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
05694 regend = REGEX_TALLOC (num_regs, const CHAR_T *);
05695 old_regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
05696 old_regend = REGEX_TALLOC (num_regs, const CHAR_T *);
05697 best_regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
05698 best_regend = REGEX_TALLOC (num_regs, const CHAR_T *);
05699 reg_info = REGEX_TALLOC (num_regs, PREFIX(register_info_type));
05700 reg_dummy = REGEX_TALLOC (num_regs, const CHAR_T *);
05701 reg_info_dummy = REGEX_TALLOC (num_regs, PREFIX(register_info_type));
05702
05703 if (!(regstart && regend && old_regstart && old_regend && reg_info
05704 && best_regstart && best_regend && reg_dummy && reg_info_dummy))
05705 {
05706 FREE_VARIABLES ();
05707 return -2;
05708 }
05709 }
05710 else
05711 {
05712
05713
05714 regstart = regend = old_regstart = old_regend = best_regstart
05715 = best_regend = reg_dummy = NULL;
05716 reg_info = reg_info_dummy = (PREFIX(register_info_type) *) NULL;
05717 }
05718 #endif
05719
05720
05721 #ifdef WCHAR
05722 if (pos < 0 || pos > csize1 + csize2)
05723 #else
05724 if (pos < 0 || pos > size1 + size2)
05725 #endif
05726 {
05727 FREE_VARIABLES ();
05728 return -1;
05729 }
05730
05731 #ifdef WCHAR
05732
05733
05734 if (string1 == NULL && string2 == NULL)
05735 {
05736
05737
05738
05739 cant_free_wcs_buf = 0;
05740
05741 if (csize1 != 0)
05742 {
05743 string1 = REGEX_TALLOC (csize1 + 1, CHAR_T);
05744 mbs_offset1 = REGEX_TALLOC (csize1 + 1, int);
05745 is_binary = REGEX_TALLOC (csize1 + 1, char);
05746 if (!string1 || !mbs_offset1 || !is_binary)
05747 {
05748 FREE_VAR (string1);
05749 FREE_VAR (mbs_offset1);
05750 FREE_VAR (is_binary);
05751 return -2;
05752 }
05753 }
05754 if (csize2 != 0)
05755 {
05756 string2 = REGEX_TALLOC (csize2 + 1, CHAR_T);
05757 mbs_offset2 = REGEX_TALLOC (csize2 + 1, int);
05758 is_binary = REGEX_TALLOC (csize2 + 1, char);
05759 if (!string2 || !mbs_offset2 || !is_binary)
05760 {
05761 FREE_VAR (string1);
05762 FREE_VAR (mbs_offset1);
05763 FREE_VAR (string2);
05764 FREE_VAR (mbs_offset2);
05765 FREE_VAR (is_binary);
05766 return -2;
05767 }
05768 size2 = convert_mbs_to_wcs(string2, cstring2, csize2,
05769 mbs_offset2, is_binary);
05770 string2[size2] = L'\0';
05771 FREE_VAR (is_binary);
05772 }
05773 }
05774
05775
05776
05777 p = pattern = (CHAR_T*)bufp->buffer;
05778 pend = (CHAR_T*)(bufp->buffer + bufp->used);
05779
05780 #endif
05781
05782
05783
05784
05785 for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
05786 {
05787 regstart[mcnt] = regend[mcnt]
05788 = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
05789
05790 REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
05791 IS_ACTIVE (reg_info[mcnt]) = 0;
05792 MATCHED_SOMETHING (reg_info[mcnt]) = 0;
05793 EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
05794 }
05795
05796
05797
05798 if (size2 == 0 && string1 != NULL)
05799 {
05800 string2 = string1;
05801 size2 = size1;
05802 string1 = 0;
05803 size1 = 0;
05804 #ifdef WCHAR
05805 mbs_offset2 = mbs_offset1;
05806 csize2 = csize1;
05807 mbs_offset1 = NULL;
05808 csize1 = 0;
05809 #endif
05810 }
05811 end1 = string1 + size1;
05812 end2 = string2 + size2;
05813
05814
05815 #ifdef WCHAR
05816 if (stop <= csize1)
05817 {
05818 mcnt = count_mbs_length(mbs_offset1, stop);
05819 end_match_1 = string1 + mcnt;
05820 end_match_2 = string2;
05821 }
05822 else
05823 {
05824 if (stop > csize1 + csize2)
05825 stop = csize1 + csize2;
05826 end_match_1 = end1;
05827 mcnt = count_mbs_length(mbs_offset2, stop-csize1);
05828 end_match_2 = string2 + mcnt;
05829 }
05830 if (mcnt < 0)
05831 {
05832 FREE_VARIABLES ();
05833 return -1;
05834 }
05835 #else
05836 if (stop <= size1)
05837 {
05838 end_match_1 = string1 + stop;
05839 end_match_2 = string2;
05840 }
05841 else
05842 {
05843 end_match_1 = end1;
05844 end_match_2 = string2 + stop - size1;
05845 }
05846 #endif
05847
05848
05849
05850
05851
05852
05853
05854 #ifdef WCHAR
05855 if (size1 > 0 && pos <= csize1)
05856 {
05857 mcnt = count_mbs_length(mbs_offset1, pos);
05858 d = string1 + mcnt;
05859 dend = end_match_1;
05860 }
05861 else
05862 {
05863 mcnt = count_mbs_length(mbs_offset2, pos-csize1);
05864 d = string2 + mcnt;
05865 dend = end_match_2;
05866 }
05867
05868 if (mcnt < 0)
05869 {
05870 FREE_VARIABLES ();
05871 return -1;
05872 }
05873 #else
05874 if (size1 > 0 && pos <= size1)
05875 {
05876 d = string1 + pos;
05877 dend = end_match_1;
05878 }
05879 else
05880 {
05881 d = string2 + pos - size1;
05882 dend = end_match_2;
05883 }
05884 #endif
05885
05886 DEBUG_PRINT1 ("The compiled pattern is:\n");
05887 DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
05888 DEBUG_PRINT1 ("The string to match is: `");
05889 DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
05890 DEBUG_PRINT1 ("'\n");
05891
05892
05893
05894
05895 for (;;)
05896 {
05897 #ifdef _LIBC
05898 DEBUG_PRINT2 ("\n%p: ", p);
05899 #else
05900 DEBUG_PRINT2 ("\n0x%x: ", p);
05901 #endif
05902
05903 if (p == pend)
05904 {
05905 DEBUG_PRINT1 ("end of pattern ... ");
05906
05907
05908
05909 if (d != end_match_2)
05910 {
05911
05912
05913 boolean same_str_p = (FIRST_STRING_P (match_end)
05914 == MATCHING_IN_FIRST_STRING);
05915
05916 boolean best_match_p;
05917
05918
05919
05920 if (same_str_p)
05921 best_match_p = d > match_end;
05922 else
05923 best_match_p = !MATCHING_IN_FIRST_STRING;
05924
05925 DEBUG_PRINT1 ("backtracking.\n");
05926
05927 if (!FAIL_STACK_EMPTY ())
05928 {
05929
05930
05931 if (!best_regs_set || best_match_p)
05932 {
05933 best_regs_set = true;
05934 match_end = d;
05935
05936 DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
05937
05938 for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
05939 {
05940 best_regstart[mcnt] = regstart[mcnt];
05941 best_regend[mcnt] = regend[mcnt];
05942 }
05943 }
05944 goto fail;
05945 }
05946
05947
05948
05949
05950 else if (best_regs_set && !best_match_p)
05951 {
05952 restore_best_regs:
05953
05954
05955
05956
05957
05958 DEBUG_PRINT1 ("Restoring best registers.\n");
05959
05960 d = match_end;
05961 dend = ((d >= string1 && d <= end1)
05962 ? end_match_1 : end_match_2);
05963
05964 for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
05965 {
05966 regstart[mcnt] = best_regstart[mcnt];
05967 regend[mcnt] = best_regend[mcnt];
05968 }
05969 }
05970 }
05971
05972 succeed_label:
05973 DEBUG_PRINT1 ("Accepting match.\n");
05974
05975 if (regs && !bufp->no_sub)
05976 {
05977
05978 if (bufp->regs_allocated == REGS_UNALLOCATED)
05979 {
05980
05981
05982 regs->num_regs = MAX (RE_NREGS, num_regs + 1);
05983 regs->start = TALLOC (regs->num_regs, regoff_t);
05984 regs->end = TALLOC (regs->num_regs, regoff_t);
05985 if (regs->start == NULL || regs->end == NULL)
05986 {
05987 FREE_VARIABLES ();
05988 return -2;
05989 }
05990 bufp->regs_allocated = REGS_REALLOCATE;
05991 }
05992 else if (bufp->regs_allocated == REGS_REALLOCATE)
05993 {
05994
05995
05996 if (regs->num_regs < num_regs + 1)
05997 {
05998 regs->num_regs = num_regs + 1;
05999 RETALLOC (regs->start, regs->num_regs, regoff_t);
06000 RETALLOC (regs->end, regs->num_regs, regoff_t);
06001 if (regs->start == NULL || regs->end == NULL)
06002 {
06003 FREE_VARIABLES ();
06004 return -2;
06005 }
06006 }
06007 }
06008 else
06009 {
06010
06011
06012 assert (bufp->regs_allocated == REGS_FIXED);
06013 }
06014
06015
06016
06017
06018 if (regs->num_regs > 0)
06019 {
06020 regs->start[0] = pos;
06021 #ifdef WCHAR
06022 if (MATCHING_IN_FIRST_STRING)
06023 regs->end[0] = mbs_offset1 != NULL ?
06024 mbs_offset1[d-string1] : 0;
06025 else
06026 regs->end[0] = csize1 + (mbs_offset2 != NULL ?
06027 mbs_offset2[d-string2] : 0);
06028 #else
06029 regs->end[0] = (MATCHING_IN_FIRST_STRING
06030 ? ((regoff_t) (d - string1))
06031 : ((regoff_t) (d - string2 + size1)));
06032 #endif
06033 }
06034
06035
06036
06037 for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs);
06038 mcnt++)
06039 {
06040 if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
06041 regs->start[mcnt] = regs->end[mcnt] = -1;
06042 else
06043 {
06044 regs->start[mcnt]
06045 = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
06046 regs->end[mcnt]
06047 = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
06048 }
06049 }
06050
06051
06052
06053
06054
06055
06056 for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++)
06057 regs->start[mcnt] = regs->end[mcnt] = -1;
06058 }
06059
06060 DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
06061 nfailure_points_pushed, nfailure_points_popped,
06062 nfailure_points_pushed - nfailure_points_popped);
06063 DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
06064
06065 #ifdef WCHAR
06066 if (MATCHING_IN_FIRST_STRING)
06067 mcnt = mbs_offset1 != NULL ? mbs_offset1[d-string1] : 0;
06068 else
06069 mcnt = (mbs_offset2 != NULL ? mbs_offset2[d-string2] : 0) +
06070 csize1;
06071 mcnt -= pos;
06072 #else
06073 mcnt = d - pos - (MATCHING_IN_FIRST_STRING
06074 ? string1
06075 : string2 - size1);
06076 #endif
06077
06078 DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
06079
06080 FREE_VARIABLES ();
06081 return mcnt;
06082 }
06083
06084
06085 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
06086 {
06087
06088
06089 case no_op:
06090 DEBUG_PRINT1 ("EXECUTING no_op.\n");
06091 break;
06092
06093 case succeed:
06094 DEBUG_PRINT1 ("EXECUTING succeed.\n");
06095 goto succeed_label;
06096
06097
06098
06099
06100 case exactn:
06101 #ifdef MBS_SUPPORT
06102 case exactn_bin:
06103 #endif
06104 mcnt = *p++;
06105 DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
06106
06107
06108
06109 if (translate)
06110 {
06111 do
06112 {
06113 PREFETCH ();
06114 #ifdef WCHAR
06115 if (*d <= 0xff)
06116 {
06117 if ((UCHAR_T) translate[(unsigned char) *d++]
06118 != (UCHAR_T) *p++)
06119 goto fail;
06120 }
06121 else
06122 {
06123 if (*d++ != (CHAR_T) *p++)
06124 goto fail;
06125 }
06126 #else
06127 if ((UCHAR_T) translate[(unsigned char) *d++]
06128 != (UCHAR_T) *p++)
06129 goto fail;
06130 #endif
06131 }
06132 while (--mcnt);
06133 }
06134 else
06135 {
06136 do
06137 {
06138 PREFETCH ();
06139 if (*d++ != (CHAR_T) *p++) goto fail;
06140 }
06141 while (--mcnt);
06142 }
06143 SET_REGS_MATCHED ();
06144 break;
06145
06146
06147
06148 case anychar:
06149 DEBUG_PRINT1 ("EXECUTING anychar.\n");
06150
06151 PREFETCH ();
06152
06153 if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
06154 || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
06155 goto fail;
06156
06157 SET_REGS_MATCHED ();
06158 DEBUG_PRINT2 (" Matched `%ld'.\n", (long int) *d);
06159 d++;
06160 break;
06161
06162
06163 case charset:
06164 case charset_not:
06165 {
06166 register UCHAR_T c;
06167 #ifdef WCHAR
06168 unsigned int i, char_class_length, coll_symbol_length,
06169 equiv_class_length, ranges_length, chars_length, length;
06170 CHAR_T *workp, *workp2, *charset_top;
06171 #define WORK_BUFFER_SIZE 128
06172 CHAR_T str_buf[WORK_BUFFER_SIZE];
06173 # ifdef _LIBC
06174 uint32_t nrules;
06175 # endif
06176 #endif
06177 boolean negate = (re_opcode_t) *(p - 1) == charset_not;
06178
06179 DEBUG_PRINT2 ("EXECUTING charset%s.\n", negate ? "_not" : "");
06180 PREFETCH ();
06181 c = TRANSLATE (*d);
06182 #ifdef WCHAR
06183 # ifdef _LIBC
06184 nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
06185 # endif
06186 charset_top = p - 1;
06187 char_class_length = *p++;
06188 coll_symbol_length = *p++;
06189 equiv_class_length = *p++;
06190 ranges_length = *p++;
06191 chars_length = *p++;
06192
06193
06194
06195
06196
06197 workp = p;
06198
06199 p += char_class_length + coll_symbol_length+ equiv_class_length +
06200 2*ranges_length + chars_length;
06201
06202
06203 for (i = 0; i < char_class_length ; i += CHAR_CLASS_SIZE)
06204 {
06205 wctype_t wctype;
06206 uintptr_t alignedp = ((uintptr_t)workp
06207 + __alignof__(wctype_t) - 1)
06208 & ~(uintptr_t)(__alignof__(wctype_t) - 1);
06209 wctype = *((wctype_t*)alignedp);
06210 workp += CHAR_CLASS_SIZE;
06211 # ifdef _LIBC
06212 if (__iswctype((wint_t)c, wctype))
06213 goto char_set_matched;
06214 # else
06215 if (iswctype((wint_t)c, wctype))
06216 goto char_set_matched;
06217 # endif
06218 }
06219
06220
06221 # ifdef _LIBC
06222 if (nrules != 0)
06223 {
06224 const unsigned char *extra = (const unsigned char *)
06225 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
06226
06227 for (workp2 = workp + coll_symbol_length ; workp < workp2 ;
06228 workp++)
06229 {
06230 int32_t *wextra;
06231 wextra = (int32_t*)(extra + *workp++);
06232 for (i = 0; i < *wextra; ++i)
06233 if (TRANSLATE(d[i]) != wextra[1 + i])
06234 break;
06235
06236 if (i == *wextra)
06237 {
06238
06239
06240 d += i - 1;
06241 goto char_set_matched;
06242 }
06243 }
06244 }
06245 else
06246 # endif
06247
06248
06249 {
06250 for (workp2 = workp + coll_symbol_length ; workp < workp2 ;)
06251 {
06252 const CHAR_T *backup_d = d, *backup_dend = dend;
06253 # ifdef _LIBC
06254 length = __wcslen (workp);
06255 # else
06256 length = wcslen (workp);
06257 # endif
06258
06259
06260
06261
06262 # ifdef _LIBC
06263 if (__wcscoll (workp, d) > 0)
06264 # else
06265 if (wcscoll (workp, d) > 0)
06266 # endif
06267 {
06268 workp += length + 1;
06269 continue;
06270 }
06271
06272
06273
06274
06275
06276 for (i = 0 ; i < WORK_BUFFER_SIZE-1 ; i++, d++)
06277 {
06278 int match;
06279 if (d == dend)
06280 {
06281 if (dend == end_match_2)
06282 break;
06283 d = string2;
06284 dend = end_match_2;
06285 }
06286
06287
06288 str_buf[i] = TRANSLATE(*d);
06289 str_buf[i+1] = '\0';
06290
06291 # ifdef _LIBC
06292 match = __wcscoll (workp, str_buf);
06293 # else
06294 match = wcscoll (workp, str_buf);
06295 # endif
06296 if (match == 0)
06297 goto char_set_matched;
06298
06299 if (match < 0)
06300
06301
06302
06303 break;
06304
06305
06306
06307
06308 }
06309
06310 d = backup_d;
06311 dend = backup_dend;
06312 workp += length + 1;
06313 }
06314 }
06315
06316 # ifdef _LIBC
06317 if (nrules != 0)
06318 {
06319 const CHAR_T *backup_d = d, *backup_dend = dend;
06320
06321
06322 const int32_t *table;
06323 const int32_t *weights;
06324 const int32_t *extra;
06325 const int32_t *indirect;
06326 int32_t idx, idx2;
06327 wint_t *cp;
06328 size_t len;
06329
06330
06331 # include <locale/weightwc.h>
06332
06333 table = (const int32_t *)
06334 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
06335 weights = (const wint_t *)
06336 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
06337 extra = (const wint_t *)
06338 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
06339 indirect = (const int32_t *)
06340 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
06341
06342
06343
06344 idx2 = 0;
06345
06346 for (i = 0 ; idx2 == 0 && i < WORK_BUFFER_SIZE - 1; i++)
06347 {
06348 cp = (wint_t*)str_buf;
06349 if (d == dend)
06350 {
06351 if (dend == end_match_2)
06352 break;
06353 d = string2;
06354 dend = end_match_2;
06355 }
06356 str_buf[i] = TRANSLATE(*(d+i));
06357 str_buf[i+1] = '\0';
06358 idx2 = findidx ((const wint_t**)&cp);
06359 }
06360
06361
06362
06363 d = backup_d + ((wchar_t*)cp - (wchar_t*)str_buf - 1);
06364 if (d >= dend)
06365 {
06366 if (dend == end_match_2)
06367 d = dend;
06368 else
06369 {
06370 d = string2;
06371 dend = end_match_2;
06372 }
06373 }
06374
06375 len = weights[idx2];
06376
06377 for (workp2 = workp + equiv_class_length ; workp < workp2 ;
06378 workp++)
06379 {
06380 idx = (int32_t)*workp;
06381
06382
06383 if (idx2 != 0 && len == weights[idx])
06384 {
06385 int cnt = 0;
06386 while (cnt < len && (weights[idx + 1 + cnt]
06387 == weights[idx2 + 1 + cnt]))
06388 ++cnt;
06389
06390 if (cnt == len)
06391 goto char_set_matched;
06392 }
06393 }
06394
06395 d = backup_d;
06396 dend = backup_dend;
06397 }
06398 else
06399 # endif
06400
06401
06402 {
06403 for (workp2 = workp + equiv_class_length ; workp < workp2 ;)
06404 {
06405 const CHAR_T *backup_d = d, *backup_dend = dend;
06406 # ifdef _LIBC
06407 length = __wcslen (workp);
06408 # else
06409 length = wcslen (workp);
06410 # endif
06411
06412
06413
06414
06415 # ifdef _LIBC
06416 if (__wcscoll (workp, d) > 0)
06417 # else
06418 if (wcscoll (workp, d) > 0)
06419 # endif
06420 {
06421 workp += length + 1;
06422 break;
06423 }
06424
06425
06426
06427
06428
06429 for (i = 0 ; i < WORK_BUFFER_SIZE - 1 ; i++, d++)
06430 {
06431 int match;
06432 if (d == dend)
06433 {
06434 if (dend == end_match_2)
06435 break;
06436 d = string2;
06437 dend = end_match_2;
06438 }
06439
06440
06441 str_buf[i] = TRANSLATE(*d);
06442 str_buf[i+1] = '\0';
06443
06444 # ifdef _LIBC
06445 match = __wcscoll (workp, str_buf);
06446 # else
06447 match = wcscoll (workp, str_buf);
06448 # endif
06449
06450 if (match == 0)
06451 goto char_set_matched;
06452
06453 if (match < 0)
06454
06455
06456
06457 break;
06458
06459
06460
06461
06462 }
06463
06464 d = backup_d;
06465 dend = backup_dend;
06466 workp += length + 1;
06467 }
06468 }
06469
06470
06471 # ifdef _LIBC
06472 if (nrules != 0)
06473 {
06474 uint32_t collseqval;
06475 const char *collseq = (const char *)
06476 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
06477
06478 collseqval = collseq_table_lookup (collseq, c);
06479
06480 for (; workp < p - chars_length ;)
06481 {
06482 uint32_t start_val, end_val;
06483
06484
06485
06486 start_val = (uint32_t) *workp++;
06487 end_val = (uint32_t) *workp++;
06488
06489 if (start_val <= collseqval && collseqval <= end_val)
06490 goto char_set_matched;
06491 }
06492 }
06493 else
06494 # endif
06495 {
06496
06497
06498 str_buf[1] = 0;
06499 str_buf[2] = c;
06500 str_buf[3] = 0;
06501 str_buf[5] = 0;
06502 for (; workp < p - chars_length ;)
06503 {
06504 wchar_t *range_start_char, *range_end_char;
06505
06506
06507
06508
06509
06510
06511
06512
06513 if (*workp < 0)
06514 range_start_char = charset_top - (*workp++);
06515 else
06516 {
06517 str_buf[0] = *workp++;
06518 range_start_char = str_buf;
06519 }
06520
06521
06522 if (*workp < 0)
06523 range_end_char = charset_top - (*workp++);
06524 else
06525 {
06526 str_buf[4] = *workp++;
06527 range_end_char = str_buf + 4;
06528 }
06529
06530 # ifdef _LIBC
06531 if (__wcscoll (range_start_char, str_buf+2) <= 0
06532 && __wcscoll (str_buf+2, range_end_char) <= 0)
06533 # else
06534 if (wcscoll (range_start_char, str_buf+2) <= 0
06535 && wcscoll (str_buf+2, range_end_char) <= 0)
06536 # endif
06537 goto char_set_matched;
06538 }
06539 }
06540
06541
06542 for (; workp < p ; workp++)
06543 if (c == *workp)
06544 goto char_set_matched;
06545
06546 negate = !negate;
06547
06548 char_set_matched:
06549 if (negate) goto fail;
06550 #else
06551
06552
06553 if (c < (unsigned) (*p * BYTEWIDTH)
06554 && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
06555 negate = !negate;
06556
06557 p += 1 + *p;
06558
06559 if (!negate) goto fail;
06560 #undef WORK_BUFFER_SIZE
06561 #endif
06562 SET_REGS_MATCHED ();
06563 d++;
06564 break;
06565 }
06566
06567
06568
06569
06570
06571
06572
06573 case start_memory:
06574 DEBUG_PRINT3 ("EXECUTING start_memory %ld (%ld):\n",
06575 (long int) *p, (long int) p[1]);
06576
06577
06578 p1 = p;
06579
06580 if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
06581 REG_MATCH_NULL_STRING_P (reg_info[*p])
06582 = PREFIX(group_match_null_string_p) (&p1, pend, reg_info);
06583
06584
06585
06586
06587
06588
06589 old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
06590 ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
06591 : regstart[*p];
06592 DEBUG_PRINT2 (" old_regstart: %d\n",
06593 POINTER_TO_OFFSET (old_regstart[*p]));
06594
06595 regstart[*p] = d;
06596 DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
06597
06598 IS_ACTIVE (reg_info[*p]) = 1;
06599 MATCHED_SOMETHING (reg_info[*p]) = 0;
06600
06601
06602 set_regs_matched_done = 0;
06603
06604
06605 highest_active_reg = *p;
06606
06607
06608
06609 if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
06610 lowest_active_reg = *p;
06611
06612
06613 p += 2;
06614 just_past_start_mem = p;
06615
06616 break;
06617
06618
06619
06620
06621
06622 case stop_memory:
06623 DEBUG_PRINT3 ("EXECUTING stop_memory %ld (%ld):\n",
06624 (long int) *p, (long int) p[1]);
06625
06626
06627
06628
06629
06630
06631 old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
06632 ? REG_UNSET (regend[*p]) ? d : regend[*p]
06633 : regend[*p];
06634 DEBUG_PRINT2 (" old_regend: %d\n",
06635 POINTER_TO_OFFSET (old_regend[*p]));
06636
06637 regend[*p] = d;
06638 DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
06639
06640
06641 IS_ACTIVE (reg_info[*p]) = 0;
06642
06643
06644 set_regs_matched_done = 0;
06645
06646
06647
06648 if (lowest_active_reg == highest_active_reg)
06649 {
06650 lowest_active_reg = NO_LOWEST_ACTIVE_REG;
06651 highest_active_reg = NO_HIGHEST_ACTIVE_REG;
06652 }
06653 else
06654 {
06655
06656
06657
06658 UCHAR_T r = *p - 1;
06659 while (r > 0 && !IS_ACTIVE (reg_info[r]))
06660 r--;
06661
06662
06663
06664
06665
06666
06667
06668
06669 if (r == 0)
06670 {
06671 lowest_active_reg = NO_LOWEST_ACTIVE_REG;
06672 highest_active_reg = NO_HIGHEST_ACTIVE_REG;
06673 }
06674 else
06675 highest_active_reg = r;
06676 }
06677
06678
06679
06680
06681
06682
06683 if ((!MATCHED_SOMETHING (reg_info[*p])
06684 || just_past_start_mem == p - 1)
06685 && (p + 2) < pend)
06686 {
06687 boolean is_a_jump_n = false;
06688
06689 p1 = p + 2;
06690 mcnt = 0;
06691 switch ((re_opcode_t) *p1++)
06692 {
06693 case jump_n:
06694 is_a_jump_n = true;
06695 case pop_failure_jump:
06696 case maybe_pop_jump:
06697 case jump:
06698 case dummy_failure_jump:
06699 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
06700 if (is_a_jump_n)
06701 p1 += OFFSET_ADDRESS_SIZE;
06702 break;
06703
06704 default:
06705 ;
06706 }
06707 p1 += mcnt;
06708
06709
06710
06711
06712
06713
06714 if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
06715 && (re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == start_memory
06716 && p1[2+OFFSET_ADDRESS_SIZE] == *p)
06717 {
06718
06719
06720
06721
06722
06723
06724
06725
06726
06727
06728 if (EVER_MATCHED_SOMETHING (reg_info[*p]))
06729 {
06730 unsigned r;
06731
06732 EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
06733
06734
06735 for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1);
06736 r++)
06737 {
06738 regstart[r] = old_regstart[r];
06739
06740
06741 if (old_regend[r] >= regstart[r])
06742 regend[r] = old_regend[r];
06743 }
06744 }
06745 p1++;
06746 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
06747 PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
06748
06749 goto fail;
06750 }
06751 }
06752
06753
06754 p += 2;
06755 break;
06756
06757
06758
06759
06760 case duplicate:
06761 {
06762 register const CHAR_T *d2, *dend2;
06763 int regno = *p++;
06764 DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
06765
06766
06767 if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
06768 goto fail;
06769
06770
06771 d2 = regstart[regno];
06772
06773
06774
06775
06776
06777
06778 dend2 = ((FIRST_STRING_P (regstart[regno])
06779 == FIRST_STRING_P (regend[regno]))
06780 ? regend[regno] : end_match_1);
06781 for (;;)
06782 {
06783
06784
06785 while (d2 == dend2)
06786 {
06787 if (dend2 == end_match_2) break;
06788 if (dend2 == regend[regno]) break;
06789
06790
06791 d2 = string2;
06792 dend2 = regend[regno];
06793 }
06794
06795 if (d2 == dend2) break;
06796
06797
06798 PREFETCH ();
06799
06800
06801 mcnt = dend - d;
06802
06803
06804
06805 if (mcnt > dend2 - d2)
06806 mcnt = dend2 - d2;
06807
06808
06809
06810 if (translate
06811 ? PREFIX(bcmp_translate) (d, d2, mcnt, translate)
06812 : memcmp (d, d2, mcnt*sizeof(UCHAR_T)))
06813 goto fail;
06814 d += mcnt, d2 += mcnt;
06815
06816
06817 SET_REGS_MATCHED ();
06818 }
06819 }
06820 break;
06821
06822
06823
06824
06825
06826 case begline:
06827 DEBUG_PRINT1 ("EXECUTING begline.\n");
06828
06829 if (AT_STRINGS_BEG (d))
06830 {
06831 if (!bufp->not_bol) break;
06832 }
06833 else if (d[-1] == '\n' && bufp->newline_anchor)
06834 {
06835 break;
06836 }
06837
06838 goto fail;
06839
06840
06841
06842 case endline:
06843 DEBUG_PRINT1 ("EXECUTING endline.\n");
06844
06845 if (AT_STRINGS_END (d))
06846 {
06847 if (!bufp->not_eol) break;
06848 }
06849
06850
06851 else if ((d == end1 ? *string2 : *d) == '\n'
06852 && bufp->newline_anchor)
06853 {
06854 break;
06855 }
06856 goto fail;
06857
06858
06859
06860 case begbuf:
06861 DEBUG_PRINT1 ("EXECUTING begbuf.\n");
06862 if (AT_STRINGS_BEG (d))
06863 break;
06864 goto fail;
06865
06866
06867
06868 case endbuf:
06869 DEBUG_PRINT1 ("EXECUTING endbuf.\n");
06870 if (AT_STRINGS_END (d))
06871 break;
06872 goto fail;
06873
06874
06875
06876
06877
06878
06879
06880
06881
06882
06883
06884
06885
06886
06887
06888
06889
06890
06891 case on_failure_keep_string_jump:
06892 DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
06893
06894 EXTRACT_NUMBER_AND_INCR (mcnt, p);
06895 #ifdef _LIBC
06896 DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt);
06897 #else
06898 DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
06899 #endif
06900
06901 PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
06902 break;
06903
06904
06905
06906
06907
06908
06909
06910
06911
06912
06913
06914
06915
06916
06917 case on_failure_jump:
06918 on_failure:
06919 DEBUG_PRINT1 ("EXECUTING on_failure_jump");
06920
06921 EXTRACT_NUMBER_AND_INCR (mcnt, p);
06922 #ifdef _LIBC
06923 DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt);
06924 #else
06925 DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
06926 #endif
06927
06928
06929
06930
06931
06932
06933
06934
06935
06936
06937 p1 = p;
06938
06939
06940
06941
06942
06943 while (p1 < pend && (re_opcode_t) *p1 == no_op)
06944 p1++;
06945
06946 if (p1 < pend && (re_opcode_t) *p1 == start_memory)
06947 {
06948
06949
06950
06951
06952 highest_active_reg = *(p1 + 1) + *(p1 + 2);
06953 if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
06954 lowest_active_reg = *(p1 + 1);
06955 }
06956
06957 DEBUG_PRINT1 (":\n");
06958 PUSH_FAILURE_POINT (p + mcnt, d, -2);
06959 break;
06960
06961
06962
06963
06964 case maybe_pop_jump:
06965 EXTRACT_NUMBER_AND_INCR (mcnt, p);
06966 DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
06967 {
06968 register UCHAR_T *p2 = p;
06969
06970
06971
06972
06973
06974
06975
06976
06977
06978
06979
06980
06981
06982
06983
06984
06985
06986
06987 while (1)
06988 {
06989 if (p2 + 2 < pend
06990 && ((re_opcode_t) *p2 == stop_memory
06991 || (re_opcode_t) *p2 == start_memory))
06992 p2 += 3;
06993 else if (p2 + 2 + 2 * OFFSET_ADDRESS_SIZE < pend
06994 && (re_opcode_t) *p2 == dummy_failure_jump)
06995 p2 += 2 + 2 * OFFSET_ADDRESS_SIZE;
06996 else
06997 break;
06998 }
06999
07000 p1 = p + mcnt;
07001
07002
07003
07004
07005
07006 if (p2 == pend)
07007 {
07008
07009
07010
07011 p[-(1+OFFSET_ADDRESS_SIZE)] = (UCHAR_T)
07012 pop_failure_jump;
07013 DEBUG_PRINT1
07014 (" End of pattern: change to `pop_failure_jump'.\n");
07015 }
07016
07017 else if ((re_opcode_t) *p2 == exactn
07018 #ifdef MBS_SUPPORT
07019 || (re_opcode_t) *p2 == exactn_bin
07020 #endif
07021 || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
07022 {
07023 register UCHAR_T c
07024 = *p2 == (UCHAR_T) endline ? '\n' : p2[2];
07025
07026 if (((re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == exactn
07027 #ifdef MBS_SUPPORT
07028 || (re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == exactn_bin
07029 #endif
07030 ) && p1[3+OFFSET_ADDRESS_SIZE] != c)
07031 {
07032 p[-(1+OFFSET_ADDRESS_SIZE)] = (UCHAR_T)
07033 pop_failure_jump;
07034 #ifdef WCHAR
07035 DEBUG_PRINT3 (" %C != %C => pop_failure_jump.\n",
07036 (wint_t) c,
07037 (wint_t) p1[3+OFFSET_ADDRESS_SIZE]);
07038 #else
07039 DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
07040 (char) c,
07041 (char) p1[3+OFFSET_ADDRESS_SIZE]);
07042 #endif
07043 }
07044
07045 #ifndef WCHAR
07046 else if ((re_opcode_t) p1[3] == charset
07047 || (re_opcode_t) p1[3] == charset_not)
07048 {
07049 int negate = (re_opcode_t) p1[3] == charset_not;
07050
07051 if (c < (unsigned) (p1[4] * BYTEWIDTH)
07052 && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
07053 negate = !negate;
07054
07055
07056
07057 if (!negate)
07058 {
07059 p[-3] = (unsigned char) pop_failure_jump;
07060 DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
07061 }
07062 }
07063 #endif
07064 }
07065 #ifndef WCHAR
07066 else if ((re_opcode_t) *p2 == charset)
07067 {
07068
07069
07070 if ((re_opcode_t) p1[3] == exactn
07071 && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
07072 && (p2[2 + p1[5] / BYTEWIDTH]
07073 & (1 << (p1[5] % BYTEWIDTH)))))
07074 {
07075 p[-3] = (unsigned char) pop_failure_jump;
07076 DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
07077 }
07078
07079 else if ((re_opcode_t) p1[3] == charset_not)
07080 {
07081 int idx;
07082
07083
07084 for (idx = 0; idx < (int) p2[1]; idx++)
07085 if (! (p2[2 + idx] == 0
07086 || (idx < (int) p1[4]
07087 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
07088 break;
07089
07090 if (idx == p2[1])
07091 {
07092 p[-3] = (unsigned char) pop_failure_jump;
07093 DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
07094 }
07095 }
07096 else if ((re_opcode_t) p1[3] == charset)
07097 {
07098 int idx;
07099
07100
07101 for (idx = 0;
07102 idx < (int) p2[1] && idx < (int) p1[4];
07103 idx++)
07104 if ((p2[2 + idx] & p1[5 + idx]) != 0)
07105 break;
07106
07107 if (idx == p2[1] || idx == p1[4])
07108 {
07109 p[-3] = (unsigned char) pop_failure_jump;
07110 DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
07111 }
07112 }
07113 }
07114 #endif
07115 }
07116 p -= OFFSET_ADDRESS_SIZE;
07117 if ((re_opcode_t) p[-1] != pop_failure_jump)
07118 {
07119 p[-1] = (UCHAR_T) jump;
07120 DEBUG_PRINT1 (" Match => jump.\n");
07121 goto unconditional_jump;
07122 }
07123
07124
07125
07126
07127
07128
07129
07130
07131
07132 case pop_failure_jump:
07133 {
07134
07135
07136
07137
07138
07139 active_reg_t dummy_low_reg, dummy_high_reg;
07140 UCHAR_T *pdummy = NULL;
07141 const CHAR_T *sdummy = NULL;
07142
07143 DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
07144 POP_FAILURE_POINT (sdummy, pdummy,
07145 dummy_low_reg, dummy_high_reg,
07146 reg_dummy, reg_dummy, reg_info_dummy);
07147 }
07148
07149
07150 unconditional_jump:
07151 #ifdef _LIBC
07152 DEBUG_PRINT2 ("\n%p: ", p);
07153 #else
07154 DEBUG_PRINT2 ("\n0x%x: ", p);
07155 #endif
07156
07157
07158
07159 case jump:
07160 EXTRACT_NUMBER_AND_INCR (mcnt, p);
07161 DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
07162 p += mcnt;
07163 #ifdef _LIBC
07164 DEBUG_PRINT2 ("(to %p).\n", p);
07165 #else
07166 DEBUG_PRINT2 ("(to 0x%x).\n", p);
07167 #endif
07168 break;
07169
07170
07171
07172
07173 case jump_past_alt:
07174 DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
07175 goto unconditional_jump;
07176
07177
07178
07179
07180
07181
07182
07183 case dummy_failure_jump:
07184 DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
07185
07186
07187 PUSH_FAILURE_POINT (NULL, NULL, -2);
07188 goto unconditional_jump;
07189
07190
07191
07192
07193
07194
07195
07196 case push_dummy_failure:
07197 DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
07198
07199
07200 PUSH_FAILURE_POINT (NULL, NULL, -2);
07201 break;
07202
07203
07204
07205 case succeed_n:
07206 EXTRACT_NUMBER (mcnt, p + OFFSET_ADDRESS_SIZE);
07207 DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
07208
07209 assert (mcnt >= 0);
07210
07211 if (mcnt > 0)
07212 {
07213 mcnt--;
07214 p += OFFSET_ADDRESS_SIZE;
07215 STORE_NUMBER_AND_INCR (p, mcnt);
07216 #ifdef _LIBC
07217 DEBUG_PRINT3 (" Setting %p to %d.\n", p - OFFSET_ADDRESS_SIZE
07218 , mcnt);
07219 #else
07220 DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p - OFFSET_ADDRESS_SIZE
07221 , mcnt);
07222 #endif
07223 }
07224 else if (mcnt == 0)
07225 {
07226 #ifdef _LIBC
07227 DEBUG_PRINT2 (" Setting two bytes from %p to no_op.\n",
07228 p + OFFSET_ADDRESS_SIZE);
07229 #else
07230 DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n",
07231 p + OFFSET_ADDRESS_SIZE);
07232 #endif
07233
07234 #ifdef WCHAR
07235 p[1] = (UCHAR_T) no_op;
07236 #else
07237 p[2] = (UCHAR_T) no_op;
07238 p[3] = (UCHAR_T) no_op;
07239 #endif
07240 goto on_failure;
07241 }
07242 break;
07243
07244 case jump_n:
07245 EXTRACT_NUMBER (mcnt, p + OFFSET_ADDRESS_SIZE);
07246 DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
07247
07248
07249 if (mcnt)
07250 {
07251 mcnt--;
07252 STORE_NUMBER (p + OFFSET_ADDRESS_SIZE, mcnt);
07253
07254 #ifdef _LIBC
07255 DEBUG_PRINT3 (" Setting %p to %d.\n", p + OFFSET_ADDRESS_SIZE,
07256 mcnt);
07257 #else
07258 DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p + OFFSET_ADDRESS_SIZE,
07259 mcnt);
07260 #endif
07261 goto unconditional_jump;
07262 }
07263
07264 else
07265 p += 2 * OFFSET_ADDRESS_SIZE;
07266 break;
07267
07268 case set_number_at:
07269 {
07270 DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
07271
07272 EXTRACT_NUMBER_AND_INCR (mcnt, p);
07273 p1 = p + mcnt;
07274 EXTRACT_NUMBER_AND_INCR (mcnt, p);
07275 #ifdef _LIBC
07276 DEBUG_PRINT3 (" Setting %p to %d.\n", p1, mcnt);
07277 #else
07278 DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt);
07279 #endif
07280 STORE_NUMBER (p1, mcnt);
07281 break;
07282 }
07283
07284 #if 0
07285
07286
07287
07288
07289
07290 case wordbound:
07291 DEBUG_PRINT1 ("EXECUTING wordbound.\n");
07292 if (AT_WORD_BOUNDARY (d))
07293 break;
07294 goto fail;
07295
07296 case notwordbound:
07297 DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
07298 if (AT_WORD_BOUNDARY (d))
07299 goto fail;
07300 break;
07301 #else
07302 case wordbound:
07303 {
07304 boolean prevchar, thischar;
07305
07306 DEBUG_PRINT1 ("EXECUTING wordbound.\n");
07307 if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
07308 break;
07309
07310 prevchar = WORDCHAR_P (d - 1);
07311 thischar = WORDCHAR_P (d);
07312 if (prevchar != thischar)
07313 break;
07314 goto fail;
07315 }
07316
07317 case notwordbound:
07318 {
07319 boolean prevchar, thischar;
07320
07321 DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
07322 if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
07323 goto fail;
07324
07325 prevchar = WORDCHAR_P (d - 1);
07326 thischar = WORDCHAR_P (d);
07327 if (prevchar != thischar)
07328 goto fail;
07329 break;
07330 }
07331 #endif
07332
07333 case wordbeg:
07334 DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
07335 if (!AT_STRINGS_END (d) && WORDCHAR_P (d)
07336 && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
07337 break;
07338 goto fail;
07339
07340 case wordend:
07341 DEBUG_PRINT1 ("EXECUTING wordend.\n");
07342 if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
07343 && (AT_STRINGS_END (d) || !WORDCHAR_P (d)))
07344 break;
07345 goto fail;
07346
07347 #ifdef emacs
07348 case before_dot:
07349 DEBUG_PRINT1 ("EXECUTING before_dot.\n");
07350 if (PTR_CHAR_POS ((unsigned char *) d) >= point)
07351 goto fail;
07352 break;
07353
07354 case at_dot:
07355 DEBUG_PRINT1 ("EXECUTING at_dot.\n");
07356 if (PTR_CHAR_POS ((unsigned char *) d) != point)
07357 goto fail;
07358 break;
07359
07360 case after_dot:
07361 DEBUG_PRINT1 ("EXECUTING after_dot.\n");
07362 if (PTR_CHAR_POS ((unsigned char *) d) <= point)
07363 goto fail;
07364 break;
07365
07366 case syntaxspec:
07367 DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
07368 mcnt = *p++;
07369 goto matchsyntax;
07370
07371 case wordchar:
07372 DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
07373 mcnt = (int) Sword;
07374 matchsyntax:
07375 PREFETCH ();
07376
07377 d++;
07378 if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
07379 goto fail;
07380 SET_REGS_MATCHED ();
07381 break;
07382
07383 case notsyntaxspec:
07384 DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
07385 mcnt = *p++;
07386 goto matchnotsyntax;
07387
07388 case notwordchar:
07389 DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
07390 mcnt = (int) Sword;
07391 matchnotsyntax:
07392 PREFETCH ();
07393
07394 d++;
07395 if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
07396 goto fail;
07397 SET_REGS_MATCHED ();
07398 break;
07399
07400 #else
07401 case wordchar:
07402 DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
07403 PREFETCH ();
07404 if (!WORDCHAR_P (d))
07405 goto fail;
07406 SET_REGS_MATCHED ();
07407 d++;
07408 break;
07409
07410 case notwordchar:
07411 DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
07412 PREFETCH ();
07413 if (WORDCHAR_P (d))
07414 goto fail;
07415 SET_REGS_MATCHED ();
07416 d++;
07417 break;
07418 #endif
07419
07420 default:
07421 abort ();
07422 }
07423 continue;
07424
07425
07426
07427 fail:
07428 if (!FAIL_STACK_EMPTY ())
07429 {
07430 DEBUG_PRINT1 ("\nFAIL:\n");
07431 POP_FAILURE_POINT (d, p,
07432 lowest_active_reg, highest_active_reg,
07433 regstart, regend, reg_info);
07434
07435
07436 if (!p)
07437 goto fail;
07438
07439
07440 assert (p <= pend);
07441 if (p < pend)
07442 {
07443 boolean is_a_jump_n = false;
07444
07445
07446
07447 switch ((re_opcode_t) *p)
07448 {
07449 case jump_n:
07450 is_a_jump_n = true;
07451 case maybe_pop_jump:
07452 case pop_failure_jump:
07453 case jump:
07454 p1 = p + 1;
07455 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
07456 p1 += mcnt;
07457
07458 if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
07459 || (!is_a_jump_n
07460 && (re_opcode_t) *p1 == on_failure_jump))
07461 goto fail;
07462 break;
07463 default:
07464 ;
07465 }
07466 }
07467
07468 if (d >= string1 && d <= end1)
07469 dend = end_match_1;
07470 }
07471 else
07472 break;
07473 }
07474
07475 if (best_regs_set)
07476 goto restore_best_regs;
07477
07478 FREE_VARIABLES ();
07479
07480 return -1;
07481 }
07482
07483
07484
07485
07486
07487
07488
07489
07490
07491
07492
07493
07494
07495
07496 static boolean
07497 PREFIX(group_match_null_string_p) (UCHAR_T **p, UCHAR_T *end,
07498 PREFIX(register_info_type) *reg_info)
07499 {
07500 int mcnt;
07501
07502 UCHAR_T *p1 = *p + 2;
07503
07504 while (p1 < end)
07505 {
07506
07507
07508
07509
07510 switch ((re_opcode_t) *p1)
07511 {
07512
07513 case on_failure_jump:
07514 p1++;
07515 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
07516
07517
07518
07519
07520 if (mcnt >= 0)
07521 {
07522
07523
07524
07525
07526
07527
07528
07529
07530
07531
07532
07533
07534
07535
07536
07537
07538
07539
07540 while ((re_opcode_t) p1[mcnt-(1+OFFSET_ADDRESS_SIZE)] ==
07541 jump_past_alt)
07542 {
07543
07544
07545
07546
07547 if (!PREFIX(alt_match_null_string_p) (p1, p1 + mcnt -
07548 (1 + OFFSET_ADDRESS_SIZE),
07549 reg_info))
07550 return false;
07551
07552
07553
07554 p1 += mcnt;
07555
07556
07557
07558 if ((re_opcode_t) *p1 != on_failure_jump)
07559 break;
07560
07561
07562
07563 p1++;
07564 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
07565 if ((re_opcode_t) p1[mcnt-(1+OFFSET_ADDRESS_SIZE)] !=
07566 jump_past_alt)
07567 {
07568
07569 p1 -= 1 + OFFSET_ADDRESS_SIZE;
07570 break;
07571 }
07572 }
07573
07574
07575
07576
07577 EXTRACT_NUMBER (mcnt, p1 - OFFSET_ADDRESS_SIZE);
07578
07579 if (!PREFIX(alt_match_null_string_p) (p1, p1 + mcnt, reg_info))
07580 return false;
07581
07582 p1 += mcnt;
07583 }
07584 break;
07585
07586
07587 case stop_memory:
07588 assert (p1[1] == **p);
07589 *p = p1 + 2;
07590 return true;
07591
07592
07593 default:
07594 if (!PREFIX(common_op_match_null_string_p) (&p1, end, reg_info))
07595 return false;
07596 }
07597 }
07598
07599 return false;
07600 }
07601
07602
07603
07604
07605
07606
07607 static boolean
07608 PREFIX(alt_match_null_string_p) (UCHAR_T *p, UCHAR_T *end,
07609 PREFIX(register_info_type) *reg_info)
07610 {
07611 int mcnt;
07612 UCHAR_T *p1 = p;
07613
07614 while (p1 < end)
07615 {
07616
07617
07618
07619 switch ((re_opcode_t) *p1)
07620 {
07621
07622 case on_failure_jump:
07623 p1++;
07624 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
07625 p1 += mcnt;
07626 break;
07627
07628 default:
07629 if (!PREFIX(common_op_match_null_string_p) (&p1, end, reg_info))
07630 return false;
07631 }
07632 }
07633
07634 return true;
07635 }
07636
07637
07638
07639
07640
07641
07642
07643 static boolean
07644 PREFIX(common_op_match_null_string_p) (UCHAR_T **p, UCHAR_T *end,
07645 PREFIX(register_info_type) *reg_info)
07646 {
07647 int mcnt;
07648 boolean ret;
07649 int reg_no;
07650 UCHAR_T *p1 = *p;
07651
07652 switch ((re_opcode_t) *p1++)
07653 {
07654 case no_op:
07655 case begline:
07656 case endline:
07657 case begbuf:
07658 case endbuf:
07659 case wordbeg:
07660 case wordend:
07661 case wordbound:
07662 case notwordbound:
07663 #ifdef emacs
07664 case before_dot:
07665 case at_dot:
07666 case after_dot:
07667 #endif
07668 break;
07669
07670 case start_memory:
07671 reg_no = *p1;
07672 assert (reg_no > 0 && reg_no <= MAX_REGNUM);
07673 ret = PREFIX(group_match_null_string_p) (&p1, end, reg_info);
07674
07675
07676
07677
07678 if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
07679 REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
07680
07681 if (!ret)
07682 return false;
07683 break;
07684
07685
07686 case jump:
07687 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
07688 if (mcnt >= 0)
07689 p1 += mcnt;
07690 else
07691 return false;
07692 break;
07693
07694 case succeed_n:
07695
07696 p1 += OFFSET_ADDRESS_SIZE;
07697 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
07698
07699 if (mcnt == 0)
07700 {
07701 p1 -= 2 * OFFSET_ADDRESS_SIZE;
07702 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
07703 p1 += mcnt;
07704 }
07705 else
07706 return false;
07707 break;
07708
07709 case duplicate:
07710 if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
07711 return false;
07712 break;
07713
07714 case set_number_at:
07715 p1 += 2 * OFFSET_ADDRESS_SIZE;
07716
07717 default:
07718
07719 return false;
07720 }
07721
07722 *p = p1;
07723 return true;
07724 }
07725
07726
07727
07728
07729
07730 static int
07731 PREFIX(bcmp_translate) (const CHAR_T *s1, const CHAR_T *s2, register int len,
07732 RE_TRANSLATE_TYPE translate)
07733 {
07734 register const UCHAR_T *p1 = (const UCHAR_T *) s1;
07735 register const UCHAR_T *p2 = (const UCHAR_T *) s2;
07736 while (len)
07737 {
07738 #ifdef WCHAR
07739 if (((*p1<=0xff)?translate[*p1++]:*p1++)
07740 != ((*p2<=0xff)?translate[*p2++]:*p2++))
07741 return 1;
07742 #else
07743 if (translate[*p1++] != translate[*p2++]) return 1;
07744 #endif
07745 len--;
07746 }
07747 return 0;
07748 }
07749
07750
07751 #else
07752
07753
07754
07755
07756
07757
07758
07759
07760
07761
07762
07763
07764 const char *
07765 re_compile_pattern (const char *pattern, size_t length,
07766 struct re_pattern_buffer *bufp)
07767 {
07768 reg_errcode_t ret;
07769
07770
07771
07772 bufp->regs_allocated = REGS_UNALLOCATED;
07773
07774
07775
07776
07777 bufp->no_sub = 0;
07778
07779
07780 bufp->newline_anchor = 1;
07781
07782 # ifdef MBS_SUPPORT
07783 if (MB_CUR_MAX != 1)
07784 ret = wcs_regex_compile (pattern, length, re_syntax_options, bufp);
07785 else
07786 # endif
07787 ret = byte_regex_compile (pattern, length, re_syntax_options, bufp);
07788
07789 if (!ret)
07790 return NULL;
07791 return gettext (re_error_msgid[(int) ret]);
07792 }
07793 #ifdef _LIBC
07794 weak_alias (__re_compile_pattern, re_compile_pattern)
07795 #endif
07796
07797
07798
07799
07800 #if defined _REGEX_RE_COMP || defined _LIBC
07801
07802
07803 static struct re_pattern_buffer re_comp_buf;
07804
07805 char *
07806 #ifdef _LIBC
07807
07808
07809
07810 weak_function
07811 #endif
07812 re_comp (const char *s)
07813 {
07814 reg_errcode_t ret;
07815
07816 if (!s)
07817 {
07818 if (!re_comp_buf.buffer)
07819 return (char *) gettext ("No previous regular expression");
07820 return 0;
07821 }
07822
07823 if (!re_comp_buf.buffer)
07824 {
07825 re_comp_buf.buffer = (unsigned char *) malloc (200);
07826 if (re_comp_buf.buffer == NULL)
07827 return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
07828 re_comp_buf.allocated = 200;
07829
07830 re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
07831 if (re_comp_buf.fastmap == NULL)
07832 return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
07833 }
07834
07835
07836
07837
07838
07839 re_comp_buf.newline_anchor = 1;
07840
07841 # ifdef MBS_SUPPORT
07842 if (MB_CUR_MAX != 1)
07843 ret = wcs_regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
07844 else
07845 # endif
07846 ret = byte_regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
07847
07848 if (!ret)
07849 return NULL;
07850
07851
07852 return (char *) gettext (re_error_msgid[(int) ret]);
07853 }
07854
07855
07856 int
07857 #ifdef _LIBC
07858 weak_function
07859 #endif
07860 re_exec (const char *s)
07861 {
07862 const int len = strlen (s);
07863 return
07864 0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
07865 }
07866
07867 #endif
07868
07869
07870
07871 #ifndef emacs
07872
07873
07874
07875
07876
07877
07878
07879
07880
07881
07882
07883
07884
07885
07886
07887
07888
07889
07890
07891
07892
07893
07894
07895
07896
07897
07898
07899
07900
07901
07902
07903
07904
07905
07906
07907
07908 int
07909 regcomp (regex_t *preg, const char *pattern, int cflags)
07910 {
07911 reg_errcode_t ret;
07912 reg_syntax_t syntax
07913 = (cflags & REG_EXTENDED) ?
07914 RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
07915
07916
07917 preg->buffer = 0;
07918 preg->allocated = 0;
07919 preg->used = 0;
07920
07921
07922 preg->fastmap = (char *) malloc (1 << BYTEWIDTH);
07923
07924 if (cflags & REG_ICASE)
07925 {
07926 int i;
07927
07928 preg->translate
07929 = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
07930 * sizeof (*(RE_TRANSLATE_TYPE)0));
07931 if (preg->translate == NULL)
07932 return (int) REG_ESPACE;
07933
07934
07935 for (i = 0; i < CHAR_SET_SIZE; i++)
07936 preg->translate[i] = ISUPPER (i) ? TOLOWER (i) : i;
07937 }
07938 else
07939 preg->translate = NULL;
07940
07941
07942 if (cflags & REG_NEWLINE)
07943 {
07944 syntax &= ~RE_DOT_NEWLINE;
07945 syntax |= RE_HAT_LISTS_NOT_NEWLINE;
07946
07947 preg->newline_anchor = 1;
07948 }
07949 else
07950 preg->newline_anchor = 0;
07951
07952 preg->no_sub = !!(cflags & REG_NOSUB);
07953
07954
07955
07956 # ifdef MBS_SUPPORT
07957 if (MB_CUR_MAX != 1)
07958 ret = wcs_regex_compile (pattern, strlen (pattern), syntax, preg);
07959 else
07960 # endif
07961 ret = byte_regex_compile (pattern, strlen (pattern), syntax, preg);
07962
07963
07964
07965 if (ret == REG_ERPAREN) ret = REG_EPAREN;
07966
07967 if (ret == REG_NOERROR && preg->fastmap)
07968 {
07969
07970
07971 if (re_compile_fastmap (preg) == -2)
07972 {
07973
07974
07975 free (preg->fastmap);
07976 preg->fastmap = NULL;
07977 }
07978 }
07979
07980 return (int) ret;
07981 }
07982 #ifdef _LIBC
07983 weak_alias (__regcomp, regcomp)
07984 #endif
07985
07986
07987
07988
07989
07990
07991
07992
07993
07994
07995
07996
07997
07998
07999
08000
08001 int
08002 regexec (const regex_t *preg, const char *string, size_t nmatch,
08003 regmatch_t pmatch[], int eflags)
08004 {
08005 int ret;
08006 struct re_registers regs;
08007 regex_t private_preg;
08008 int len = strlen (string);
08009 boolean want_reg_info = !preg->no_sub && nmatch > 0;
08010
08011 private_preg = *preg;
08012
08013 private_preg.not_bol = !!(eflags & REG_NOTBOL);
08014 private_preg.not_eol = !!(eflags & REG_NOTEOL);
08015
08016
08017
08018
08019 private_preg.regs_allocated = REGS_FIXED;
08020
08021 if (want_reg_info)
08022 {
08023 regs.num_regs = nmatch;
08024 regs.start = TALLOC (nmatch * 2, regoff_t);
08025 if (regs.start == NULL)
08026 return (int) REG_NOMATCH;
08027 regs.end = regs.start + nmatch;
08028 }
08029
08030
08031 ret = re_search (&private_preg, string, len,
08032 0, len,
08033 want_reg_info ? ®s : (struct re_registers *) 0);
08034
08035
08036 if (want_reg_info)
08037 {
08038 if (ret >= 0)
08039 {
08040 unsigned r;
08041
08042 for (r = 0; r < nmatch; r++)
08043 {
08044 pmatch[r].rm_so = regs.start[r];
08045 pmatch[r].rm_eo = regs.end[r];
08046 }
08047 }
08048
08049
08050 free (regs.start);
08051 }
08052
08053
08054 return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
08055 }
08056 #ifdef _LIBC
08057 weak_alias (__regexec, regexec)
08058 #endif
08059
08060
08061
08062
08063
08064 size_t
08065 regerror (int errcode, const regex_t *preg ATTRIBUTE_UNUSED,
08066 char *errbuf, size_t errbuf_size)
08067 {
08068 const char *msg;
08069 size_t msg_size;
08070
08071 if (errcode < 0
08072 || errcode >= (int) (sizeof (re_error_msgid)
08073 / sizeof (re_error_msgid[0])))
08074
08075
08076
08077
08078 abort ();
08079
08080 msg = gettext (re_error_msgid[errcode]);
08081
08082 msg_size = strlen (msg) + 1;
08083
08084 if (errbuf_size != 0)
08085 {
08086 if (msg_size > errbuf_size)
08087 {
08088 #if defined HAVE_MEMPCPY || defined _LIBC
08089 *((char *) mempcpy (errbuf, msg, errbuf_size - 1)) = '\0';
08090 #else
08091 memcpy (errbuf, msg, errbuf_size - 1);
08092 errbuf[errbuf_size - 1] = 0;
08093 #endif
08094 }
08095 else
08096 memcpy (errbuf, msg, msg_size);
08097 }
08098
08099 return msg_size;
08100 }
08101 #ifdef _LIBC
08102 weak_alias (__regerror, regerror)
08103 #endif
08104
08105
08106
08107
08108 void
08109 regfree (regex_t *preg)
08110 {
08111 if (preg->buffer != NULL)
08112 free (preg->buffer);
08113 preg->buffer = NULL;
08114
08115 preg->allocated = 0;
08116 preg->used = 0;
08117
08118 if (preg->fastmap != NULL)
08119 free (preg->fastmap);
08120 preg->fastmap = NULL;
08121 preg->fastmap_accurate = 0;
08122
08123 if (preg->translate != NULL)
08124 free (preg->translate);
08125 preg->translate = NULL;
08126 }
08127 #ifdef _LIBC
08128 weak_alias (__regfree, regfree)
08129 #endif
08130
08131 #endif
08132
08133 #endif
08134
08135
08136 #undef STORE_NUMBER
08137 #undef STORE_NUMBER_AND_INCR
08138 #undef EXTRACT_NUMBER
08139 #undef EXTRACT_NUMBER_AND_INCR
08140
08141 #undef DEBUG_PRINT_COMPILED_PATTERN
08142 #undef DEBUG_PRINT_DOUBLE_STRING
08143
08144 #undef INIT_FAIL_STACK
08145 #undef RESET_FAIL_STACK
08146 #undef DOUBLE_FAIL_STACK
08147 #undef PUSH_PATTERN_OP
08148 #undef PUSH_FAILURE_POINTER
08149 #undef PUSH_FAILURE_INT
08150 #undef PUSH_FAILURE_ELT
08151 #undef POP_FAILURE_POINTER
08152 #undef POP_FAILURE_INT
08153 #undef POP_FAILURE_ELT
08154 #undef DEBUG_PUSH
08155 #undef DEBUG_POP
08156 #undef PUSH_FAILURE_POINT
08157 #undef POP_FAILURE_POINT
08158
08159 #undef REG_UNSET_VALUE
08160 #undef REG_UNSET
08161
08162 #undef PATFETCH
08163 #undef PATFETCH_RAW
08164 #undef PATUNFETCH
08165 #undef TRANSLATE
08166
08167 #undef INIT_BUF_SIZE
08168 #undef GET_BUFFER_SPACE
08169 #undef BUF_PUSH
08170 #undef BUF_PUSH_2
08171 #undef BUF_PUSH_3
08172 #undef STORE_JUMP
08173 #undef STORE_JUMP2
08174 #undef INSERT_JUMP
08175 #undef INSERT_JUMP2
08176 #undef EXTEND_BUFFER
08177 #undef GET_UNSIGNED_NUMBER
08178 #undef FREE_STACK_RETURN
08179
08180 # undef POINTER_TO_OFFSET
08181 # undef MATCHING_IN_FRST_STRING
08182 # undef PREFETCH
08183 # undef AT_STRINGS_BEG
08184 # undef AT_STRINGS_END
08185 # undef WORDCHAR_P
08186 # undef FREE_VAR
08187 # undef FREE_VARIABLES
08188 # undef NO_HIGHEST_ACTIVE_REG
08189 # undef NO_LOWEST_ACTIVE_REG
08190
08191 # undef CHAR_T
08192 # undef UCHAR_T
08193 # undef COMPILED_BUFFER_VAR
08194 # undef OFFSET_ADDRESS_SIZE
08195 # undef CHAR_CLASS_SIZE
08196 # undef PREFIX
08197 # undef ARG_PREFIX
08198 # undef PUT_CHAR
08199 # undef BYTE
08200 # undef WCHAR
08201
08202 # define DEFINED_ONCE