00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024
00025
00026
00027
00028
00029
00030
00031
00032 #if defined _AIX && !defined __GNUC__ && !defined REGEX_MALLOC
00033 #pragma alloca
00034 #endif
00035
00036 #undef _GNU_SOURCE
00037 #define _GNU_SOURCE
00038
00039 #ifndef INSIDE_RECURSION
00040 # ifdef HAVE_CONFIG_H
00041 # include <config.h>
00042 # endif
00043 #endif
00044
00045 #include <ansidecl.h>
00046
00047 #ifndef PARAMS
00048 # if defined __GNUC__ || (defined __STDC__ && __STDC__)
00049 # define PARAMS(args) args
00050 # else
00051 # define PARAMS(args) ()
00052 # endif
00053 #endif
00054
00055 #ifndef INSIDE_RECURSION
00056
00057 # if defined STDC_HEADERS && !defined emacs
00058 # include <stddef.h>
00059 # else
00060
00061 # include <sys/types.h>
00062 # endif
00063
00064 # define WIDE_CHAR_SUPPORT (HAVE_WCTYPE_H && HAVE_WCHAR_H && HAVE_BTOWC)
00065
00066
00067
00068 # if defined _LIBC || WIDE_CHAR_SUPPORT
00069
00070 # include <wchar.h>
00071 # include <wctype.h>
00072 # endif
00073
00074 # ifdef _LIBC
00075
00076 # define regfree(preg) __regfree (preg)
00077 # define regexec(pr, st, nm, pm, ef) __regexec (pr, st, nm, pm, ef)
00078 # define regcomp(preg, pattern, cflags) __regcomp (preg, pattern, cflags)
00079 # define regerror(errcode, preg, errbuf, errbuf_size) \
00080 __regerror(errcode, preg, errbuf, errbuf_size)
00081 # define re_set_registers(bu, re, nu, st, en) \
00082 __re_set_registers (bu, re, nu, st, en)
00083 # define re_match_2(bufp, string1, size1, string2, size2, pos, regs, stop) \
00084 __re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
00085 # define re_match(bufp, string, size, pos, regs) \
00086 __re_match (bufp, string, size, pos, regs)
00087 # define re_search(bufp, string, size, startpos, range, regs) \
00088 __re_search (bufp, string, size, startpos, range, regs)
00089 # define re_compile_pattern(pattern, length, bufp) \
00090 __re_compile_pattern (pattern, length, bufp)
00091 # define re_set_syntax(syntax) __re_set_syntax (syntax)
00092 # define re_search_2(bufp, st1, s1, st2, s2, startpos, range, regs, stop) \
00093 __re_search_2 (bufp, st1, s1, st2, s2, startpos, range, regs, stop)
00094 # define re_compile_fastmap(bufp) __re_compile_fastmap (bufp)
00095
00096 # define btowc __btowc
00097
00098
00099 # include <locale/localeinfo.h>
00100 # include <locale/elem-hash.h>
00101 # include <langinfo.h>
00102 # include <locale/coll-lookup.h>
00103 # endif
00104
00105
00106 # if (HAVE_LIBINTL_H && ENABLE_NLS) || defined _LIBC
00107 # include <libintl.h>
00108 # ifdef _LIBC
00109 # undef gettext
00110 # define gettext(msgid) __dcgettext ("libc", msgid, LC_MESSAGES)
00111 # endif
00112 # else
00113 # define gettext(msgid) (msgid)
00114 # endif
00115
00116 # ifndef gettext_noop
00117
00118
00119 # define gettext_noop(String) String
00120 # endif
00121
00122
00123
00124 # ifdef emacs
00125
00126 # include "lisp.h"
00127 # include "buffer.h"
00128 # include "syntax.h"
00129
00130 # else
00131
00132
00133
00134
00135 # undef REL_ALLOC
00136
00137 # if defined STDC_HEADERS || defined _LIBC
00138 # include <stdlib.h>
00139 # else
00140 char *malloc ();
00141 char *realloc ();
00142 # endif
00143
00144
00145
00146 # ifdef INHIBIT_STRING_HEADER
00147 # if !(defined HAVE_BZERO && defined HAVE_BCOPY)
00148 # if !defined bzero && !defined bcopy
00149 # undef INHIBIT_STRING_HEADER
00150 # endif
00151 # endif
00152 # endif
00153
00154
00155
00156
00157 # ifndef INHIBIT_STRING_HEADER
00158 # if defined HAVE_STRING_H || defined STDC_HEADERS || defined _LIBC
00159 # include <string.h>
00160 # ifndef bzero
00161 # ifndef _LIBC
00162 # define bzero(s, n) (memset (s, '\0', n), (s))
00163 # else
00164 # define bzero(s, n) __bzero (s, n)
00165 # endif
00166 # endif
00167 # else
00168 # include <strings.h>
00169 # ifndef memcmp
00170 # define memcmp(s1, s2, n) bcmp (s1, s2, n)
00171 # endif
00172 # ifndef memcpy
00173 # define memcpy(d, s, n) (bcopy (s, d, n), (d))
00174 # endif
00175 # endif
00176 # endif
00177
00178
00179
00180
00181
00182 # ifndef Sword
00183 # define Sword 1
00184 # endif
00185
00186 # ifdef SWITCH_ENUM_BUG
00187 # define SWITCH_ENUM_CAST(x) ((int)(x))
00188 # else
00189 # define SWITCH_ENUM_CAST(x) (x)
00190 # endif
00191
00192 # endif
00193
00194 # if defined _LIBC || HAVE_LIMITS_H
00195 # include <limits.h>
00196 # endif
00197
00198 # ifndef MB_LEN_MAX
00199 # define MB_LEN_MAX 1
00200 # endif
00201
00202
00203 # include "xregex.h"
00204
00205
00206 # include <ctype.h>
00207
00208
00209
00210
00211
00212
00213
00214
00215
00216
00217
00218
00219
/* Character-class wrappers around the <ctype.h> tests.  ISASCII(c) is the
   constant 1 when STDC_HEADERS is defined, or when isascii is neither a
   macro nor announced by HAVE_ISASCII; otherwise it calls isascii(c).
   Every IS* wrapper below evaluates ISASCII(c) first and only then the
   corresponding <ctype.h> test, so the argument is evaluated more than
   once: do not pass expressions with side effects.  */
00220 # undef ISASCII
00221 # if defined STDC_HEADERS || (!defined isascii && !defined HAVE_ISASCII)
00222 # define ISASCII(c) 1
00223 # else
00224 # define ISASCII(c) isascii(c)
00225 # endif
00226
/* isblank and isgraph are used only when the host provides them as macros;
   otherwise a fallback is built here (space/tab for ISBLANK, printable and
   not whitespace for ISGRAPH).  */
00227 # ifdef isblank
00228 # define ISBLANK(c) (ISASCII (c) && isblank (c))
00229 # else
00230 # define ISBLANK(c) ((c) == ' ' || (c) == '\t')
00231 # endif
00232 # ifdef isgraph
00233 # define ISGRAPH(c) (ISASCII (c) && isgraph (c))
00234 # else
00235 # define ISGRAPH(c) (ISASCII (c) && isprint (c) && !isspace (c))
00236 # endif
00237
/* ISPRINT is undefined first so that any earlier definition (from an
   included header) does not clash with the one given here.  */
00238 # undef ISPRINT
00239 # define ISPRINT(c) (ISASCII (c) && isprint (c))
00240 # define ISDIGIT(c) (ISASCII (c) && isdigit (c))
00241 # define ISALNUM(c) (ISASCII (c) && isalnum (c))
00242 # define ISALPHA(c) (ISASCII (c) && isalpha (c))
00243 # define ISCNTRL(c) (ISASCII (c) && iscntrl (c))
00244 # define ISLOWER(c) (ISASCII (c) && islower (c))
00245 # define ISPUNCT(c) (ISASCII (c) && ispunct (c))
00246 # define ISSPACE(c) (ISASCII (c) && isspace (c))
00247 # define ISUPPER(c) (ISASCII (c) && isupper (c))
00248 # define ISXDIGIT(c) (ISASCII (c) && isxdigit (c))
00249
00250 # ifdef _tolower
00251 # define TOLOWER(c) _tolower(c)
00252 # else
00253 # define TOLOWER(c) tolower(c)
00254 # endif
00255
00256 # ifndef NULL
00257 # define NULL (void *)0
00258 # endif
00259
00260
00261
00262
00263
00264 # undef SIGN_EXTEND_CHAR
00265 # if __STDC__
00266 # define SIGN_EXTEND_CHAR(c) ((signed char) (c))
00267 # else
00268
00269 # define SIGN_EXTEND_CHAR(c) ((((unsigned char) (c)) ^ 128) - 128)
00270 # endif
00271
00272 # ifndef emacs
00273
00274 # define CHAR_SET_SIZE 256
00275
00276 # ifdef SYNTAX_TABLE
00277
00278 extern char *re_syntax_table;
00279
00280 # else
00281
00282 static char re_syntax_table[CHAR_SET_SIZE];
00283
00284 static void init_syntax_once PARAMS ((void));
00285
00286 static void
00287 init_syntax_once ()
00288 {
00289 register int c;
00290 static int done = 0;
00291
00292 if (done)
00293 return;
00294 bzero (re_syntax_table, sizeof re_syntax_table);
00295
00296 for (c = 0; c < CHAR_SET_SIZE; ++c)
00297 if (ISALNUM (c))
00298 re_syntax_table[c] = Sword;
00299
00300 re_syntax_table['_'] = Sword;
00301
00302 done = 1;
00303 }
00304
00305 # endif
00306
00307 # define SYNTAX(c) re_syntax_table[(unsigned char) (c)]
00308
00309 # endif
00310
00311
00312 # if !defined _LIBC && !defined HAVE_UINTPTR_T
00313 typedef unsigned long int uintptr_t;
00314 # endif
00315
00316
00317
00318
00319
00320
00321
00322
00323
00324
00325
/* Scratch-memory primitives.  With REGEX_MALLOC they map straight onto
   malloc/realloc/free; otherwise they are built on alloca and "freeing"
   is a no-op.  */
00326 # ifdef REGEX_MALLOC
00327
00328 # define REGEX_ALLOCATE malloc
/* OSIZE is accepted for interface parity with the alloca variant but is
   not used here.  */
00329 # define REGEX_REALLOCATE(source, osize, nsize) realloc (source, nsize)
00330 # define REGEX_FREE free
00331
00332 # else
00333
00334
/* Make sure some alloca is available: GCC's builtin, or <alloca.h> when
   the configuration says it exists.  */
00335 # ifndef alloca
00336
00337
00338 # ifdef __GNUC__
00339 # define alloca __builtin_alloca
00340 # else
00341 # if HAVE_ALLOCA_H
00342 # include <alloca.h>
00343 # endif
00344 # endif
00345
00346 # endif
00347
00348 # define REGEX_ALLOCATE alloca
00349
00350
/* Allocates NSIZE fresh bytes with alloca and copies the first OSIZE
   bytes of SOURCE into them; the value of the expression is the result of
   memcpy, i.e. the new block.  NOTE: this expands to an assignment to a
   variable named `destination', which must be declared (as char *) in the
   scope where the macro is used.  */
00351 # define REGEX_REALLOCATE(source, osize, nsize) \
00352 (destination = (char *) alloca (nsize), \
00353 memcpy (destination, source, osize))
00354
00355
/* Nothing to release in alloca mode; expands to a void expression so it
   can still be written as a statement.  */
00356 # define REGEX_FREE(arg) ((void)0)
00357
00358 # endif
00359
00360
00361
00362 # if defined REL_ALLOC && defined REGEX_MALLOC
00363
00364 # define REGEX_ALLOCATE_STACK(size) \
00365 r_alloc (&failure_stack_ptr, (size))
00366 # define REGEX_REALLOCATE_STACK(source, osize, nsize) \
00367 r_re_alloc (&failure_stack_ptr, (nsize))
00368 # define REGEX_FREE_STACK(ptr) \
00369 r_alloc_free (&failure_stack_ptr)
00370
00371 # else
00372
00373 # ifdef REGEX_MALLOC
00374
00375 # define REGEX_ALLOCATE_STACK malloc
00376 # define REGEX_REALLOCATE_STACK(source, osize, nsize) realloc (source, nsize)
00377 # define REGEX_FREE_STACK free
00378
00379 # else
00380
/* Failure-stack allocation when neither REL_ALLOC nor REGEX_MALLOC is in
   effect: the stack lives in alloca memory.  */
# define REGEX_ALLOCATE_STACK alloca

/* Growing the stack simply defers to the generic alloca-based
   REGEX_REALLOCATE (allocate a new block and copy OSIZE bytes).  */
# define REGEX_REALLOCATE_STACK(source, osize, nsize)			\
   REGEX_REALLOCATE (source, osize, nsize)

/* No need to explicitly free anything in alloca mode.  Expands to a void
   expression (like REGEX_FREE) rather than to nothing, so the macro is a
   well-formed expression wherever it is used.  */
# define REGEX_FREE_STACK(arg) ((void)0)
00387
00388 # endif
00389 # endif
00390
00391
00392
00393
00394
00395 # define FIRST_STRING_P(ptr) \
00396 (size1 && string1 <= (ptr) && (ptr) <= string1 + size1)
00397
00398
00399 # define TALLOC(n, t) ((t *) malloc ((n) * sizeof (t)))
00400 # define RETALLOC(addr, n, t) ((addr) = (t *) realloc (addr, (n) * sizeof (t)))
00401 # define RETALLOC_IF(addr, n, t) \
00402 if (addr) RETALLOC((addr), (n), t); else (addr) = TALLOC ((n), t)
00403 # define REGEX_TALLOC(n, t) ((t *) REGEX_ALLOCATE ((n) * sizeof (t)))
00404
00405 # define BYTEWIDTH 8
00406
00407 # define STREQ(s1, s2) ((strcmp (s1, s2) == 0))
00408
00409 # undef MAX
00410 # undef MIN
00411 # define MAX(a, b) ((a) > (b) ? (a) : (b))
00412 # define MIN(a, b) ((a) < (b) ? (a) : (b))
00413
00414 typedef char boolean;
00415 # define false 0
00416 # define true 1
00417
00418 static reg_errcode_t byte_regex_compile _RE_ARGS ((const char *pattern, size_t size,
00419 reg_syntax_t syntax,
00420 struct re_pattern_buffer *bufp));
00421
00422 static int byte_re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp,
00423 const char *string1, int size1,
00424 const char *string2, int size2,
00425 int pos,
00426 struct re_registers *regs,
00427 int stop));
00428 static int byte_re_search_2 PARAMS ((struct re_pattern_buffer *bufp,
00429 const char *string1, int size1,
00430 const char *string2, int size2,
00431 int startpos, int range,
00432 struct re_registers *regs, int stop));
00433 static int byte_re_compile_fastmap PARAMS ((struct re_pattern_buffer *bufp));
00434
00435 #ifdef MBS_SUPPORT
00436 static reg_errcode_t wcs_regex_compile _RE_ARGS ((const char *pattern, size_t size,
00437 reg_syntax_t syntax,
00438 struct re_pattern_buffer *bufp));
00439
00440
00441 static int wcs_re_match_2_internal PARAMS ((struct re_pattern_buffer *bufp,
00442 const char *cstring1, int csize1,
00443 const char *cstring2, int csize2,
00444 int pos,
00445 struct re_registers *regs,
00446 int stop,
00447 wchar_t *string1, int size1,
00448 wchar_t *string2, int size2,
00449 int *mbs_offset1, int *mbs_offset2));
00450 static int wcs_re_search_2 PARAMS ((struct re_pattern_buffer *bufp,
00451 const char *string1, int size1,
00452 const char *string2, int size2,
00453 int startpos, int range,
00454 struct re_registers *regs, int stop));
00455 static int wcs_re_compile_fastmap PARAMS ((struct re_pattern_buffer *bufp));
00456 #endif
00457
00458
00459
00460
00461
00462
/* Opcodes of the compiled pattern.  Each opcode occupies one element of
   the compiled buffer and may be followed by operands.  The operand
   layouts described below are the ones the debug printer
   (print_partial_compiled_pattern) reads; remarks about what the matcher
   does with an opcode are assumptions to be confirmed against the matcher,
   which is not part of this section.  */
00463 typedef enum
00464 {
/* No operands.  Explicitly zero.  */
00465 no_op = 0,
00466
00467
/* Not decoded by the debug printer (it falls into the printer's default
   case).  Presumably makes the match succeed immediately -- confirm.  */
00468 succeed,
00469
00470
/* Followed by a count N and then N characters (presumably matched
   literally -- confirm).  */
00471 exactn,
00472
00473 # ifdef MBS_SUPPORT
00474
/* Same layout as exactn; the debug printer dumps the N elements in hex.
   Only exists when MBS_SUPPORT is defined.  */
00475 exactn_bin,
00476 # endif
00477
00478
/* No operands.  */
00479 anychar,
00480
00481
00482
00483
00484
00485
00486
00487
00488
00489
00490
00491
00492
00493
/* Byte build: followed by a length byte N and N bitmap bytes; character C
   is in the set when C / 8 < N and bit (C % 8) of bitmap byte C / 8 is
   set.  Wide-character build: followed by five length words and then the
   corresponding lists, which the printer walks in this order: character
   classes, collating symbols, equivalence classes, ranges (pairs), single
   characters.  */
00494 charset,
00495
00496
00497
/* Same layout as charset; the printer shows it with a leading "^"
   (presumably the complemented set -- confirm).  */
00498 charset_not,
00499
00500
00501
00502
00503
00504
00505
00506
/* Followed by two one-element operands (presumably the group number and a
   count of inner groups -- confirm).  */
00507 start_memory,
00508
00509
00510
00511
00512
00513
00514
00515
/* Followed by two one-element operands, laid out like start_memory.  */
00516 stop_memory,
00517
00518
00519
/* Followed by one one-element operand (presumably the number of the group
   whose text must be matched again -- confirm).  */
00520 duplicate,
00521
00522
/* No operands.  */
00523 begline,
00524
00525
/* No operands.  */
00526 endline,
00527
00528
00529
/* No operands.  */
00530 begbuf,
00531
00532
/* No operands.  */
00533 endbuf,
00534
00535
/* All jump-style opcodes below are followed by one stored number
   (OFFSET_ADDRESS_SIZE elements); the target is that number added to the
   position just after the stored number.  */
00536 jump,
00537
00538
/* One stored number; see jump.  */
00539 jump_past_alt,
00540
00541
00542
00543
/* One stored number; see jump.  */
00544 on_failure_jump,
00545
00546
00547
/* One stored number; see jump.  */
00548 on_failure_keep_string_jump,
00549
00550
00551
00552
/* One stored number; see jump.  */
00553 pop_failure_jump,
00554
00555
00556
00557
00558
00559
00560
00561
00562
/* One stored number; see jump.  */
00563 maybe_pop_jump,
00564
00565
00566
00567
00568
00569
00570
/* One stored number; see jump.  */
00571 dummy_failure_jump,
00572
00573
00574
/* No operands.  */
00575 push_dummy_failure,
00576
00577
00578
00579
/* Followed by two stored numbers: a relative target (measured from just
   after the first number) and a count, printed as "N times".  */
00580 succeed_n,
00581
00582
00583
00584
/* Followed by two stored numbers, laid out like succeed_n.  */
00585 jump_n,
00586
00587
00588
00589
00590
/* Followed by two stored numbers: a relative location (measured from just
   after the first number) and the value printed as what that location is
   set to.  */
00591 set_number_at,
00592
/* The remaining opcodes take no operands.  */
00593 wordchar,
00594 notwordchar,
00595
00596 wordbeg,
00597 wordend,
00598
00599 wordbound,
00600 notwordbound
00601
/* Opcodes that exist only in the emacs build.  Note the leading comma on
   the first one: it continues the list after notwordbound.  */
00602 # ifdef emacs
00603 ,before_dot,
00604 at_dot,
00605 after_dot,
00606
00607
00608
/* Followed by one one-element operand.  */
00609 syntaxspec,
00610
00611
/* Followed by one one-element operand.  */
00612 notsyntaxspec
00613 # endif
00614 } re_opcode_t;
00615 #endif
00616
00617
00618 #ifdef BYTE
00619 # define CHAR_T char
00620 # define UCHAR_T unsigned char
00621 # define COMPILED_BUFFER_VAR bufp->buffer
00622 # define OFFSET_ADDRESS_SIZE 2
00623 # if defined (__STDC__) || defined (ALMOST_STDC) || defined (HAVE_STRINGIZE)
00624 # define PREFIX(name) byte_##name
00625 # else
00626 # define PREFIX(name) byte_name
00627 # endif
00628 # define ARG_PREFIX(name) name
00629 # define PUT_CHAR(c) putchar (c)
00630 #else
00631 # ifdef WCHAR
00632 # define CHAR_T wchar_t
00633 # define UCHAR_T wchar_t
00634 # define COMPILED_BUFFER_VAR wc_buffer
00635 # define OFFSET_ADDRESS_SIZE 1
00636 # define CHAR_CLASS_SIZE ((__alignof__(wctype_t)+sizeof(wctype_t))/sizeof(CHAR_T)+1)
00637 # if defined (__STDC__) || defined (ALMOST_STDC) || defined (HAVE_STRINGIZE)
00638 # define PREFIX(name) wcs_##name
00639 # define ARG_PREFIX(name) c##name
00640 # else
00641 # define PREFIX(name) wcs_name
00642 # define ARG_PREFIX(name) cname
00643 # endif
00644
/* Print one wide character on standard output via printf's %C conversion.
   The expansion deliberately carries no trailing semicolon: the caller
   writes it, so PUT_CHAR (c); is exactly one statement and can be used as
   the body of an `if' that has an `else'.  */
# define PUT_CHAR(c) printf ("%C", c)
00646 # define TRUE 1
00647 # define FALSE 0
00648 # else
00649 # ifdef MBS_SUPPORT
00650 # define WCHAR
00651 # define INSIDE_RECURSION
00652 # include "regex.c"
00653 # undef INSIDE_RECURSION
00654 # endif
00655 # define BYTE
00656 # define INSIDE_RECURSION
00657 # include "regex.c"
00658 # undef INSIDE_RECURSION
00659 # endif
00660 #endif
00661
00662 #ifdef INSIDE_RECURSION
00663
00664
00665
00666
00667
00668 # ifdef WCHAR
00669 # define STORE_NUMBER(destination, number) \
00670 do { \
00671 *(destination) = (UCHAR_T)(number); \
00672 } while (0)
00673 # else
00674 # define STORE_NUMBER(destination, number) \
00675 do { \
00676 (destination)[0] = (number) & 0377; \
00677 (destination)[1] = (number) >> 8; \
00678 } while (0)
00679 # endif
00680
00681
00682
00683
00684
00685
00686 # define STORE_NUMBER_AND_INCR(destination, number) \
00687 do { \
00688 STORE_NUMBER (destination, number); \
00689 (destination) += OFFSET_ADDRESS_SIZE; \
00690 } while (0)
00691
00692
00693
00694
00695
/* Fetch the number stored at SOURCE into DESTINATION; SOURCE itself is
   not advanced.  This is the inverse of STORE_NUMBER.

   Wide-character build: the number is the single element at SOURCE.
   Byte build: the number is stored low byte first in two bytes, and the
   high byte is sign-extended.  The high part is scaled by multiplication
   rather than by `<< 8', because left-shifting a negative value is
   undefined behaviour in C.  Both arguments are evaluated more than
   once.  */
# ifdef WCHAR
#  define EXTRACT_NUMBER(destination, source)				\
  do {									\
    (destination) = *(source);						\
  } while (0)
# else
#  define EXTRACT_NUMBER(destination, source)				\
  do {									\
    (destination) = *(source) & 0377;					\
    (destination) += SIGN_EXTEND_CHAR (*((source) + 1)) * (1 << 8);	\
  } while (0)
# endif
00708
00709 # ifdef DEBUG
00710 static void PREFIX(extract_number) _RE_ARGS ((int *dest, UCHAR_T *source));
00711 static void
00712 PREFIX(extract_number) (dest, source)
00713 int *dest;
00714 UCHAR_T *source;
00715 {
00716 # ifdef WCHAR
00717 *dest = *source;
00718 # else
00719 int temp = SIGN_EXTEND_CHAR (*(source + 1));
00720 *dest = *source & 0377;
00721 *dest += temp << 8;
00722 # endif
00723 }
00724
00725 # ifndef EXTRACT_MACROS
00726 # undef EXTRACT_NUMBER
00727 # define EXTRACT_NUMBER(dest, src) PREFIX(extract_number) (&dest, src)
00728 # endif
00729
00730 # endif
00731
00732
00733
00734
00735 # define EXTRACT_NUMBER_AND_INCR(destination, source) \
00736 do { \
00737 EXTRACT_NUMBER (destination, source); \
00738 (source) += OFFSET_ADDRESS_SIZE; \
00739 } while (0)
00740
00741 # ifdef DEBUG
00742 static void PREFIX(extract_number_and_incr) _RE_ARGS ((int *destination,
00743 UCHAR_T **source));
00744 static void
00745 PREFIX(extract_number_and_incr) (destination, source)
00746 int *destination;
00747 UCHAR_T **source;
00748 {
00749 PREFIX(extract_number) (destination, *source);
00750 *source += OFFSET_ADDRESS_SIZE;
00751 }
00752
00753 # ifndef EXTRACT_MACROS
00754 # undef EXTRACT_NUMBER_AND_INCR
00755 # define EXTRACT_NUMBER_AND_INCR(dest, src) \
00756 PREFIX(extract_number_and_incr) (&dest, &src)
00757 # endif
00758
00759 # endif
00760
00761
00762
00763
00764
00765
00766
00767
00768
00769 # ifdef DEBUG
00770
00771 # ifndef DEFINED_ONCE
00772
00773
00774 # include <stdio.h>
00775
00776
00777 # include <assert.h>
00778
00779 static int debug;
00780
00781 # define DEBUG_STATEMENT(e) e
00782 # define DEBUG_PRINT1(x) if (debug) printf (x)
00783 # define DEBUG_PRINT2(x1, x2) if (debug) printf (x1, x2)
00784 # define DEBUG_PRINT3(x1, x2, x3) if (debug) printf (x1, x2, x3)
00785 # define DEBUG_PRINT4(x1, x2, x3, x4) if (debug) printf (x1, x2, x3, x4)
00786 # endif
00787
00788 # define DEBUG_PRINT_COMPILED_PATTERN(p, s, e) \
00789 if (debug) PREFIX(print_partial_compiled_pattern) (s, e)
00790 # define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2) \
00791 if (debug) PREFIX(print_double_string) (w, s1, sz1, s2, sz2)
00792
00793
00794
00795
00796 # ifndef DEFINED_ONCE
00797 void
00798 print_fastmap (fastmap)
00799 char *fastmap;
00800 {
00801 unsigned was_a_range = 0;
00802 unsigned i = 0;
00803
00804 while (i < (1 << BYTEWIDTH))
00805 {
00806 if (fastmap[i++])
00807 {
00808 was_a_range = 0;
00809 putchar (i - 1);
00810 while (i < (1 << BYTEWIDTH) && fastmap[i])
00811 {
00812 was_a_range = 1;
00813 i++;
00814 }
00815 if (was_a_range)
00816 {
00817 printf ("-");
00818 putchar (i - 1);
00819 }
00820 }
00821 }
00822 putchar ('\n');
00823 }
00824 # endif
00825
00826
00827
00828
00829
00830 void
00831 PREFIX(print_partial_compiled_pattern) (start, end)
00832 UCHAR_T *start;
00833 UCHAR_T *end;
00834 {
00835 int mcnt, mcnt2;
00836 UCHAR_T *p1;
00837 UCHAR_T *p = start;
00838 UCHAR_T *pend = end;
00839
00840 if (start == NULL)
00841 {
00842 printf ("(null)\n");
00843 return;
00844 }
00845
00846
00847 while (p < pend)
00848 {
00849 # ifdef _LIBC
00850 printf ("%td:\t", p - start);
00851 # else
00852 printf ("%ld:\t", (long int) (p - start));
00853 # endif
00854
00855 switch ((re_opcode_t) *p++)
00856 {
00857 case no_op:
00858 printf ("/no_op");
00859 break;
00860
00861 case exactn:
00862 mcnt = *p++;
00863 printf ("/exactn/%d", mcnt);
00864 do
00865 {
00866 putchar ('/');
00867 PUT_CHAR (*p++);
00868 }
00869 while (--mcnt);
00870 break;
00871
00872 # ifdef MBS_SUPPORT
00873 case exactn_bin:
00874 mcnt = *p++;
00875 printf ("/exactn_bin/%d", mcnt);
00876 do
00877 {
00878 printf("/%lx", (long int) *p++);
00879 }
00880 while (--mcnt);
00881 break;
00882 # endif
00883
00884 case start_memory:
00885 mcnt = *p++;
00886 printf ("/start_memory/%d/%ld", mcnt, (long int) *p++);
00887 break;
00888
00889 case stop_memory:
00890 mcnt = *p++;
00891 printf ("/stop_memory/%d/%ld", mcnt, (long int) *p++);
00892 break;
00893
00894 case duplicate:
00895 printf ("/duplicate/%ld", (long int) *p++);
00896 break;
00897
00898 case anychar:
00899 printf ("/anychar");
00900 break;
00901
00902 case charset:
00903 case charset_not:
00904 {
00905 # ifdef WCHAR
00906 int i, length;
00907 wchar_t *workp = p;
00908 printf ("/charset [%s",
00909 (re_opcode_t) *(workp - 1) == charset_not ? "^" : "");
00910 p += 5;
00911 length = *workp++;
00912 for (i=0 ; i<length ; i++)
00913 printf("[:%lx:]", (long int) *p++);
00914 length = *workp++;
00915 for (i=0 ; i<length ;)
00916 {
00917 printf("[.");
00918 while(*p != 0)
00919 PUT_CHAR((i++,*p++));
00920 i++,p++;
00921 printf(".]");
00922 }
00923 length = *workp++;
00924 for (i=0 ; i<length ;)
00925 {
00926 printf("[=");
00927 while(*p != 0)
00928 PUT_CHAR((i++,*p++));
00929 i++,p++;
00930 printf("=]");
00931 }
00932 length = *workp++;
00933 for (i=0 ; i<length ; i++)
00934 {
00935 wchar_t range_start = *p++;
00936 wchar_t range_end = *p++;
00937 printf("%C-%C", range_start, range_end);
00938 }
00939 length = *workp++;
00940 for (i=0 ; i<length ; i++)
00941 printf("%C", *p++);
00942 putchar (']');
00943 # else
00944 register int c, last = -100;
00945 register int in_range = 0;
00946
00947 printf ("/charset [%s",
00948 (re_opcode_t) *(p - 1) == charset_not ? "^" : "");
00949
00950 assert (p + *p < pend);
00951
00952 for (c = 0; c < 256; c++)
00953 if (c / 8 < *p
00954 && (p[1 + (c/8)] & (1 << (c % 8))))
00955 {
00956
00957 if (last + 1 == c && ! in_range)
00958 {
00959 putchar ('-');
00960 in_range = 1;
00961 }
00962
00963 else if (last + 1 != c && in_range)
00964 {
00965 putchar (last);
00966 in_range = 0;
00967 }
00968
00969 if (! in_range)
00970 putchar (c);
00971
00972 last = c;
00973 }
00974
00975 if (in_range)
00976 putchar (last);
00977
00978 putchar (']');
00979
00980 p += 1 + *p;
00981 # endif
00982 }
00983 break;
00984
00985 case begline:
00986 printf ("/begline");
00987 break;
00988
00989 case endline:
00990 printf ("/endline");
00991 break;
00992
00993 case on_failure_jump:
00994 PREFIX(extract_number_and_incr) (&mcnt, &p);
00995 # ifdef _LIBC
00996 printf ("/on_failure_jump to %td", p + mcnt - start);
00997 # else
00998 printf ("/on_failure_jump to %ld", (long int) (p + mcnt - start));
00999 # endif
01000 break;
01001
01002 case on_failure_keep_string_jump:
01003 PREFIX(extract_number_and_incr) (&mcnt, &p);
01004 # ifdef _LIBC
01005 printf ("/on_failure_keep_string_jump to %td", p + mcnt - start);
01006 # else
01007 printf ("/on_failure_keep_string_jump to %ld",
01008 (long int) (p + mcnt - start));
01009 # endif
01010 break;
01011
01012 case dummy_failure_jump:
01013 PREFIX(extract_number_and_incr) (&mcnt, &p);
01014 # ifdef _LIBC
01015 printf ("/dummy_failure_jump to %td", p + mcnt - start);
01016 # else
01017 printf ("/dummy_failure_jump to %ld", (long int) (p + mcnt - start));
01018 # endif
01019 break;
01020
01021 case push_dummy_failure:
01022 printf ("/push_dummy_failure");
01023 break;
01024
01025 case maybe_pop_jump:
01026 PREFIX(extract_number_and_incr) (&mcnt, &p);
01027 # ifdef _LIBC
01028 printf ("/maybe_pop_jump to %td", p + mcnt - start);
01029 # else
01030 printf ("/maybe_pop_jump to %ld", (long int) (p + mcnt - start));
01031 # endif
01032 break;
01033
01034 case pop_failure_jump:
01035 PREFIX(extract_number_and_incr) (&mcnt, &p);
01036 # ifdef _LIBC
01037 printf ("/pop_failure_jump to %td", p + mcnt - start);
01038 # else
01039 printf ("/pop_failure_jump to %ld", (long int) (p + mcnt - start));
01040 # endif
01041 break;
01042
01043 case jump_past_alt:
01044 PREFIX(extract_number_and_incr) (&mcnt, &p);
01045 # ifdef _LIBC
01046 printf ("/jump_past_alt to %td", p + mcnt - start);
01047 # else
01048 printf ("/jump_past_alt to %ld", (long int) (p + mcnt - start));
01049 # endif
01050 break;
01051
01052 case jump:
01053 PREFIX(extract_number_and_incr) (&mcnt, &p);
01054 # ifdef _LIBC
01055 printf ("/jump to %td", p + mcnt - start);
01056 # else
01057 printf ("/jump to %ld", (long int) (p + mcnt - start));
01058 # endif
01059 break;
01060
01061 case succeed_n:
01062 PREFIX(extract_number_and_incr) (&mcnt, &p);
01063 p1 = p + mcnt;
01064 PREFIX(extract_number_and_incr) (&mcnt2, &p);
01065 # ifdef _LIBC
01066 printf ("/succeed_n to %td, %d times", p1 - start, mcnt2);
01067 # else
01068 printf ("/succeed_n to %ld, %d times",
01069 (long int) (p1 - start), mcnt2);
01070 # endif
01071 break;
01072
01073 case jump_n:
01074 PREFIX(extract_number_and_incr) (&mcnt, &p);
01075 p1 = p + mcnt;
01076 PREFIX(extract_number_and_incr) (&mcnt2, &p);
01077 printf ("/jump_n to %d, %d times", p1 - start, mcnt2);
01078 break;
01079
01080 case set_number_at:
01081 PREFIX(extract_number_and_incr) (&mcnt, &p);
01082 p1 = p + mcnt;
01083 PREFIX(extract_number_and_incr) (&mcnt2, &p);
01084 # ifdef _LIBC
01085 printf ("/set_number_at location %td to %d", p1 - start, mcnt2);
01086 # else
01087 printf ("/set_number_at location %ld to %d",
01088 (long int) (p1 - start), mcnt2);
01089 # endif
01090 break;
01091
01092 case wordbound:
01093 printf ("/wordbound");
01094 break;
01095
01096 case notwordbound:
01097 printf ("/notwordbound");
01098 break;
01099
01100 case wordbeg:
01101 printf ("/wordbeg");
01102 break;
01103
01104 case wordend:
01105 printf ("/wordend");
01106 break;
01107
01108 # ifdef emacs
01109 case before_dot:
01110 printf ("/before_dot");
01111 break;
01112
01113 case at_dot:
01114 printf ("/at_dot");
01115 break;
01116
01117 case after_dot:
01118 printf ("/after_dot");
01119 break;
01120
01121 case syntaxspec:
01122 printf ("/syntaxspec");
01123 mcnt = *p++;
01124 printf ("/%d", mcnt);
01125 break;
01126
01127 case notsyntaxspec:
01128 printf ("/notsyntaxspec");
01129 mcnt = *p++;
01130 printf ("/%d", mcnt);
01131 break;
01132 # endif
01133
01134 case wordchar:
01135 printf ("/wordchar");
01136 break;
01137
01138 case notwordchar:
01139 printf ("/notwordchar");
01140 break;
01141
01142 case begbuf:
01143 printf ("/begbuf");
01144 break;
01145
01146 case endbuf:
01147 printf ("/endbuf");
01148 break;
01149
01150 default:
01151 printf ("?%ld", (long int) *(p-1));
01152 }
01153
01154 putchar ('\n');
01155 }
01156
01157 # ifdef _LIBC
01158 printf ("%td:\tend of pattern.\n", p - start);
01159 # else
01160 printf ("%ld:\tend of pattern.\n", (long int) (p - start));
01161 # endif
01162 }
01163
01164
01165 void
01166 PREFIX(print_compiled_pattern) (bufp)
01167 struct re_pattern_buffer *bufp;
01168 {
01169 UCHAR_T *buffer = (UCHAR_T*) bufp->buffer;
01170
01171 PREFIX(print_partial_compiled_pattern) (buffer, buffer
01172 + bufp->used / sizeof(UCHAR_T));
01173 printf ("%ld bytes used/%ld bytes allocated.\n",
01174 bufp->used, bufp->allocated);
01175
01176 if (bufp->fastmap_accurate && bufp->fastmap)
01177 {
01178 printf ("fastmap: ");
01179 print_fastmap (bufp->fastmap);
01180 }
01181
01182 # ifdef _LIBC
01183 printf ("re_nsub: %Zd\t", bufp->re_nsub);
01184 # else
01185 printf ("re_nsub: %ld\t", (long int) bufp->re_nsub);
01186 # endif
01187 printf ("regs_alloc: %d\t", bufp->regs_allocated);
01188 printf ("can_be_null: %d\t", bufp->can_be_null);
01189 printf ("newline_anchor: %d\n", bufp->newline_anchor);
01190 printf ("no_sub: %d\t", bufp->no_sub);
01191 printf ("not_bol: %d\t", bufp->not_bol);
01192 printf ("not_eol: %d\t", bufp->not_eol);
01193 printf ("syntax: %lx\n", bufp->syntax);
01194
01195 }
01196
01197
01198 void
01199 PREFIX(print_double_string) (where, string1, size1, string2, size2)
01200 const CHAR_T *where;
01201 const CHAR_T *string1;
01202 const CHAR_T *string2;
01203 int size1;
01204 int size2;
01205 {
01206 int this_char;
01207
01208 if (where == NULL)
01209 printf ("(null)");
01210 else
01211 {
01212 int cnt;
01213
01214 if (FIRST_STRING_P (where))
01215 {
01216 for (this_char = where - string1; this_char < size1; this_char++)
01217 PUT_CHAR (string1[this_char]);
01218
01219 where = string2;
01220 }
01221
01222 cnt = 0;
01223 for (this_char = where - string2; this_char < size2; this_char++)
01224 {
01225 PUT_CHAR (string2[this_char]);
01226 if (++cnt > 100)
01227 {
01228 fputs ("...", stdout);
01229 break;
01230 }
01231 }
01232 }
01233 }
01234
01235 # ifndef DEFINED_ONCE
/* Debug helper: write the single character C to standard error.  */
void
printchar (c)
    int c;
{
  fputc (c, stderr);
}
01242 # endif
01243
01244 # else
01245
01246 # ifndef DEFINED_ONCE
01247 # undef assert
01248 # define assert(e)
01249
01250 # define DEBUG_STATEMENT(e)
01251 # define DEBUG_PRINT1(x)
01252 # define DEBUG_PRINT2(x1, x2)
01253 # define DEBUG_PRINT3(x1, x2, x3)
01254 # define DEBUG_PRINT4(x1, x2, x3, x4)
01255 # endif
01256 # define DEBUG_PRINT_COMPILED_PATTERN(p, s, e)
01257 # define DEBUG_PRINT_DOUBLE_STRING(w, s1, sz1, s2, sz2)
01258
01259 # endif
01260
01261
01262
01263 # ifdef WCHAR
01264
01265
01266
01267
01268
01269
01270
01271 static size_t convert_mbs_to_wcs (CHAR_T *dest, const unsigned char* src,
01272 size_t len, int *offset_buffer,
01273 char *is_binary);
01274 static size_t
01275 convert_mbs_to_wcs (dest, src, len, offset_buffer, is_binary)
01276 CHAR_T *dest;
01277 const unsigned char* src;
01278 size_t len;
01279
01280
01281
01282
01283
01284
01285
01286
01287
01288
01289 int *offset_buffer;
01290 char *is_binary;
01291 {
01292 wchar_t *pdest = dest;
01293 const unsigned char *psrc = src;
01294 size_t wc_count = 0;
01295
01296 mbstate_t mbs;
01297 int i, consumed;
01298 size_t mb_remain = len;
01299 size_t mb_count = 0;
01300
01301
01302 memset (&mbs, 0, sizeof (mbstate_t));
01303
01304 offset_buffer[0] = 0;
01305 for( ; mb_remain > 0 ; ++wc_count, ++pdest, mb_remain -= consumed,
01306 psrc += consumed)
01307 {
01308 #ifdef _LIBC
01309 consumed = __mbrtowc (pdest, psrc, mb_remain, &mbs);
01310 #else
01311 consumed = mbrtowc (pdest, psrc, mb_remain, &mbs);
01312 #endif
01313
01314 if (consumed <= 0)
01315
01316
01317 {
01318 *pdest = *psrc;
01319 consumed = 1;
01320 is_binary[wc_count] = TRUE;
01321 }
01322 else
01323 is_binary[wc_count] = FALSE;
01324
01325
01326
01327
01328 if (consumed == 1 && (int) *psrc == 0x5c && (int) *pdest == 0xa5)
01329 *pdest = (wchar_t) *psrc;
01330
01331 offset_buffer[wc_count + 1] = mb_count += consumed;
01332 }
01333
01334
01335 for (i = wc_count + 1 ; i <= len ; i++)
01336 offset_buffer[i] = mb_count + 1;
01337
01338 return wc_count;
01339 }
01340
01341 # endif
01342
01343 #else
01344
01345
01346
01347
01348
01349
01350 reg_syntax_t re_syntax_options;
01351
01352
01353
01354
01355
01356
01357
01358
01359
01360 reg_syntax_t
01361 re_set_syntax (syntax)
01362 reg_syntax_t syntax;
01363 {
01364 reg_syntax_t ret = re_syntax_options;
01365
01366 re_syntax_options = syntax;
01367 # ifdef DEBUG
01368 if (syntax & RE_DEBUG)
01369 debug = 1;
01370 else if (debug)
01371 debug = 0;
01372 # endif
01373 return ret;
01374 }
01375 # ifdef _LIBC
01376 weak_alias (__re_set_syntax, re_set_syntax)
01377 # endif
01378
01379
01380
01381
01382
01383
/* Table of error message texts: 17 entries, indices 0 ("Success") to
   16.  Each string is wrapped in gettext_noop, which this file defines
   (unless already provided) to expand to its argument unchanged, so the
   table holds the untranslated texts.
   NOTE(review): presumably indexed by reg_errcode_t value and translated
   with gettext at the point of use -- confirm against the header and the
   callers, which are outside this section.  */
01384 static const char *re_error_msgid[] =
01385 {
01386 gettext_noop ("Success"),
01387 gettext_noop ("No match"),
01388 gettext_noop ("Invalid regular expression"),
01389 gettext_noop ("Invalid collation character"),
01390 gettext_noop ("Invalid character class name"),
01391 gettext_noop ("Trailing backslash"),
01392 gettext_noop ("Invalid back reference"),
01393 gettext_noop ("Unmatched [ or [^"),
01394 gettext_noop ("Unmatched ( or \\("),
01395 gettext_noop ("Unmatched \\{"),
01396 gettext_noop ("Invalid content of \\{\\}"),
01397 gettext_noop ("Invalid range end"),
01398 gettext_noop ("Memory exhausted"),
01399 gettext_noop ("Invalid preceding regular expression"),
01400 gettext_noop ("Premature end of regular expression"),
01401 gettext_noop ("Regular expression too big"),
01402 gettext_noop ("Unmatched ) or \\)")
01403 };
01404
01405 #endif
01406
01407 #ifndef DEFINED_ONCE
01408
01409
01410
01411
01412
01413
01414
01415
01416
01417
01418
01419
01420
01421
01422
01423
01424
01425
01426
01427
01428 # define MATCH_MAY_ALLOCATE
01429
01430
01431
01432 # ifdef __GNUC__
01433 # undef C_ALLOCA
01434 # endif
01435
01436
01437
01438
01439
01440
01441 # if (defined C_ALLOCA || defined REGEX_MALLOC) && defined emacs
01442 # undef MATCH_MAY_ALLOCATE
01443 # endif
01444 #endif
01445
01446 #ifdef INSIDE_RECURSION
01447
01448
01449
01450
01451
01452
01453
01454
01455 # ifndef INIT_FAILURE_ALLOC
01456 # define INIT_FAILURE_ALLOC 5
01457 # endif
01458
01459
01460
01461
01462
01463
/* Failure-stack limit and data types.  The two branches are identical
   except for integer width: with INT_IS_16BIT the limit, the integer
   member of a stack element and the size/avail counters use long types,
   otherwise plain int/unsigned.

   re_max_failures is defined only once (guarded by DEFINED_ONCE) even
   though this section is compiled for both the byte and the wide
   variant; the types are defined once per variant under PREFIX names.
   re_max_failures is the factor DOUBLE_FAIL_STACK multiplies by
   MAX_FAILURE_ITEMS to decide whether the stack may still grow.  */
01464 # ifdef INT_IS_16BIT
01465
01466 # ifndef DEFINED_ONCE
01467 # if defined MATCH_MAY_ALLOCATE
01468
01469
01470 long int re_max_failures = 4000;
01471 # else
01472 long int re_max_failures = 2000;
01473 # endif
01474 # endif
01475
/* One failure-stack slot: either a pointer into pattern/string data or an
   integer.  */
01476 union PREFIX(fail_stack_elt)
01477 {
01478 UCHAR_T *pointer;
01479 long int integer;
01480 };
01481
01482 typedef union PREFIX(fail_stack_elt) PREFIX(fail_stack_elt_t);
01483
/* The failure stack itself.  `size' is the number of slots in `stack';
   `avail' is the number in use (the stack is empty when avail == 0 and
   full when avail == size, see FAIL_STACK_EMPTY / FAIL_STACK_FULL).  */
01484 typedef struct
01485 {
01486 PREFIX(fail_stack_elt_t) *stack;
01487 unsigned long int size;
01488 unsigned long int avail;
01489 } PREFIX(fail_stack_type);
01490
01491 # else
01492
01493 # ifndef DEFINED_ONCE
01494 # if defined MATCH_MAY_ALLOCATE
01495
01496
01497 int re_max_failures = 4000;
01498 # else
01499 int re_max_failures = 2000;
01500 # endif
01501 # endif
01502
/* One failure-stack slot: either a pointer into pattern/string data or an
   integer.  */
01503 union PREFIX(fail_stack_elt)
01504 {
01505 UCHAR_T *pointer;
01506 int integer;
01507 };
01508
01509 typedef union PREFIX(fail_stack_elt) PREFIX(fail_stack_elt_t);
01510
/* The failure stack itself; same meaning of the members as in the
   INT_IS_16BIT branch.  */
01511 typedef struct
01512 {
01513 PREFIX(fail_stack_elt_t) *stack;
01514 unsigned size;
01515 unsigned avail;
01516 } PREFIX(fail_stack_type);
01517
01518 # endif
01519 /* Predicates on the failure stack; they read a variable named fail_stack (or a pointer named fail_stack_ptr) from the expansion site. */
01520 # ifndef DEFINED_ONCE
01521 # define FAIL_STACK_EMPTY() (fail_stack.avail == 0)
01522 # define FAIL_STACK_PTR_EMPTY() (fail_stack_ptr->avail == 0)
01523 # define FAIL_STACK_FULL() (fail_stack.avail == fail_stack.size)
01524 # endif
01525
01526
01527 /* INIT_FAIL_STACK prepares fail_stack for use; RESET_FAIL_STACK releases it. */
01528 /* With MATCH_MAY_ALLOCATE the storage comes from REGEX_ALLOCATE_STACK and the */
01529 /* enclosing function returns -2 when that allocation yields NULL. */
01530 # ifdef MATCH_MAY_ALLOCATE
01531 # define INIT_FAIL_STACK() \
01532 do { \
01533 fail_stack.stack = (PREFIX(fail_stack_elt_t) *) \
01534 REGEX_ALLOCATE_STACK (INIT_FAILURE_ALLOC * sizeof (PREFIX(fail_stack_elt_t))); \
01535 \
01536 if (fail_stack.stack == NULL) \
01537 return -2; \
01538 \
01539 fail_stack.size = INIT_FAILURE_ALLOC; \
01540 fail_stack.avail = 0; \
01541 } while (0)
01542 /* Hands the stack storage back through REGEX_FREE_STACK. */
01543 # define RESET_FAIL_STACK() REGEX_FREE_STACK (fail_stack.stack)
01544 # else
01545 # define INIT_FAIL_STACK() \
01546 do { \
01547 fail_stack.avail = 0; \
01548 } while (0)
01549 /* Without MATCH_MAY_ALLOCATE this variant only empties the stack and frees nothing. */
01550 # define RESET_FAIL_STACK()
01551 # endif
01552
01553 /* DOUBLE_FAIL_STACK (fail_stack): try to double the capacity of the stack. */
01554 /* Evaluates to 0 without reallocating when the current size already exceeds */
01555 /* re_max_failures * MAX_FAILURE_ITEMS, and to 0 when REGEX_REALLOCATE_STACK */
01556 /* returns NULL; otherwise the size field is doubled and the value is 1. */
01557 /* NOTE(review): on reallocation failure the stack field has already been */
01558 /* overwritten with NULL, so the previous pointer is no longer reachable */
01559 /* through it -- confirm whether the allocator in use makes that a leak. */
01560
01561 # define DOUBLE_FAIL_STACK(fail_stack) \
01562 ((fail_stack).size > (unsigned) (re_max_failures * MAX_FAILURE_ITEMS) \
01563 ? 0 \
01564 : ((fail_stack).stack = (PREFIX(fail_stack_elt_t) *) \
01565 REGEX_REALLOCATE_STACK ((fail_stack).stack, \
01566 (fail_stack).size * sizeof (PREFIX(fail_stack_elt_t)), \
01567 ((fail_stack).size << 1) * sizeof (PREFIX(fail_stack_elt_t))),\
01568 \
01569 (fail_stack).stack == NULL \
01570 ? 0 \
01571 : ((fail_stack).size <<= 1, \
01572 1)))
01573
01574 /* PUSH_PATTERN_OP (POINTER, FAIL_STACK): push POINTER as one stack element. */
01575 /* Grows the stack first when it is full; evaluates to 0 if growing fails, else 1. */
01576 /* NOTE(review): the fullness test is FAIL_STACK_FULL (), which reads the */
01577 /* variable fail_stack rather than the FAIL_STACK argument. */
01578 # define PUSH_PATTERN_OP(POINTER, FAIL_STACK) \
01579 ((FAIL_STACK_FULL () \
01580 && !DOUBLE_FAIL_STACK (FAIL_STACK)) \
01581 ? 0 \
01582 : ((FAIL_STACK).stack[(FAIL_STACK).avail++].pointer = POINTER, \
01583 1))
01584
01585 /* Unchecked push/pop primitives operating on the variable fail_stack. */
01586 /* None of them tests capacity or emptiness (PUSH_FAILURE_POINT makes room with its own loop first). */
01587 /* Push ITEM, cast to UCHAR_T *, into the pointer member of the next free slot. */
01588 # define PUSH_FAILURE_POINTER(item) \
01589 fail_stack.stack[fail_stack.avail++].pointer = (UCHAR_T *) (item)
01590
01591
01592
01593 /* Push ITEM into the integer member of the next free slot. */
01594 # define PUSH_FAILURE_INT(item) \
01595 fail_stack.stack[fail_stack.avail++].integer = (item)
01596
01597
01598
01599 /* Push ITEM as a whole stack element. */
01600 # define PUSH_FAILURE_ELT(item) \
01601 fail_stack.stack[fail_stack.avail++] = (item)
01602
01603
01604 /* The pops decrement avail first and then read the requested view of that slot. */
01605 # define POP_FAILURE_POINTER() fail_stack.stack[--fail_stack.avail].pointer
01606 # define POP_FAILURE_INT() fail_stack.stack[--fail_stack.avail].integer
01607 # define POP_FAILURE_ELT() fail_stack.stack[--fail_stack.avail]
01608
01609 /* DEBUG_PUSH / DEBUG_POP move one extra integer on the failure stack in DEBUG builds and expand to nothing otherwise. */
01610 # ifdef DEBUG
01611 # define DEBUG_PUSH PUSH_FAILURE_INT
01612 # define DEBUG_POP(item_addr) *(item_addr) = POP_FAILURE_INT ()
01613 # else
01614 # define DEBUG_PUSH(item)
01615 # define DEBUG_POP(item_addr)
01616 # endif
01617
01618
01619
01620
01621
01622
01623
01624
01625
01626
01627
/* Push a complete failure point onto the variable `fail_stack'.

   PATTERN_PLACE   pattern position to resume at after a failure.
   STRING_PLACE    string position to resume at after a failure.
   FAILURE_CODE    value returned from the ENCLOSING function when the
                   stack cannot be grown far enough.

   Pushed, in this order: for every register from lowest_active_reg to
   highest_active_reg its regstart, regend and reg_info word; then
   lowest_active_reg, highest_active_reg, PATTERN_PLACE, STRING_PLACE and
   (DEBUG builds only) the failure id.  POP_FAILURE_POINT reads the items
   back in the reverse order.

   Besides `fail_stack' the expansion reads regstart, regend, reg_info,
   lowest_active_reg and highest_active_reg from the expansion site; the
   DEBUG_* statements additionally name failure_id, bufp, pend, string1,
   size1, string2 and size2.

   Every line of the body, comment lines included, ends in a backslash so
   that the whole do/while belongs to the macro.  */
# define PUSH_FAILURE_POINT(pattern_place, string_place, failure_code) \
  do { \
    /* Index of the register currently being saved.  */ \
    active_reg_t this_reg; \
    \
    DEBUG_STATEMENT (failure_id++); \
    DEBUG_STATEMENT (nfailure_points_pushed++); \
    DEBUG_PRINT2 ("\nPUSH_FAILURE_POINT #%u:\n", failure_id); \
    DEBUG_PRINT2 (" Before push, next avail: %d\n", (fail_stack).avail);\
    DEBUG_PRINT2 (" size: %d\n", (fail_stack).size);\
    \
    DEBUG_PRINT2 (" slots needed: %ld\n", NUM_FAILURE_ITEMS); \
    DEBUG_PRINT2 (" available: %d\n", REMAINING_AVAIL_SLOTS); \
    \
    /* Make room for everything pushed below; give up with FAILURE_CODE \
       as soon as the stack refuses to grow.  */ \
    while (REMAINING_AVAIL_SLOTS < NUM_FAILURE_ITEMS) \
      { \
        if (!DOUBLE_FAIL_STACK (fail_stack)) \
          return failure_code; \
        \
        DEBUG_PRINT2 ("\n Doubled stack; size now: %d\n", \
                      (fail_stack).size); \
        DEBUG_PRINT2 (" slots available: %d\n", REMAINING_AVAIL_SLOTS);\
      } \
    \
    /* Save the state of every active register.  */ \
    DEBUG_PRINT1 ("\n"); \
    \
    if (1) \
      for (this_reg = lowest_active_reg; this_reg <= highest_active_reg; \
           this_reg++) \
        { \
          DEBUG_PRINT2 (" Pushing reg: %lu\n", this_reg); \
          DEBUG_STATEMENT (num_regs_pushed++); \
          \
          DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \
          PUSH_FAILURE_POINTER (regstart[this_reg]); \
          \
          DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \
          PUSH_FAILURE_POINTER (regend[this_reg]); \
          \
          DEBUG_PRINT2 (" info: %p\n ", \
                        reg_info[this_reg].word.pointer); \
          DEBUG_PRINT2 (" match_null=%d", \
                        REG_MATCH_NULL_STRING_P (reg_info[this_reg])); \
          DEBUG_PRINT2 (" active=%d", IS_ACTIVE (reg_info[this_reg])); \
          DEBUG_PRINT2 (" matched_something=%d", \
                        MATCHED_SOMETHING (reg_info[this_reg])); \
          DEBUG_PRINT2 (" ever_matched=%d", \
                        EVER_MATCHED_SOMETHING (reg_info[this_reg])); \
          DEBUG_PRINT1 ("\n"); \
          PUSH_FAILURE_ELT (reg_info[this_reg].word); \
        } \
    \
    DEBUG_PRINT2 (" Pushing low active reg: %ld\n", lowest_active_reg);\
    PUSH_FAILURE_INT (lowest_active_reg); \
    \
    DEBUG_PRINT2 (" Pushing high active reg: %ld\n", highest_active_reg);\
    PUSH_FAILURE_INT (highest_active_reg); \
    \
    DEBUG_PRINT2 (" Pushing pattern %p:\n", pattern_place); \
    DEBUG_PRINT_COMPILED_PATTERN (bufp, pattern_place, pend); \
    PUSH_FAILURE_POINTER (pattern_place); \
    \
    DEBUG_PRINT2 (" Pushing string %p: `", string_place); \
    DEBUG_PRINT_DOUBLE_STRING (string_place, string1, size1, string2, \
                               size2); \
    DEBUG_PRINT1 ("'\n"); \
    PUSH_FAILURE_POINTER (string_place); \
    \
    DEBUG_PRINT2 (" Pushing failure id: %u\n", failure_id); \
    DEBUG_PUSH (failure_id); \
  } while (0)
01706
01707 # ifndef DEFINED_ONCE
01708 /* Slots PUSH_FAILURE_POINT pushes per register: */
01709 /* regstart, regend and the reg_info word. */
01710 # define NUM_REG_ITEMS 3
01711
01712 /* Slots pushed once per failure point: low and high active register, pattern pointer, string pointer, plus the failure id when DEBUG is defined. */
01713 # ifdef DEBUG
01714 # define NUM_NONREG_ITEMS 5
01715 # else
01716 # define NUM_NONREG_ITEMS 4
01717 # endif
01718
01719 /* DOUBLE_FAIL_STACK multiplies this by re_max_failures to bound the stack size. */
01720 /* NOTE(review): the factor 5 looks like an assumed typical number of saved registers -- confirm. */
01721
01722
01723 # define MAX_FAILURE_ITEMS (5 * NUM_REG_ITEMS + NUM_NONREG_ITEMS)
01724
01725 /* Slots the next failure point needs, computed from the current active-register range. */
01726 # define NUM_FAILURE_ITEMS \
01727 (((0 \
01728 ? 0 : highest_active_reg - lowest_active_reg + 1) \
01729 * NUM_REG_ITEMS) \
01730 + NUM_NONREG_ITEMS)
01731
01732 /* Free slots left on fail_stack. */
01733 # define REMAINING_AVAIL_SLOTS ((fail_stack).size - (fail_stack).avail)
01734 # endif
01735
01736
01737
01738
01739
01740
01741
01742
01743
01744
01745
01746
01747
/* Pop the failure point on top of the variable `fail_stack', reading the
   items back in the reverse of the order PUSH_FAILURE_POINT stored them.

   STR        receives the saved string position, unless the saved value
              is NULL, in which case STR is left untouched.
   PAT        receives the saved pattern position.
   LOW_REG,
   HIGH_REG   receive the saved active-register range.
   REGSTART,
   REGEND,
   REG_INFO   register vectors; the entries LOW_REG..HIGH_REG are restored.

   Also clears set_regs_matched_done.  The expansion is a brace block (not
   a do/while), asserts that the stack is non-empty, and names bufp, pend,
   string1, size1, string2 and size2 in its DEBUG_* statements.

   Every line of the body, comment lines included, ends in a backslash so
   that the whole block belongs to the macro.  */
# define POP_FAILURE_POINT(str, pat, low_reg, high_reg, regstart, regend, reg_info)\
{ \
  DEBUG_STATEMENT (unsigned failure_id;) \
  active_reg_t this_reg; \
  const UCHAR_T *string_temp; \
  \
  assert (!FAIL_STACK_EMPTY ()); \
  \
  /* Remove failure points and point to how many regs pushed.  */ \
  DEBUG_PRINT1 ("POP_FAILURE_POINT:\n"); \
  DEBUG_PRINT2 (" Before pop, next avail: %d\n", fail_stack.avail); \
  DEBUG_PRINT2 (" size: %d\n", fail_stack.size); \
  \
  assert (fail_stack.avail >= NUM_NONREG_ITEMS); \
  \
  DEBUG_POP (&failure_id); \
  DEBUG_PRINT2 (" Popping failure id: %u\n", failure_id); \
  \
  /* A NULL saved string pointer means "keep the current string \
     position", so STR is only assigned for a non-NULL value.  */ \
  string_temp = POP_FAILURE_POINTER (); \
  if (string_temp != NULL) \
    str = (const CHAR_T *) string_temp; \
  \
  DEBUG_PRINT2 (" Popping string %p: `", str); \
  DEBUG_PRINT_DOUBLE_STRING (str, string1, size1, string2, size2); \
  DEBUG_PRINT1 ("'\n"); \
  \
  pat = (UCHAR_T *) POP_FAILURE_POINTER (); \
  DEBUG_PRINT2 (" Popping pattern %p:\n", pat); \
  DEBUG_PRINT_COMPILED_PATTERN (bufp, pat, pend); \
  \
  /* Restore the register range, then each register's saved state.  */ \
  high_reg = (active_reg_t) POP_FAILURE_INT (); \
  DEBUG_PRINT2 (" Popping high active reg: %ld\n", high_reg); \
  \
  low_reg = (active_reg_t) POP_FAILURE_INT (); \
  DEBUG_PRINT2 (" Popping low active reg: %ld\n", low_reg); \
  \
  if (1) \
    for (this_reg = high_reg; this_reg >= low_reg; this_reg--) \
      { \
        DEBUG_PRINT2 (" Popping reg: %ld\n", this_reg); \
        \
        reg_info[this_reg].word = POP_FAILURE_ELT (); \
        DEBUG_PRINT2 (" info: %p\n", \
                      reg_info[this_reg].word.pointer); \
        \
        regend[this_reg] = (const CHAR_T *) POP_FAILURE_POINTER (); \
        DEBUG_PRINT2 (" end: %p\n", regend[this_reg]); \
        \
        regstart[this_reg] = (const CHAR_T *) POP_FAILURE_POINTER (); \
        DEBUG_PRINT2 (" start: %p\n", regstart[this_reg]); \
      } \
  else \
    { \
      /* Never taken while the condition above is the constant 1.  */ \
      for (this_reg = highest_active_reg; this_reg > high_reg; this_reg--) \
        { \
          reg_info[this_reg].word.integer = 0; \
          regend[this_reg] = 0; \
          regstart[this_reg] = 0; \
        } \
      highest_active_reg = high_reg; \
    } \
  \
  set_regs_matched_done = 0; \
  DEBUG_STATEMENT (nfailure_points_popped++); \
}
01817
01818
01819
01820
01821
01822
01823
01824
01825
01826
01827
01828
01829
01830 /* Per-register bookkeeping.  The union lets the bit-fields be saved and */
01831 /* restored as one fail-stack element through the `word' member. */
01832 typedef union
01833 {
01834 PREFIX(fail_stack_elt_t) word;
01835 struct
01836 {
01837 /* NOTE(review): 3 is the largest value the 2-bit match_null_string_p */
01838 /* field can hold; presumably it marks "not determined yet" -- confirm. */
01839 # define MATCH_NULL_UNSET_VALUE 3
01840 unsigned match_null_string_p : 2;
01841 unsigned is_active : 1;
01842 unsigned matched_something : 1;
01843 unsigned ever_matched_something : 1;
01844 } bits;
01845 } PREFIX(register_info_type);
01846
01847 # ifndef DEFINED_ONCE
01848 # define REG_MATCH_NULL_STRING_P(R) ((R).bits.match_null_string_p)
01849 # define IS_ACTIVE(R) ((R).bits.is_active)
01850 # define MATCHED_SOMETHING(R) ((R).bits.matched_something)
01851 # define EVER_MATCHED_SOMETHING(R) ((R).bits.ever_matched_something)
01852 /* The four macros above are lvalue accessors for the bit-fields of a register_info_type R. */
01853
01854 /* SET_REGS_MATCHED: set matched_something and ever_matched_something to 1 */
01855 /* for every register from lowest_active_reg to highest_active_reg.  The work */
01856 /* is skipped while set_regs_matched_done is nonzero (POP_FAILURE_POINT clears it). */
01857 # define SET_REGS_MATCHED() \
01858 do \
01859 { \
01860 if (!set_regs_matched_done) \
01861 { \
01862 active_reg_t r; \
01863 set_regs_matched_done = 1; \
01864 for (r = lowest_active_reg; r <= highest_active_reg; r++) \
01865 { \
01866 MATCHED_SOMETHING (reg_info[r]) \
01867 = EVER_MATCHED_SOMETHING (reg_info[r]) \
01868 = 1; \
01869 } \
01870 } \
01871 } \
01872 while (0)
01873 # endif
01874
01875 /* The address of this dummy object is the sentinel REG_UNSET_VALUE; REG_UNSET (e) tests a pointer against it. */
01876 static CHAR_T PREFIX(reg_unset_dummy);
01877 # define REG_UNSET_VALUE (&PREFIX(reg_unset_dummy))
01878 # define REG_UNSET(e) ((e) == REG_UNSET_VALUE)
01879
01880 /* Forward declarations of static helpers used by the pattern compiler and by the STORE_JUMP / INSERT_JUMP macros. */
01881 static void PREFIX(store_op1) _RE_ARGS ((re_opcode_t op, UCHAR_T *loc, int arg));
01882 static void PREFIX(store_op2) _RE_ARGS ((re_opcode_t op, UCHAR_T *loc,
01883 int arg1, int arg2));
01884 static void PREFIX(insert_op1) _RE_ARGS ((re_opcode_t op, UCHAR_T *loc,
01885 int arg, UCHAR_T *end));
01886 static void PREFIX(insert_op2) _RE_ARGS ((re_opcode_t op, UCHAR_T *loc,
01887 int arg1, int arg2, UCHAR_T *end));
01888 static boolean PREFIX(at_begline_loc_p) _RE_ARGS ((const CHAR_T *pattern,
01889 const CHAR_T *p,
01890 reg_syntax_t syntax));
01891 static boolean PREFIX(at_endline_loc_p) _RE_ARGS ((const CHAR_T *p,
01892 const CHAR_T *pend,
01893 reg_syntax_t syntax));
01894 # ifdef WCHAR
01895 static reg_errcode_t wcs_compile_range _RE_ARGS ((CHAR_T range_start,
01896 const CHAR_T **p_ptr,
01897 const CHAR_T *pend,
01898 char *translate,
01899 reg_syntax_t syntax,
01900 UCHAR_T *b,
01901 CHAR_T *char_set));
01902 static void insert_space _RE_ARGS ((int num, CHAR_T *loc, CHAR_T *end));
01903 # else
01904 static reg_errcode_t byte_compile_range _RE_ARGS ((unsigned int range_start,
01905 const char **p_ptr,
01906 const char *pend,
01907 char *translate,
01908 reg_syntax_t syntax,
01909 unsigned char *b));
01910 # endif
01911
01912
01913
01914
01915
01916
01917 /* PATFETCH (c): fetch the next pattern character into c and advance p, applying `translate' when it is set. */
01918 /* Returns REG_EEND from the enclosing function when p has reached pend.  The WCHAR form translates only values <= 0xff. */
01919 # ifndef PATFETCH
01920 # ifdef WCHAR
01921 # define PATFETCH(c) \
01922 do {if (p == pend) return REG_EEND; \
01923 c = (UCHAR_T) *p++; \
01924 if (translate && (c <= 0xff)) c = (UCHAR_T) translate[c]; \
01925 } while (0)
01926 # else
01927 # define PATFETCH(c) \
01928 do {if (p == pend) return REG_EEND; \
01929 c = (unsigned char) *p++; \
01930 if (translate) c = (unsigned char) translate[c]; \
01931 } while (0)
01932 # endif
01933 # endif
01934
01935 /* PATFETCH_RAW (c): like PATFETCH but never translates the character. */
01936 /* It also returns REG_EEND from the enclosing function at the end of the pattern. */
01937 # define PATFETCH_RAW(c) \
01938 do {if (p == pend) return REG_EEND; \
01939 c = (UCHAR_T) *p++; \
01940 } while (0)
01941
01942 /* Step the pattern pointer back by one character (undoes one fetch). */
01943 # define PATUNFETCH p--
01944
01945
01946
01947
01948
01949
01950
01951
01952 /* TRANSLATE (d): map d through the `translate' table when one is set, otherwise yield d unchanged. */
01953 /* The table is indexed with d cast to unsigned char; the WCHAR form leaves values above 0xff untranslated. */
01954 # ifndef TRANSLATE
01955 # ifdef WCHAR
01956 # define TRANSLATE(d) \
01957 ((translate && ((UCHAR_T) (d)) <= 0xff) \
01958 ? (char) translate[(unsigned char) (d)] : (d))
01959 # else
01960 # define TRANSLATE(d) \
01961 (translate ? (char) translate[(unsigned char) (d)] : (d))
01962 # endif
01963 # endif
01964
01965
01966
01967 /* Initial size, in bytes, of the compiled-pattern buffer: room for 32 UCHAR_T elements. */
01968
01969 # define INIT_BUF_SIZE (32 * sizeof(UCHAR_T))
01970
01971 /* GET_BUFFER_SPACE (n): call EXTEND_BUFFER until n more elements fit after b within bufp->allocated bytes. */
01972 # ifdef WCHAR
01973 # define GET_BUFFER_SPACE(n) \
01974 while (((unsigned long)b - (unsigned long)COMPILED_BUFFER_VAR \
01975 + (n)*sizeof(CHAR_T)) > bufp->allocated) \
01976 EXTEND_BUFFER ()
01977 # else
01978 # define GET_BUFFER_SPACE(n) \
01979 while ((unsigned long) (b - bufp->buffer + (n)) > bufp->allocated) \
01980 EXTEND_BUFFER ()
01981 # endif
01982
01983 /* BUF_PUSH (c): make room for one element, then append c (cast to UCHAR_T) at b and advance b. */
01984 # define BUF_PUSH(c) \
01985 do { \
01986 GET_BUFFER_SPACE (1); \
01987 *b++ = (UCHAR_T) (c); \
01988 } while (0)
01989
01990
01991 /* BUF_PUSH_2: same for two elements, reserving the space for both up front. */
01992 # define BUF_PUSH_2(c1, c2) \
01993 do { \
01994 GET_BUFFER_SPACE (2); \
01995 *b++ = (UCHAR_T) (c1); \
01996 *b++ = (UCHAR_T) (c2); \
01997 } while (0)
01998
01999
02000 /* BUF_PUSH_3: same for three elements. */
02001 # define BUF_PUSH_3(c1, c2, c3) \
02002 do { \
02003 GET_BUFFER_SPACE (3); \
02004 *b++ = (UCHAR_T) (c1); \
02005 *b++ = (UCHAR_T) (c2); \
02006 *b++ = (UCHAR_T) (c3); \
02007 } while (0)
02008
02009
02010 /* Jump helpers.  All four pass the displacement (to) - (loc) - (1 + OFFSET_ADDRESS_SIZE), cast to int, to a store/insert helper. */
02011 # define STORE_JUMP(op, loc, to) \
02012 PREFIX(store_op1) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)))
02013
02014 /* As STORE_JUMP, with an additional argument ARG handed to store_op2. */
02015 # define STORE_JUMP2(op, loc, to, arg) \
02016 PREFIX(store_op2) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)), arg)
02017
02018 /* As STORE_JUMP, but through insert_op1, which also receives the current buffer end b. */
02019 # define INSERT_JUMP(op, loc, to) \
02020 PREFIX(insert_op1) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)), b)
02021
02022 /* As INSERT_JUMP, with an additional argument ARG handed to insert_op2. */
02023 # define INSERT_JUMP2(op, loc, to, arg) \
02024 PREFIX(insert_op2) (op, loc, (int) ((to) - (loc) - (1 + OFFSET_ADDRESS_SIZE)),\
02025 arg, b)
02026
02027
02028
02029
02030
02031
02032
02033 /* Upper bound on the compiled-pattern buffer (tested by EXTEND_BUFFER) and the realloc wrapper. */
02034 /* Also the helpers EXTEND_BUFFER uses to relocate pointers after the buffer has moved. */
02035 # ifndef DEFINED_ONCE
02036 # if defined _MSC_VER && !defined WIN32
02037 /* Microsoft compiler without WIN32: a limit just below 64K and an explicit */
02038 /* size_t cast on the realloc size (presumably a 16-bit target -- TODO confirm). */
02039
02040 # define MAX_BUF_SIZE 65500L
02041 # define REALLOC(p,s) realloc ((p), (size_t) (s))
02042 # else
02043 # define MAX_BUF_SIZE (1L << 16)
02044 # define REALLOC(p,s) realloc ((p), (s))
02045 # endif
02046
02047 /* MOVE_BUFFER_POINTER (P) shifts P by `incr', a variable declared inside EXTEND_BUFFER. */
02048 /* With __BOUNDED_POINTERS__ the low/high bounds of P are adjusted as well, and */
02049 /* ELSE_EXTEND_BUFFER_HIGH_BOUND supplies an else branch that only refreshes the high bounds. */
02050
02051 # if __BOUNDED_POINTERS__
02052 # define SET_HIGH_BOUND(P) (__ptrhigh (P) = __ptrlow (P) + bufp->allocated)
02053 # define MOVE_BUFFER_POINTER(P) \
02054 (__ptrlow (P) += incr, SET_HIGH_BOUND (P), __ptrvalue (P) += incr)
02055 # define ELSE_EXTEND_BUFFER_HIGH_BOUND \
02056 else \
02057 { \
02058 SET_HIGH_BOUND (b); \
02059 SET_HIGH_BOUND (begalt); \
02060 if (fixup_alt_jump) \
02061 SET_HIGH_BOUND (fixup_alt_jump); \
02062 if (laststart) \
02063 SET_HIGH_BOUND (laststart); \
02064 if (pending_exact) \
02065 SET_HIGH_BOUND (pending_exact); \
02066 }
02067 # else
02068 # define MOVE_BUFFER_POINTER(P) (P) += incr
02069 # define ELSE_EXTEND_BUFFER_HIGH_BOUND
02070 # endif
02071 # endif
02072
/* EXTEND_BUFFER (): double the compiled-pattern buffer, clamped to
   MAX_BUF_SIZE, and relocate every pointer into it.

   Returns REG_ESIZE from the ENCLOSING function when the buffer cannot
   grow any further and REG_ESPACE when reallocation fails.  After a move,
   b and begalt are shifted unconditionally; fixup_alt_jump, laststart and
   pending_exact only when non-null.

   `incr' is a long rather than an int: where pointers are wider than int,
   the old and the new block may lie more than INT_MAX elements apart, and
   an int would truncate the distance and corrupt every relocated pointer.
   long matches pattern_offset_t and the unsigned long pointer arithmetic
   used by GET_BUFFER_SPACE.

   NOTE(review): on reallocation failure bufp->buffer has already been
   overwritten with NULL and bufp->allocated already doubled -- confirm
   that callers discard the pattern buffer after REG_ESPACE.  */
# ifdef WCHAR
#  define EXTEND_BUFFER() \
  do { \
    UCHAR_T *old_buffer = COMPILED_BUFFER_VAR; \
    int wchar_count; \
    if (bufp->allocated + sizeof(UCHAR_T) > MAX_BUF_SIZE) \
      return REG_ESIZE; \
    bufp->allocated <<= 1; \
    if (bufp->allocated > MAX_BUF_SIZE) \
      bufp->allocated = MAX_BUF_SIZE; \
    /* Work in whole wide characters; keep at least one.  */ \
    wchar_count = bufp->allocated / sizeof(UCHAR_T); \
    if (wchar_count == 0) wchar_count = 1; \
    /* Round the byte count down to a multiple of the element size.  */ \
    bufp->allocated = wchar_count * sizeof(UCHAR_T); \
    RETALLOC (COMPILED_BUFFER_VAR, wchar_count, UCHAR_T); \
    bufp->buffer = (char*)COMPILED_BUFFER_VAR; \
    if (COMPILED_BUFFER_VAR == NULL) \
      return REG_ESPACE; \
    /* If the buffer moved, move all the pointers into it.  */ \
    if (old_buffer != COMPILED_BUFFER_VAR) \
      { \
        long incr = COMPILED_BUFFER_VAR - old_buffer; \
        MOVE_BUFFER_POINTER (b); \
        MOVE_BUFFER_POINTER (begalt); \
        if (fixup_alt_jump) \
          MOVE_BUFFER_POINTER (fixup_alt_jump); \
        if (laststart) \
          MOVE_BUFFER_POINTER (laststart); \
        if (pending_exact) \
          MOVE_BUFFER_POINTER (pending_exact); \
      } \
    ELSE_EXTEND_BUFFER_HIGH_BOUND \
  } while (0)
# else
#  define EXTEND_BUFFER() \
  do { \
    UCHAR_T *old_buffer = COMPILED_BUFFER_VAR; \
    if (bufp->allocated == MAX_BUF_SIZE) \
      return REG_ESIZE; \
    bufp->allocated <<= 1; \
    if (bufp->allocated > MAX_BUF_SIZE) \
      bufp->allocated = MAX_BUF_SIZE; \
    bufp->buffer = (UCHAR_T *) REALLOC (COMPILED_BUFFER_VAR, \
                                        bufp->allocated); \
    if (COMPILED_BUFFER_VAR == NULL) \
      return REG_ESPACE; \
    /* If the buffer moved, move all the pointers into it.  */ \
    if (old_buffer != COMPILED_BUFFER_VAR) \
      { \
        long incr = COMPILED_BUFFER_VAR - old_buffer; \
        MOVE_BUFFER_POINTER (b); \
        MOVE_BUFFER_POINTER (begalt); \
        if (fixup_alt_jump) \
          MOVE_BUFFER_POINTER (fixup_alt_jump); \
        if (laststart) \
          MOVE_BUFFER_POINTER (laststart); \
        if (pending_exact) \
          MOVE_BUFFER_POINTER (pending_exact); \
      } \
    ELSE_EXTEND_BUFFER_HIGH_BOUND \
  } while (0)
# endif
02136
02137 # ifndef DEFINED_ONCE
02138 /* Upper bound on register (group) numbers. */
02139 /* NOTE(review): 255 presumably reflects a one-byte register field in the compiled pattern -- confirm. */
02140
02141 # define MAX_REGNUM 255
02142
02143 /* Type used for register numbers. */
02144
02145 typedef unsigned regnum_t;
02146
02147
02148 /* Signed offset into the compiled pattern.  The compile stack stores offsets */
02149 /* rather than pointers (presumably because EXTEND_BUFFER may move the */
02150 /* buffer -- confirm against the code that pushes entries). */
02151
02152
02153 typedef long pattern_offset_t;
02154 /* One compile-stack entry: four saved pattern offsets and a register number. */
02155 typedef struct
02156 {
02157 pattern_offset_t begalt_offset;
02158 pattern_offset_t fixup_alt_jump;
02159 pattern_offset_t inner_group_offset;
02160 pattern_offset_t laststart_offset;
02161 regnum_t regnum;
02162 } compile_stack_elt_t;
02163
02164 /* The compile stack: `size' is the capacity in entries, `avail' the number in use. */
02165 typedef struct
02166 {
02167 compile_stack_elt_t *stack;
02168 unsigned size;
02169 unsigned avail;
02170 } compile_stack_type;
02171
02172 /* Number of entries regex_compile allocates for a fresh compile stack. */
02173 # define INIT_COMPILE_STACK_SIZE 32
02174
02175 # define COMPILE_STACK_EMPTY (compile_stack.avail == 0)
02176 # define COMPILE_STACK_FULL (compile_stack.avail == compile_stack.size)
02177
02178 /* Indexes the slot at `avail', i.e. the next free entry, not the most recently pushed one. */
02179 # define COMPILE_STACK_TOP (compile_stack.stack[compile_stack.avail])
02180
02181 # endif
02182
02183
# ifndef DEFINED_ONCE
/* SET_LIST_BIT (c): set the bit for character C in the bit map that the
   variable `b' points to at the expansion site.  C is reduced to an
   unsigned char first, then selects byte C / BYTEWIDTH and bit
   C % BYTEWIDTH.

   Both uses of the argument are parenthesized so that the cast applies to
   the whole argument expression; without the parentheses an argument such
   as `x - 1' would cast only `x' and could produce a negative shift
   count.  */
#  define SET_LIST_BIT(c) \
  (b[((unsigned char) (c)) / BYTEWIDTH] \
   |= 1 << (((unsigned char) (c)) % BYTEWIDTH))
# endif
02189
02190 /* GET_UNSIGNED_NUMBER (num): read decimal digits from the pattern into num.  Stops after fetching the first non-digit (left in c) or at pend.  A negative num is reset to 0 before the first digit is added, and digits are accumulated only while num <= RE_DUP_MAX.  Expands to a brace block and uses PATFETCH, which can return REG_EEND from the enclosing function. */
02191 # define GET_UNSIGNED_NUMBER(num) \
02192 { \
02193 while (p != pend) \
02194 { \
02195 PATFETCH (c); \
02196 if (c < '0' || c > '9') \
02197 break; \
02198 if (num <= RE_DUP_MAX) \
02199 { \
02200 if (num < 0) \
02201 num = 0; \
02202 num = num * 10 + c - '0'; \
02203 } \
02204 } \
02205 }
02206
02207 # ifndef DEFINED_ONCE
02208 # if defined _LIBC || WIDE_CHAR_SUPPORT
02209 /* With libc or wide-character support, class names are resolved through wctype (). */
02210 /* The name-length limit comes from CHARCLASS_NAME_MAX when the system defines it. */
02211 # ifdef CHARCLASS_NAME_MAX
02212 # define CHAR_CLASS_MAX_LENGTH CHARCLASS_NAME_MAX
02213 # else
02214 /* Fallback limit used when CHARCLASS_NAME_MAX is not available. */
02215
02216 # define CHAR_CLASS_MAX_LENGTH 256
02217 # endif
02218
02219 # ifdef _LIBC
02220 # define IS_CHAR_CLASS(string) __wctype (string)
02221 # else
02222 # define IS_CHAR_CLASS(string) wctype (string)
02223 # endif
02224 # else
02225 # define CHAR_CLASS_MAX_LENGTH 6
02226 /* Without wctype (): accept exactly the twelve names listed; the longest, "xdigit", has 6 characters. */
02227 # define IS_CHAR_CLASS(string) \
02228 (STREQ (string, "alpha") || STREQ (string, "upper") \
02229 || STREQ (string, "lower") || STREQ (string, "digit") \
02230 || STREQ (string, "alnum") || STREQ (string, "xdigit") \
02231 || STREQ (string, "space") || STREQ (string, "print") \
02232 || STREQ (string, "punct") || STREQ (string, "graph") \
02233 || STREQ (string, "cntrl") || STREQ (string, "blank"))
02234 # endif
02235 # endif
02236
02237 # ifndef MATCH_MAY_ALLOCATE
02238
02239
02240
02241
02242
02243
02244
02245
02246 static PREFIX(fail_stack_type) fail_stack;
02247
02248
02249
02250
02251 # ifdef DEFINED_ONCE
02252 static int regs_allocated_size;
02253
02254 static const char ** regstart, ** regend;
02255 static const char ** old_regstart, ** old_regend;
02256 static const char **best_regstart, **best_regend;
02257 static const char **reg_dummy;
02258 # endif
02259
02260 static PREFIX(register_info_type) *PREFIX(reg_info);
02261 static PREFIX(register_info_type) *PREFIX(reg_info_dummy);
02262
02263
02264
02265
02266 static void
02267 PREFIX(regex_grow_registers) (num_regs)
02268 int num_regs;
02269 {
02270 if (num_regs > regs_allocated_size)
02271 {
02272 RETALLOC_IF (regstart, num_regs, const char *);
02273 RETALLOC_IF (regend, num_regs, const char *);
02274 RETALLOC_IF (old_regstart, num_regs, const char *);
02275 RETALLOC_IF (old_regend, num_regs, const char *);
02276 RETALLOC_IF (best_regstart, num_regs, const char *);
02277 RETALLOC_IF (best_regend, num_regs, const char *);
02278 RETALLOC_IF (PREFIX(reg_info), num_regs, PREFIX(register_info_type));
02279 RETALLOC_IF (reg_dummy, num_regs, const char *);
02280 RETALLOC_IF (PREFIX(reg_info_dummy), num_regs, PREFIX(register_info_type));
02281
02282 regs_allocated_size = num_regs;
02283 }
02284 }
02285
02286 # endif
02287
02288 # ifndef DEFINED_ONCE
02289 static boolean group_in_compile_stack _RE_ARGS ((compile_stack_type
02290 compile_stack,
02291 regnum_t regnum));
02292 # endif /* Forward declaration only; note that the compile stack is passed by value. */
02293
02294
02295
02296
02297
02298
02299
02300
02301
02302
02303
02304
02305
02306
02307
02308
02309
02310
02311 /* FREE_STACK_RETURN (value): release compile_stack.stack and return VALUE from the enclosing function, */
02312 /* using a comma expression.  The WCHAR form first frees pattern, mbs_offset and is_binary as well. */
02313 # ifdef WCHAR
02314 # define FREE_STACK_RETURN(value) \
02315 return (free(pattern), free(mbs_offset), free(is_binary), free (compile_stack.stack), value)
02316 # else
02317 # define FREE_STACK_RETURN(value) \
02318 return (free (compile_stack.stack), value)
02319 # endif
02320
02321 static reg_errcode_t
02322 PREFIX(regex_compile) (ARG_PREFIX(pattern), ARG_PREFIX(size), syntax, bufp)
02323 const char *ARG_PREFIX(pattern);
02324 size_t ARG_PREFIX(size);
02325 reg_syntax_t syntax;
02326 struct re_pattern_buffer *bufp;
02327 {
02328
02329
02330
02331 register UCHAR_T c, c1;
02332
02333 #ifdef WCHAR
02334
02335 CHAR_T *pattern, *COMPILED_BUFFER_VAR;
02336 size_t size;
02337
02338 int *mbs_offset = NULL;
02339
02340 char *is_binary = NULL;
02341
02342 char is_exactn_bin = FALSE;
02343 #endif
02344
02345
02346 const CHAR_T *p1;
02347
02348
02349 register UCHAR_T *b;
02350
02351
02352 compile_stack_type compile_stack;
02353
02354
02355 #ifdef WCHAR
02356 const CHAR_T *p;
02357 const CHAR_T *pend;
02358 #else
02359 const CHAR_T *p = pattern;
02360 const CHAR_T *pend = pattern + size;
02361 #endif
02362
02363
02364 RE_TRANSLATE_TYPE translate = bufp->translate;
02365
02366
02367
02368
02369
02370 UCHAR_T *pending_exact = 0;
02371
02372
02373
02374
02375 UCHAR_T *laststart = 0;
02376
02377
02378 UCHAR_T *begalt;
02379
02380
02381
02382
02383 UCHAR_T *fixup_alt_jump = 0;
02384
02385
02386
02387
02388 regnum_t regnum = 0;
02389
02390 #ifdef WCHAR
02391
02392 p = pend = pattern = TALLOC(csize + 1, CHAR_T);
02393 mbs_offset = TALLOC(csize + 1, int);
02394 is_binary = TALLOC(csize + 1, char);
02395 if (pattern == NULL || mbs_offset == NULL || is_binary == NULL)
02396 {
02397 free(pattern);
02398 free(mbs_offset);
02399 free(is_binary);
02400 return REG_ESPACE;
02401 }
02402 pattern[csize] = L'\0';
02403 size = convert_mbs_to_wcs(pattern, cpattern, csize, mbs_offset, is_binary);
02404 pend = p + size;
02405 if (size < 0)
02406 {
02407 free(pattern);
02408 free(mbs_offset);
02409 free(is_binary);
02410 return REG_BADPAT;
02411 }
02412 #endif
02413
02414 #ifdef DEBUG
02415 DEBUG_PRINT1 ("\nCompiling pattern: ");
02416 if (debug)
02417 {
02418 unsigned debug_count;
02419
02420 for (debug_count = 0; debug_count < size; debug_count++)
02421 PUT_CHAR (pattern[debug_count]);
02422 putchar ('\n');
02423 }
02424 #endif
02425
02426
02427 compile_stack.stack = TALLOC (INIT_COMPILE_STACK_SIZE, compile_stack_elt_t);
02428 if (compile_stack.stack == NULL)
02429 {
02430 #ifdef WCHAR
02431 free(pattern);
02432 free(mbs_offset);
02433 free(is_binary);
02434 #endif
02435 return REG_ESPACE;
02436 }
02437
02438 compile_stack.size = INIT_COMPILE_STACK_SIZE;
02439 compile_stack.avail = 0;
02440
02441
02442 bufp->syntax = syntax;
02443 bufp->fastmap_accurate = 0;
02444 bufp->not_bol = bufp->not_eol = 0;
02445
02446
02447
02448
02449 bufp->used = 0;
02450
02451
02452 bufp->re_nsub = 0;
02453
02454 #if !defined emacs && !defined SYNTAX_TABLE
02455
02456 init_syntax_once ();
02457 #endif
02458
02459 if (bufp->allocated == 0)
02460 {
02461 if (bufp->buffer)
02462 {
02463
02464
02465 #ifdef WCHAR
02466
02467
02468 free(bufp->buffer);
02469 COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE/sizeof(UCHAR_T),
02470 UCHAR_T);
02471 #else
02472 RETALLOC (COMPILED_BUFFER_VAR, INIT_BUF_SIZE, UCHAR_T);
02473 #endif
02474 }
02475 else
02476 {
02477 COMPILED_BUFFER_VAR = TALLOC (INIT_BUF_SIZE / sizeof(UCHAR_T),
02478 UCHAR_T);
02479 }
02480
02481 if (!COMPILED_BUFFER_VAR) FREE_STACK_RETURN (REG_ESPACE);
02482 #ifdef WCHAR
02483 bufp->buffer = (char*)COMPILED_BUFFER_VAR;
02484 #endif
02485 bufp->allocated = INIT_BUF_SIZE;
02486 }
02487 #ifdef WCHAR
02488 else
02489 COMPILED_BUFFER_VAR = (UCHAR_T*) bufp->buffer;
02490 #endif
02491
02492 begalt = b = COMPILED_BUFFER_VAR;
02493
02494
02495 while (p != pend)
02496 {
02497 PATFETCH (c);
02498
02499 switch (c)
02500 {
02501 case '^':
02502 {
02503 if (
02504 p == pattern + 1
02505
02506 || syntax & RE_CONTEXT_INDEP_ANCHORS
02507
02508 || PREFIX(at_begline_loc_p) (pattern, p, syntax))
02509 BUF_PUSH (begline);
02510 else
02511 goto normal_char;
02512 }
02513 break;
02514
02515
02516 case '$':
02517 {
02518 if (
02519 p == pend
02520
02521 || syntax & RE_CONTEXT_INDEP_ANCHORS
02522
02523 || PREFIX(at_endline_loc_p) (p, pend, syntax))
02524 BUF_PUSH (endline);
02525 else
02526 goto normal_char;
02527 }
02528 break;
02529
02530
02531 case '+':
02532 case '?':
02533 if ((syntax & RE_BK_PLUS_QM)
02534 || (syntax & RE_LIMITED_OPS))
02535 goto normal_char;
02536 handle_plus:
02537 case '*':
02538
02539 if (!laststart)
02540 {
02541 if (syntax & RE_CONTEXT_INVALID_OPS)
02542 FREE_STACK_RETURN (REG_BADRPT);
02543 else if (!(syntax & RE_CONTEXT_INDEP_OPS))
02544 goto normal_char;
02545 }
02546
02547 {
02548
02549 boolean keep_string_p = false;
02550
02551
02552 char zero_times_ok = 0, many_times_ok = 0;
02553
02554
02555
02556
02557
02558
02559 for (;;)
02560 {
02561 zero_times_ok |= c != '+';
02562 many_times_ok |= c != '?';
02563
02564 if (p == pend)
02565 break;
02566
02567 PATFETCH (c);
02568
02569 if (c == '*'
02570 || (!(syntax & RE_BK_PLUS_QM) && (c == '+' || c == '?')))
02571 ;
02572
02573 else if (syntax & RE_BK_PLUS_QM && c == '\\')
02574 {
02575 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
02576
02577 PATFETCH (c1);
02578 if (!(c1 == '+' || c1 == '?'))
02579 {
02580 PATUNFETCH;
02581 PATUNFETCH;
02582 break;
02583 }
02584
02585 c = c1;
02586 }
02587 else
02588 {
02589 PATUNFETCH;
02590 break;
02591 }
02592
02593
02594 }
02595
02596
02597
02598 if (!laststart)
02599 break;
02600
02601
02602
02603 if (many_times_ok)
02604 {
02605
02606
02607
02608
02609
02610
02611
02612
02613
02614 assert (p - 1 > pattern);
02615
02616
02617 GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
02618
02619
02620
02621
02622
02623
02624 if (TRANSLATE (*(p - 2)) == TRANSLATE ('.')
02625 && zero_times_ok
02626 && p < pend && TRANSLATE (*p) == TRANSLATE ('\n')
02627 && !(syntax & RE_DOT_NEWLINE))
02628 {
02629 STORE_JUMP (jump, b, laststart);
02630 keep_string_p = true;
02631 }
02632 else
02633
02634 STORE_JUMP (maybe_pop_jump, b, laststart -
02635 (1 + OFFSET_ADDRESS_SIZE));
02636
02637
02638 b += 1 + OFFSET_ADDRESS_SIZE;
02639 }
02640
02641
02642
02643
02644
02645 GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
02646 INSERT_JUMP (keep_string_p ? on_failure_keep_string_jump
02647 : on_failure_jump,
02648 laststart, b + 1 + OFFSET_ADDRESS_SIZE);
02649 pending_exact = 0;
02650 b += 1 + OFFSET_ADDRESS_SIZE;
02651
02652 if (!zero_times_ok)
02653 {
02654
02655
02656
02657
02658
02659 GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
02660 INSERT_JUMP (dummy_failure_jump, laststart, laststart +
02661 2 + 2 * OFFSET_ADDRESS_SIZE);
02662 b += 1 + OFFSET_ADDRESS_SIZE;
02663 }
02664 }
02665 break;
02666
02667
02668 case '.':
02669 laststart = b;
02670 BUF_PUSH (anychar);
02671 break;
02672
02673
02674 case '[':
02675 {
02676 boolean had_char_class = false;
02677 #ifdef WCHAR
02678 CHAR_T range_start = 0xffffffff;
02679 #else
02680 unsigned int range_start = 0xffffffff;
02681 #endif
02682 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
02683
02684 #ifdef WCHAR
02685
02686
02687
02688
02689
02690
02691
02692
02693
02694
02695
02696
02697
02698
02699
02700
02701
02702
02703
02704
02705
02706
02707
02708
02709
02710
02711
02712
02713
02714
02715
02716
02717
02718
02719
02720
02721
02722
02723
02724
02725
02726
02727
02728
02729
02730
02731 GET_BUFFER_SPACE (6);
02732
02733
02734
02735
02736 laststart = b;
02737
02738
02739
02740 BUF_PUSH (*p == '^' ? charset_not : charset);
02741 if (*p == '^')
02742 p++;
02743
02744
02745
02746
02747 BUF_PUSH_3 (0, 0, 0);
02748 BUF_PUSH_2 (0, 0);
02749
02750
02751 p1 = p;
02752
02753
02754 if ((re_opcode_t) b[-6] == charset_not
02755 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
02756 {
02757 BUF_PUSH('\n');
02758 laststart[5]++;
02759 }
02760
02761
02762 for (;;)
02763 {
02764 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
02765
02766 PATFETCH (c);
02767
02768
02769 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
02770 {
02771 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
02772
02773 PATFETCH (c1);
02774 BUF_PUSH(c1);
02775 laststart[5]++;
02776 range_start = c1;
02777 continue;
02778 }
02779
02780
02781
02782
02783 if (c == ']' && p != p1 + 1)
02784 break;
02785
02786
02787
02788 if (had_char_class && c == '-' && *p != ']')
02789 FREE_STACK_RETURN (REG_ERANGE);
02790
02791
02792
02793
02794
02795 if (c == '-'
02796 && !(p - 2 >= pattern && p[-2] == '[')
02797 && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
02798 && *p != ']')
02799 {
02800 reg_errcode_t ret;
02801
02802 GET_BUFFER_SPACE (2);
02803
02804 b += 2;
02805 ret = wcs_compile_range (range_start, &p, pend, translate,
02806 syntax, b, laststart);
02807 if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
02808 range_start = 0xffffffff;
02809 }
02810 else if (p[0] == '-' && p[1] != ']')
02811 {
02812 reg_errcode_t ret;
02813
02814
02815 PATFETCH (c1);
02816
02817 GET_BUFFER_SPACE (2);
02818
02819 b += 2;
02820 ret = wcs_compile_range (c, &p, pend, translate, syntax, b,
02821 laststart);
02822 if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
02823 range_start = 0xffffffff;
02824 }
02825
02826
02827
02828 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
02829 {
02830 char str[CHAR_CLASS_MAX_LENGTH + 1];
02831
02832 PATFETCH (c);
02833 c1 = 0;
02834
02835
02836 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
02837
02838 for (;;)
02839 {
02840 PATFETCH (c);
02841 if ((c == ':' && *p == ']') || p == pend)
02842 break;
02843 if (c1 < CHAR_CLASS_MAX_LENGTH)
02844 str[c1++] = c;
02845 else
02846
02847 str[0] = '\0';
02848 }
02849 str[c1] = '\0';
02850
02851
02852
02853
02854 if (c == ':' && *p == ']')
02855 {
02856 wctype_t wt;
02857 uintptr_t alignedp;
02858
02859
02860 wt = IS_CHAR_CLASS (str);
02861 if (wt == 0)
02862 FREE_STACK_RETURN (REG_ECTYPE);
02863
02864
02865
02866 PATFETCH (c);
02867
02868 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
02869
02870
02871 GET_BUFFER_SPACE(CHAR_CLASS_SIZE);
02872
02873 b += CHAR_CLASS_SIZE;
02874
02875
02876 insert_space(CHAR_CLASS_SIZE,
02877 laststart + 6 + laststart[1],
02878 b - 1);
02879 alignedp = ((uintptr_t)(laststart + 6 + laststart[1])
02880 + __alignof__(wctype_t) - 1)
02881 & ~(uintptr_t)(__alignof__(wctype_t) - 1);
02882
02883 *((wctype_t*)alignedp) = wt;
02884
02885 laststart[1] += CHAR_CLASS_SIZE;
02886
02887 had_char_class = true;
02888 }
02889 else
02890 {
02891 c1++;
02892 while (c1--)
02893 PATUNFETCH;
02894 BUF_PUSH ('[');
02895 BUF_PUSH (':');
02896 laststart[5] += 2;
02897 range_start = ':';
02898 had_char_class = false;
02899 }
02900 }
02901 else if (syntax & RE_CHAR_CLASSES && c == '[' && (*p == '='
02902 || *p == '.'))
02903 {
02904 CHAR_T str[128];
02905 CHAR_T delim = *p;
02906 # ifdef _LIBC
02907 uint32_t nrules =
02908 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
02909 # endif
02910 PATFETCH (c);
02911 c1 = 0;
02912
02913
02914 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
02915
02916 for (;;)
02917 {
02918 PATFETCH (c);
02919 if ((c == delim && *p == ']') || p == pend)
02920 break;
02921 if (c1 < sizeof (str) - 1)
02922 str[c1++] = c;
02923 else
02924
02925 str[0] = '\0';
02926 }
02927 str[c1] = '\0';
02928
02929 if (c == delim && *p == ']' && str[0] != '\0')
02930 {
02931 unsigned int i, offset;
02932
02933
02934
02935
02936
02937
02938
02939
02940
02941 int datasize = c1 + 1;
02942
02943 # ifdef _LIBC
02944 int32_t idx = 0;
02945 if (nrules == 0)
02946 # endif
02947 {
02948 if (c1 != 1)
02949 FREE_STACK_RETURN (REG_ECOLLATE);
02950 }
02951 # ifdef _LIBC
02952 else
02953 {
02954 const int32_t *table;
02955 const int32_t *weights;
02956 const int32_t *extra;
02957 const int32_t *indirect;
02958 wint_t *cp;
02959
02960
02961 # include <locale/weightwc.h>
02962
02963 if(delim == '=')
02964 {
02965
02966 cp = (wint_t*)str;
02967
02968 table = (const int32_t *)
02969 _NL_CURRENT (LC_COLLATE,
02970 _NL_COLLATE_TABLEWC);
02971 weights = (const int32_t *)
02972 _NL_CURRENT (LC_COLLATE,
02973 _NL_COLLATE_WEIGHTWC);
02974 extra = (const int32_t *)
02975 _NL_CURRENT (LC_COLLATE,
02976 _NL_COLLATE_EXTRAWC);
02977 indirect = (const int32_t *)
02978 _NL_CURRENT (LC_COLLATE,
02979 _NL_COLLATE_INDIRECTWC);
02980
02981 idx = findidx ((const wint_t**)&cp);
02982 if (idx == 0 || cp < (wint_t*) str + c1)
02983
02984 FREE_STACK_RETURN (REG_ECOLLATE);
02985
02986 str[0] = (wchar_t)idx;
02987 }
02988 else
02989 {
02990
02991
02992 int32_t table_size;
02993 const int32_t *symb_table;
02994 const unsigned char *extra;
02995 int32_t idx;
02996 int32_t elem;
02997 int32_t second;
02998 int32_t hash;
02999 char char_str[c1];
03000
03001
03002
03003
03004
03005 for (i = 0; i < c1; ++i)
03006 char_str[i] = str[i];
03007
03008 table_size =
03009 _NL_CURRENT_WORD (LC_COLLATE,
03010 _NL_COLLATE_SYMB_HASH_SIZEMB);
03011 symb_table = (const int32_t *)
03012 _NL_CURRENT (LC_COLLATE,
03013 _NL_COLLATE_SYMB_TABLEMB);
03014 extra = (const unsigned char *)
03015 _NL_CURRENT (LC_COLLATE,
03016 _NL_COLLATE_SYMB_EXTRAMB);
03017
03018
03019 hash = elem_hash (char_str, c1);
03020
03021 idx = 0;
03022 elem = hash % table_size;
03023 second = hash % (table_size - 2);
03024 while (symb_table[2 * elem] != 0)
03025 {
03026
03027 if (symb_table[2 * elem] == hash
03028 && c1 == extra[symb_table[2 * elem + 1]]
03029 && memcmp (char_str,
03030 &extra[symb_table[2 * elem + 1]
03031 + 1], c1) == 0)
03032 {
03033
03034 idx = symb_table[2 * elem + 1];
03035 idx += 1 + extra[idx];
03036 break;
03037 }
03038
03039
03040 elem += second;
03041 }
03042
03043 if (symb_table[2 * elem] != 0)
03044 {
03045
03046
03047 idx += 1 + extra[idx];
03048
03049 idx = (idx + 3) & ~3;
03050
03051 str[0] = (wchar_t) idx + 4;
03052 }
03053 else if (symb_table[2 * elem] == 0 && c1 == 1)
03054 {
03055
03056
03057 had_char_class = false;
03058 BUF_PUSH(str[0]);
03059
03060 laststart[5]++;
03061 range_start = str[0];
03062
03063
03064
03065 PATFETCH (c);
03066
03067 continue;
03068 }
03069 else
03070 FREE_STACK_RETURN (REG_ECOLLATE);
03071 }
03072 datasize = 1;
03073 }
03074 # endif
03075
03076
03077 PATFETCH (c);
03078
03079
03080
03081 GET_BUFFER_SPACE(datasize);
03082
03083 b += datasize;
03084
03085 if (delim == '=')
03086 {
03087
03088
03089 offset = laststart[1] + laststart[2]
03090 + laststart[3] +6;
03091
03092 insert_space(datasize, laststart + offset, b - 1);
03093
03094
03095 for (i = 0 ; i < datasize ; i++)
03096 laststart[offset + i] = str[i];
03097
03098
03099 laststart[3] += datasize;
03100 had_char_class = true;
03101 }
03102 else
03103 {
03104
03105
03106 offset = laststart[1] + laststart[2] + 6;
03107
03108
03109 insert_space(datasize, laststart + offset, b-1);
03110 for (i = 0 ; i < datasize ; i++)
03111 laststart[offset + i] = str[i];
03112
03113
03114
03115
03116
03117
03118
03119 range_start = -(laststart[1] + laststart[2] + 6);
03120
03121 laststart[2] += datasize;
03122 had_char_class = false;
03123 }
03124 }
03125 else
03126 {
03127 c1++;
03128 while (c1--)
03129 PATUNFETCH;
03130 BUF_PUSH ('[');
03131 BUF_PUSH (delim);
03132 laststart[5] += 2;
03133 range_start = delim;
03134 had_char_class = false;
03135 }
03136 }
03137 else
03138 {
03139 had_char_class = false;
03140 BUF_PUSH(c);
03141 laststart[5]++;
03142 range_start = c;
03143 }
03144 }
03145
03146 #else
03147
03148
03149 GET_BUFFER_SPACE (34);
03150
03151 laststart = b;
03152
03153
03154
03155 BUF_PUSH (*p == '^' ? charset_not : charset);
03156 if (*p == '^')
03157 p++;
03158
03159
03160 p1 = p;
03161
03162
03163 BUF_PUSH ((1 << BYTEWIDTH) / BYTEWIDTH);
03164
03165
03166 bzero (b, (1 << BYTEWIDTH) / BYTEWIDTH);
03167
03168
03169 if ((re_opcode_t) b[-2] == charset_not
03170 && (syntax & RE_HAT_LISTS_NOT_NEWLINE))
03171 SET_LIST_BIT ('\n');
03172
03173
03174 for (;;)
03175 {
03176 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
03177
03178 PATFETCH (c);
03179
03180
03181 if ((syntax & RE_BACKSLASH_ESCAPE_IN_LISTS) && c == '\\')
03182 {
03183 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
03184
03185 PATFETCH (c1);
03186 SET_LIST_BIT (c1);
03187 range_start = c1;
03188 continue;
03189 }
03190
03191
03192
03193
03194 if (c == ']' && p != p1 + 1)
03195 break;
03196
03197
03198
03199 if (had_char_class && c == '-' && *p != ']')
03200 FREE_STACK_RETURN (REG_ERANGE);
03201
03202
03203
03204
03205
03206 if (c == '-'
03207 && !(p - 2 >= pattern && p[-2] == '[')
03208 && !(p - 3 >= pattern && p[-3] == '[' && p[-2] == '^')
03209 && *p != ']')
03210 {
03211 reg_errcode_t ret
03212 = byte_compile_range (range_start, &p, pend, translate,
03213 syntax, b);
03214 if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
03215 range_start = 0xffffffff;
03216 }
03217
03218 else if (p[0] == '-' && p[1] != ']')
03219 {
03220 reg_errcode_t ret;
03221
03222
03223 PATFETCH (c1);
03224
03225 ret = byte_compile_range (c, &p, pend, translate, syntax, b);
03226 if (ret != REG_NOERROR) FREE_STACK_RETURN (ret);
03227 range_start = 0xffffffff;
03228 }
03229
03230
03231
03232
03233 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == ':')
03234 {
03235 char str[CHAR_CLASS_MAX_LENGTH + 1];
03236
03237 PATFETCH (c);
03238 c1 = 0;
03239
03240
03241 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
03242
03243 for (;;)
03244 {
03245 PATFETCH (c);
03246 if ((c == ':' && *p == ']') || p == pend)
03247 break;
03248 if (c1 < CHAR_CLASS_MAX_LENGTH)
03249 str[c1++] = c;
03250 else
03251
03252 str[0] = '\0';
03253 }
03254 str[c1] = '\0';
03255
03256
03257
03258
03259 if (c == ':' && *p == ']')
03260 {
03261 # if defined _LIBC || WIDE_CHAR_SUPPORT
03262 boolean is_lower = STREQ (str, "lower");
03263 boolean is_upper = STREQ (str, "upper");
03264 wctype_t wt;
03265 int ch;
03266
03267 wt = IS_CHAR_CLASS (str);
03268 if (wt == 0)
03269 FREE_STACK_RETURN (REG_ECTYPE);
03270
03271
03272
03273 PATFETCH (c);
03274
03275 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
03276
03277 for (ch = 0; ch < 1 << BYTEWIDTH; ++ch)
03278 {
03279 # ifdef _LIBC
03280 if (__iswctype (__btowc (ch), wt))
03281 SET_LIST_BIT (ch);
03282 # else
03283 if (iswctype (btowc (ch), wt))
03284 SET_LIST_BIT (ch);
03285 # endif
03286
03287 if (translate && (is_upper || is_lower)
03288 && (ISUPPER (ch) || ISLOWER (ch)))
03289 SET_LIST_BIT (ch);
03290 }
03291
03292 had_char_class = true;
03293 # else
03294 int ch;
03295 boolean is_alnum = STREQ (str, "alnum");
03296 boolean is_alpha = STREQ (str, "alpha");
03297 boolean is_blank = STREQ (str, "blank");
03298 boolean is_cntrl = STREQ (str, "cntrl");
03299 boolean is_digit = STREQ (str, "digit");
03300 boolean is_graph = STREQ (str, "graph");
03301 boolean is_lower = STREQ (str, "lower");
03302 boolean is_print = STREQ (str, "print");
03303 boolean is_punct = STREQ (str, "punct");
03304 boolean is_space = STREQ (str, "space");
03305 boolean is_upper = STREQ (str, "upper");
03306 boolean is_xdigit = STREQ (str, "xdigit");
03307
03308 if (!IS_CHAR_CLASS (str))
03309 FREE_STACK_RETURN (REG_ECTYPE);
03310
03311
03312
03313 PATFETCH (c);
03314
03315 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
03316
03317 for (ch = 0; ch < 1 << BYTEWIDTH; ch++)
03318 {
03319
03320
03321 if ( (is_alnum && ISALNUM (ch))
03322 || (is_alpha && ISALPHA (ch))
03323 || (is_blank && ISBLANK (ch))
03324 || (is_cntrl && ISCNTRL (ch)))
03325 SET_LIST_BIT (ch);
03326 if ( (is_digit && ISDIGIT (ch))
03327 || (is_graph && ISGRAPH (ch))
03328 || (is_lower && ISLOWER (ch))
03329 || (is_print && ISPRINT (ch)))
03330 SET_LIST_BIT (ch);
03331 if ( (is_punct && ISPUNCT (ch))
03332 || (is_space && ISSPACE (ch))
03333 || (is_upper && ISUPPER (ch))
03334 || (is_xdigit && ISXDIGIT (ch)))
03335 SET_LIST_BIT (ch);
03336 if ( translate && (is_upper || is_lower)
03337 && (ISUPPER (ch) || ISLOWER (ch)))
03338 SET_LIST_BIT (ch);
03339 }
03340 had_char_class = true;
03341 # endif
03342 }
03343 else
03344 {
03345 c1++;
03346 while (c1--)
03347 PATUNFETCH;
03348 SET_LIST_BIT ('[');
03349 SET_LIST_BIT (':');
03350 range_start = ':';
03351 had_char_class = false;
03352 }
03353 }
03354 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '=')
03355 {
03356 unsigned char str[MB_LEN_MAX + 1];
03357 # ifdef _LIBC
03358 uint32_t nrules =
03359 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
03360 # endif
03361
03362 PATFETCH (c);
03363 c1 = 0;
03364
03365
03366 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
03367
03368 for (;;)
03369 {
03370 PATFETCH (c);
03371 if ((c == '=' && *p == ']') || p == pend)
03372 break;
03373 if (c1 < MB_LEN_MAX)
03374 str[c1++] = c;
03375 else
03376
03377 str[0] = '\0';
03378 }
03379 str[c1] = '\0';
03380
03381 if (c == '=' && *p == ']' && str[0] != '\0')
03382 {
03383
03384
03385
03386
03387
03388
03389 # ifdef _LIBC
03390 if (nrules == 0)
03391 # endif
03392 {
03393 if (c1 != 1)
03394 FREE_STACK_RETURN (REG_ECOLLATE);
03395
03396
03397
03398 PATFETCH (c);
03399
03400
03401 SET_LIST_BIT (str[0]);
03402 }
03403 # ifdef _LIBC
03404 else
03405 {
03406
03407
03408
03409
03410 const int32_t *table;
03411 const unsigned char *weights;
03412 const unsigned char *extra;
03413 const int32_t *indirect;
03414 int32_t idx;
03415 const unsigned char *cp = str;
03416 int ch;
03417
03418
03419 # include <locale/weight.h>
03420
03421 table = (const int32_t *)
03422 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEMB);
03423 weights = (const unsigned char *)
03424 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTMB);
03425 extra = (const unsigned char *)
03426 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAMB);
03427 indirect = (const int32_t *)
03428 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTMB);
03429
03430 idx = findidx (&cp);
03431 if (idx == 0 || cp < str + c1)
03432
03433 FREE_STACK_RETURN (REG_ECOLLATE);
03434
03435
03436
03437 PATFETCH (c);
03438
03439
03440
03441
03442
03443
03444
03445
03446
03447 for (ch = 1; ch < 256; ++ch)
03448
03449
03450 if (table[ch] > 0)
03451 {
03452 int32_t idx2 = table[ch];
03453 size_t len = weights[idx2];
03454
03455
03456 if (weights[idx] == len)
03457 {
03458
03459
03460 size_t cnt = 0;
03461
03462 while (cnt < len
03463 && (weights[idx + 1 + cnt]
03464 == weights[idx2 + 1 + cnt]))
03465 ++cnt;
03466
03467 if (cnt == len)
03468
03469
03470 SET_LIST_BIT (ch);
03471 }
03472 }
03473 }
03474 # endif
03475 had_char_class = true;
03476 }
03477 else
03478 {
03479 c1++;
03480 while (c1--)
03481 PATUNFETCH;
03482 SET_LIST_BIT ('[');
03483 SET_LIST_BIT ('=');
03484 range_start = '=';
03485 had_char_class = false;
03486 }
03487 }
03488 else if (syntax & RE_CHAR_CLASSES && c == '[' && *p == '.')
03489 {
03490 unsigned char str[128];
03491 # ifdef _LIBC
03492 uint32_t nrules =
03493 _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
03494 # endif
03495
03496 PATFETCH (c);
03497 c1 = 0;
03498
03499
03500 if (p == pend) FREE_STACK_RETURN (REG_EBRACK);
03501
03502 for (;;)
03503 {
03504 PATFETCH (c);
03505 if ((c == '.' && *p == ']') || p == pend)
03506 break;
03507 if (c1 < sizeof (str))
03508 str[c1++] = c;
03509 else
03510
03511 str[0] = '\0';
03512 }
03513 str[c1] = '\0';
03514
03515 if (c == '.' && *p == ']' && str[0] != '\0')
03516 {
03517
03518
03519
03520
03521
03522
03523
03524 # ifdef _LIBC
03525 if (nrules == 0)
03526 # endif
03527 {
03528 if (c1 != 1)
03529 FREE_STACK_RETURN (REG_ECOLLATE);
03530
03531
03532
03533 PATFETCH (c);
03534
03535
03536 SET_LIST_BIT (str[0]);
03537 range_start = ((const unsigned char *) str)[0];
03538 }
03539 # ifdef _LIBC
03540 else
03541 {
03542
03543
03544
03545
03546 int32_t table_size;
03547 const int32_t *symb_table;
03548 const unsigned char *extra;
03549 int32_t idx;
03550 int32_t elem;
03551 int32_t second;
03552 int32_t hash;
03553
03554 table_size =
03555 _NL_CURRENT_WORD (LC_COLLATE,
03556 _NL_COLLATE_SYMB_HASH_SIZEMB);
03557 symb_table = (const int32_t *)
03558 _NL_CURRENT (LC_COLLATE,
03559 _NL_COLLATE_SYMB_TABLEMB);
03560 extra = (const unsigned char *)
03561 _NL_CURRENT (LC_COLLATE,
03562 _NL_COLLATE_SYMB_EXTRAMB);
03563
03564
03565 hash = elem_hash (str, c1);
03566
03567 idx = 0;
03568 elem = hash % table_size;
03569 second = hash % (table_size - 2);
03570 while (symb_table[2 * elem] != 0)
03571 {
03572
03573 if (symb_table[2 * elem] == hash
03574 && c1 == extra[symb_table[2 * elem + 1]]
03575 && memcmp (str,
03576 &extra[symb_table[2 * elem + 1]
03577 + 1],
03578 c1) == 0)
03579 {
03580
03581 idx = symb_table[2 * elem + 1];
03582 idx += 1 + extra[idx];
03583 break;
03584 }
03585
03586
03587 elem += second;
03588 }
03589
03590 if (symb_table[2 * elem] == 0)
03591
03592 FREE_STACK_RETURN (REG_ECOLLATE);
03593
03594
03595
03596 PATFETCH (c);
03597
03598
03599
03600
03601
03602
03603
03604
03605
03606
03607
03608 c1 = extra[idx++];
03609 if (c1 == 1)
03610 range_start = extra[idx];
03611 while (c1-- > 0)
03612 {
03613 SET_LIST_BIT (extra[idx]);
03614 ++idx;
03615 }
03616 }
03617 # endif
03618 had_char_class = false;
03619 }
03620 else
03621 {
03622 c1++;
03623 while (c1--)
03624 PATUNFETCH;
03625 SET_LIST_BIT ('[');
03626 SET_LIST_BIT ('.');
03627 range_start = '.';
03628 had_char_class = false;
03629 }
03630 }
03631 else
03632 {
03633 had_char_class = false;
03634 SET_LIST_BIT (c);
03635 range_start = c;
03636 }
03637 }
03638
03639
03640
03641 while ((int) b[-1] > 0 && b[b[-1] - 1] == 0)
03642 b[-1]--;
03643 b += b[-1];
03644 #endif
03645 }
03646 break;
03647
03648
03649 case '(':
03650 if (syntax & RE_NO_BK_PARENS)
03651 goto handle_open;
03652 else
03653 goto normal_char;
03654
03655
03656 case ')':
03657 if (syntax & RE_NO_BK_PARENS)
03658 goto handle_close;
03659 else
03660 goto normal_char;
03661
03662
03663 case '\n':
03664 if (syntax & RE_NEWLINE_ALT)
03665 goto handle_alt;
03666 else
03667 goto normal_char;
03668
03669
03670 case '|':
03671 if (syntax & RE_NO_BK_VBAR)
03672 goto handle_alt;
03673 else
03674 goto normal_char;
03675
03676
03677 case '{':
03678 if (syntax & RE_INTERVALS && syntax & RE_NO_BK_BRACES)
03679 goto handle_interval;
03680 else
03681 goto normal_char;
03682
03683
03684 case '\\':
03685 if (p == pend) FREE_STACK_RETURN (REG_EESCAPE);
03686
03687
03688
03689
03690 PATFETCH_RAW (c);
03691
03692 switch (c)
03693 {
03694 case '(':
03695 if (syntax & RE_NO_BK_PARENS)
03696 goto normal_backslash;
03697
03698 handle_open:
03699 bufp->re_nsub++;
03700 regnum++;
03701
03702 if (COMPILE_STACK_FULL)
03703 {
03704 RETALLOC (compile_stack.stack, compile_stack.size << 1,
03705 compile_stack_elt_t);
03706 if (compile_stack.stack == NULL) return REG_ESPACE;
03707
03708 compile_stack.size <<= 1;
03709 }
03710
03711
03712
03713
03714
03715 COMPILE_STACK_TOP.begalt_offset = begalt - COMPILED_BUFFER_VAR;
03716 COMPILE_STACK_TOP.fixup_alt_jump
03717 = fixup_alt_jump ? fixup_alt_jump - COMPILED_BUFFER_VAR + 1 : 0;
03718 COMPILE_STACK_TOP.laststart_offset = b - COMPILED_BUFFER_VAR;
03719 COMPILE_STACK_TOP.regnum = regnum;
03720
03721
03722
03723
03724
03725 if (regnum <= MAX_REGNUM)
03726 {
03727 COMPILE_STACK_TOP.inner_group_offset = b
03728 - COMPILED_BUFFER_VAR + 2;
03729 BUF_PUSH_3 (start_memory, regnum, 0);
03730 }
03731
03732 compile_stack.avail++;
03733
03734 fixup_alt_jump = 0;
03735 laststart = 0;
03736 begalt = b;
03737
03738
03739
03740 pending_exact = 0;
03741 break;
03742
03743
03744 case ')':
03745 if (syntax & RE_NO_BK_PARENS) goto normal_backslash;
03746
03747 if (COMPILE_STACK_EMPTY)
03748 {
03749 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
03750 goto normal_backslash;
03751 else
03752 FREE_STACK_RETURN (REG_ERPAREN);
03753 }
03754
03755 handle_close:
03756 if (fixup_alt_jump)
03757 {
03758
03759
03760
03761 BUF_PUSH (push_dummy_failure);
03762
03763
03764
03765 STORE_JUMP (jump_past_alt, fixup_alt_jump, b - 1);
03766 }
03767
03768
03769 if (COMPILE_STACK_EMPTY)
03770 {
03771 if (syntax & RE_UNMATCHED_RIGHT_PAREN_ORD)
03772 goto normal_char;
03773 else
03774 FREE_STACK_RETURN (REG_ERPAREN);
03775 }
03776
03777
03778
03779 assert (compile_stack.avail != 0);
03780 {
03781
03782
03783
03784 regnum_t this_group_regnum;
03785
03786 compile_stack.avail--;
03787 begalt = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.begalt_offset;
03788 fixup_alt_jump
03789 = COMPILE_STACK_TOP.fixup_alt_jump
03790 ? COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.fixup_alt_jump - 1
03791 : 0;
03792 laststart = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.laststart_offset;
03793 this_group_regnum = COMPILE_STACK_TOP.regnum;
03794
03795
03796
03797 pending_exact = 0;
03798
03799
03800
03801 if (this_group_regnum <= MAX_REGNUM)
03802 {
03803 UCHAR_T *inner_group_loc
03804 = COMPILED_BUFFER_VAR + COMPILE_STACK_TOP.inner_group_offset;
03805
03806 *inner_group_loc = regnum - this_group_regnum;
03807 BUF_PUSH_3 (stop_memory, this_group_regnum,
03808 regnum - this_group_regnum);
03809 }
03810 }
03811 break;
03812
03813
03814 case '|':
03815 if (syntax & RE_LIMITED_OPS || syntax & RE_NO_BK_VBAR)
03816 goto normal_backslash;
03817 handle_alt:
03818 if (syntax & RE_LIMITED_OPS)
03819 goto normal_char;
03820
03821
03822
03823 GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
03824 INSERT_JUMP (on_failure_jump, begalt,
03825 b + 2 + 2 * OFFSET_ADDRESS_SIZE);
03826 pending_exact = 0;
03827 b += 1 + OFFSET_ADDRESS_SIZE;
03828
03829
03830
03831
03832
03833
03834
03835
03836
03837
03838
03839
03840
03841
03842
03843
03844
03845 if (fixup_alt_jump)
03846 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
03847
03848
03849
03850
03851 fixup_alt_jump = b;
03852 GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
03853 b += 1 + OFFSET_ADDRESS_SIZE;
03854
03855 laststart = 0;
03856 begalt = b;
03857 break;
03858
03859
03860 case '{':
03861
03862 if (!(syntax & RE_INTERVALS)
03863
03864
03865 || (syntax & RE_NO_BK_BRACES))
03866 goto normal_backslash;
03867
03868 handle_interval:
03869 {
03870
03871
03872
03873 int lower_bound = -1, upper_bound = -1;
03874
03875
03876
03877 const CHAR_T *beg_interval = p;
03878
03879 if (p == pend)
03880 goto invalid_interval;
03881
03882 GET_UNSIGNED_NUMBER (lower_bound);
03883
03884 if (c == ',')
03885 {
03886 GET_UNSIGNED_NUMBER (upper_bound);
03887 if (upper_bound < 0)
03888 upper_bound = RE_DUP_MAX;
03889 }
03890 else
03891
03892 upper_bound = lower_bound;
03893
03894 if (! (0 <= lower_bound && lower_bound <= upper_bound))
03895 goto invalid_interval;
03896
03897 if (!(syntax & RE_NO_BK_BRACES))
03898 {
03899 if (c != '\\' || p == pend)
03900 goto invalid_interval;
03901 PATFETCH (c);
03902 }
03903
03904 if (c != '}')
03905 goto invalid_interval;
03906
03907
03908 if (!laststart)
03909 {
03910 if (syntax & RE_CONTEXT_INVALID_OPS
03911 && !(syntax & RE_INVALID_INTERVAL_ORD))
03912 FREE_STACK_RETURN (REG_BADRPT);
03913 else if (syntax & RE_CONTEXT_INDEP_OPS)
03914 laststart = b;
03915 else
03916 goto unfetch_interval;
03917 }
03918
03919
03920
03921 if (RE_DUP_MAX < upper_bound)
03922 FREE_STACK_RETURN (REG_BADBR);
03923
03924
03925
03926
03927
03928
03929 if (upper_bound == 0)
03930 {
03931 GET_BUFFER_SPACE (1 + OFFSET_ADDRESS_SIZE);
03932 INSERT_JUMP (jump, laststart, b + 1
03933 + OFFSET_ADDRESS_SIZE);
03934 b += 1 + OFFSET_ADDRESS_SIZE;
03935 }
03936
03937
03938
03939
03940
03941
03942
03943
03944
03945
03946 else
03947 {
03948
03949 unsigned nbytes = 2 + 4 * OFFSET_ADDRESS_SIZE +
03950 (upper_bound > 1) * (2 + 4 * OFFSET_ADDRESS_SIZE);
03951
03952 GET_BUFFER_SPACE (nbytes);
03953
03954
03955
03956
03957
03958
03959 INSERT_JUMP2 (succeed_n, laststart,
03960 b + 1 + 2 * OFFSET_ADDRESS_SIZE
03961 + (upper_bound > 1) * (1 + 2 * OFFSET_ADDRESS_SIZE)
03962 , lower_bound);
03963 b += 1 + 2 * OFFSET_ADDRESS_SIZE;
03964
03965
03966
03967
03968
03969
03970
03971
03972 PREFIX(insert_op2) (set_number_at, laststart, 1
03973 + 2 * OFFSET_ADDRESS_SIZE, lower_bound, b);
03974 b += 1 + 2 * OFFSET_ADDRESS_SIZE;
03975
03976 if (upper_bound > 1)
03977 {
03978
03979
03980
03981
03982
03983
03984 STORE_JUMP2 (jump_n, b, laststart
03985 + 2 * OFFSET_ADDRESS_SIZE + 1,
03986 upper_bound - 1);
03987 b += 1 + 2 * OFFSET_ADDRESS_SIZE;
03988
03989
03990
03991
03992
03993
03994
03995
03996
03997
03998
03999
04000
04001
04002
04003 PREFIX(insert_op2) (set_number_at, laststart,
04004 b - laststart,
04005 upper_bound - 1, b);
04006 b += 1 + 2 * OFFSET_ADDRESS_SIZE;
04007 }
04008 }
04009 pending_exact = 0;
04010 break;
04011
04012 invalid_interval:
04013 if (!(syntax & RE_INVALID_INTERVAL_ORD))
04014 FREE_STACK_RETURN (p == pend ? REG_EBRACE : REG_BADBR);
04015 unfetch_interval:
04016
04017 p = beg_interval;
04018 c = '{';
04019 if (syntax & RE_NO_BK_BRACES)
04020 goto normal_char;
04021 else
04022 goto normal_backslash;
04023 }
04024
04025 #ifdef emacs
04026
04027
04028 case '=':
04029 BUF_PUSH (at_dot);
04030 break;
04031
04032 case 's':
04033 laststart = b;
04034 PATFETCH (c);
04035 BUF_PUSH_2 (syntaxspec, syntax_spec_code[c]);
04036 break;
04037
04038 case 'S':
04039 laststart = b;
04040 PATFETCH (c);
04041 BUF_PUSH_2 (notsyntaxspec, syntax_spec_code[c]);
04042 break;
04043 #endif
04044
04045
04046 case 'w':
04047 if (syntax & RE_NO_GNU_OPS)
04048 goto normal_char;
04049 laststart = b;
04050 BUF_PUSH (wordchar);
04051 break;
04052
04053
04054 case 'W':
04055 if (syntax & RE_NO_GNU_OPS)
04056 goto normal_char;
04057 laststart = b;
04058 BUF_PUSH (notwordchar);
04059 break;
04060
04061
04062 case '<':
04063 if (syntax & RE_NO_GNU_OPS)
04064 goto normal_char;
04065 BUF_PUSH (wordbeg);
04066 break;
04067
04068 case '>':
04069 if (syntax & RE_NO_GNU_OPS)
04070 goto normal_char;
04071 BUF_PUSH (wordend);
04072 break;
04073
04074 case 'b':
04075 if (syntax & RE_NO_GNU_OPS)
04076 goto normal_char;
04077 BUF_PUSH (wordbound);
04078 break;
04079
04080 case 'B':
04081 if (syntax & RE_NO_GNU_OPS)
04082 goto normal_char;
04083 BUF_PUSH (notwordbound);
04084 break;
04085
04086 case '`':
04087 if (syntax & RE_NO_GNU_OPS)
04088 goto normal_char;
04089 BUF_PUSH (begbuf);
04090 break;
04091
04092 case '\'':
04093 if (syntax & RE_NO_GNU_OPS)
04094 goto normal_char;
04095 BUF_PUSH (endbuf);
04096 break;
04097
04098 case '1': case '2': case '3': case '4': case '5':
04099 case '6': case '7': case '8': case '9':
04100 if (syntax & RE_NO_BK_REFS)
04101 goto normal_char;
04102
04103 c1 = c - '0';
04104
04105 if (c1 > regnum)
04106 FREE_STACK_RETURN (REG_ESUBREG);
04107
04108
04109 if (group_in_compile_stack (compile_stack, (regnum_t) c1))
04110 goto normal_char;
04111
04112 laststart = b;
04113 BUF_PUSH_2 (duplicate, c1);
04114 break;
04115
04116
04117 case '+':
04118 case '?':
04119 if (syntax & RE_BK_PLUS_QM)
04120 goto handle_plus;
04121 else
04122 goto normal_backslash;
04123
04124 default:
04125 normal_backslash:
04126
04127
04128
04129 c = TRANSLATE (c);
04130 goto normal_char;
04131 }
04132 break;
04133
04134
04135 default:
04136
04137 normal_char:
04138
04139 if (!pending_exact
04140 #ifdef WCHAR
04141
04142
04143 || is_exactn_bin != is_binary[p - 1 - pattern]
04144 #endif
04145
04146
04147 || pending_exact + *pending_exact + 1 != b
04148
04149
04150 || *pending_exact == (1 << BYTEWIDTH) - 1
04151
04152
04153 || *p == '*' || *p == '^'
04154 || ((syntax & RE_BK_PLUS_QM)
04155 ? *p == '\\' && (p[1] == '+' || p[1] == '?')
04156 : (*p == '+' || *p == '?'))
04157 || ((syntax & RE_INTERVALS)
04158 && ((syntax & RE_NO_BK_BRACES)
04159 ? *p == '{'
04160 : (p[0] == '\\' && p[1] == '{'))))
04161 {
04162
04163
04164 laststart = b;
04165
04166 #ifdef WCHAR
04167
04168 is_exactn_bin = is_binary[p - 1 - pattern];
04169 if (is_exactn_bin)
04170 BUF_PUSH_2 (exactn_bin, 0);
04171 else
04172 BUF_PUSH_2 (exactn, 0);
04173 #else
04174 BUF_PUSH_2 (exactn, 0);
04175 #endif
04176 pending_exact = b - 1;
04177 }
04178
04179 BUF_PUSH (c);
04180 (*pending_exact)++;
04181 break;
04182 }
04183 }
04184
04185
04186
04187
04188 if (fixup_alt_jump)
04189 STORE_JUMP (jump_past_alt, fixup_alt_jump, b);
04190
04191 if (!COMPILE_STACK_EMPTY)
04192 FREE_STACK_RETURN (REG_EPAREN);
04193
04194
04195
04196 if (syntax & RE_NO_POSIX_BACKTRACKING)
04197 BUF_PUSH (succeed);
04198
04199 #ifdef WCHAR
04200 free (pattern);
04201 free (mbs_offset);
04202 free (is_binary);
04203 #endif
04204 free (compile_stack.stack);
04205
04206
04207 #ifdef WCHAR
04208 bufp->used = (uintptr_t) b - (uintptr_t) COMPILED_BUFFER_VAR;
04209 #else
04210 bufp->used = b - bufp->buffer;
04211 #endif
04212
04213 #ifdef DEBUG
04214 if (debug)
04215 {
04216 DEBUG_PRINT1 ("\nCompiled pattern: \n");
04217 PREFIX(print_compiled_pattern) (bufp);
04218 }
04219 #endif
04220
04221 #ifndef MATCH_MAY_ALLOCATE
04222
04223
04224
04225 {
04226 int num_regs = bufp->re_nsub + 1;
04227
04228
04229
04230
04231 if (fail_stack.size < (2 * re_max_failures * MAX_FAILURE_ITEMS))
04232 {
04233 fail_stack.size = (2 * re_max_failures * MAX_FAILURE_ITEMS);
04234
04235 # ifdef emacs
04236 if (! fail_stack.stack)
04237 fail_stack.stack
04238 = (PREFIX(fail_stack_elt_t) *) xmalloc (fail_stack.size
04239 * sizeof (PREFIX(fail_stack_elt_t)));
04240 else
04241 fail_stack.stack
04242 = (PREFIX(fail_stack_elt_t) *) xrealloc (fail_stack.stack,
04243 (fail_stack.size
04244 * sizeof (PREFIX(fail_stack_elt_t))));
04245 # else
04246 if (! fail_stack.stack)
04247 fail_stack.stack
04248 = (PREFIX(fail_stack_elt_t) *) malloc (fail_stack.size
04249 * sizeof (PREFIX(fail_stack_elt_t)));
04250 else
04251 fail_stack.stack
04252 = (PREFIX(fail_stack_elt_t) *) realloc (fail_stack.stack,
04253 (fail_stack.size
04254 * sizeof (PREFIX(fail_stack_elt_t))));
04255 # endif
04256 }
04257
04258 PREFIX(regex_grow_registers) (num_regs);
04259 }
04260 #endif
04261
04262 return REG_NOERROR;
04263 }
04264
04265
04266 /* Write a one-operand instruction directly at LOC: the opcode OP    */
04267 /* goes into LOC[0] and ARG is stored by STORE_NUMBER starting at    */
04268 /* LOC + 1.  Whatever was at those positions is overwritten; nothing */
04269 /* is shifted and no space check is made in this function.           */
04270 static void
04271 PREFIX(store_op1) (op, loc, arg)
04272 re_opcode_t op; /* opcode to write at LOC[0] */
04273 UCHAR_T *loc; /* where the instruction begins */
04274 int arg; /* operand stored right after the opcode */
04275 {
04276 *loc = (UCHAR_T) op;
04277 STORE_NUMBER (loc + 1, arg);
04278 }
04279
04280 /* Write a two-operand instruction directly at LOC: OP goes into     */
04281 /* LOC[0], ARG1 is stored by STORE_NUMBER at LOC + 1 and ARG2 at     */
04282 /* LOC + 1 + OFFSET_ADDRESS_SIZE.  Existing contents are overwritten */
04283 /* in place; nothing is shifted and no space check is made here.     */
04284 static void
04285 PREFIX(store_op2) (op, loc, arg1, arg2)
04286 re_opcode_t op; /* opcode to write at LOC[0] */
04287 UCHAR_T *loc; /* where the instruction begins */
04288 int arg1, arg2; /* first and second operand, stored in that order */
04289 {
04290 *loc = (UCHAR_T) op;
04291 STORE_NUMBER (loc + 1, arg1);
04292 STORE_NUMBER (loc + 1 + OFFSET_ADDRESS_SIZE, arg2);
04293 }
04294
04295 /* Open a slot at LOC and emit a one-operand instruction into it.    */
04296 /* Every element in [LOC, END) is moved up by                        */
04297 /* 1 + OFFSET_ADDRESS_SIZE positions, then OP and ARG are written at */
04298 /* LOC through store_op1.  The positions past END that receive the   */
04299 /* moved elements are written without any bounds check here.         */
04300 static void
04301 PREFIX(insert_op1) (op, loc, arg, end)
04302     re_opcode_t op;
04303     UCHAR_T *loc;
04304     int arg;
04305     UCHAR_T *end;
04306 {
04307   UCHAR_T *src;
04308
04309   /* Go from the top down so the overlapping move loses nothing.  */
04310   for (src = end; src != loc; --src)
04311     src[OFFSET_ADDRESS_SIZE] = src[-1];
04312
04313   PREFIX(store_op1) (op, loc, arg);
04314 }
04315
04316 /* Open a slot at LOC and emit a two-operand instruction into it.    */
04317 /* Every element in [LOC, END) is moved up by                        */
04318 /* 1 + 2 * OFFSET_ADDRESS_SIZE positions, then OP, ARG1 and ARG2 are */
04319 /* written at LOC through store_op2.  No bounds check is made here.  */
04320 static void
04321 PREFIX(insert_op2) (op, loc, arg1, arg2, end)
04322     re_opcode_t op;
04323     UCHAR_T *loc;
04324     int arg1, arg2;
04325     UCHAR_T *end;
04326 {
04327   UCHAR_T *src;
04328
04329   /* Go from the top down so the overlapping move loses nothing.  */
04330   for (src = end; src != loc; --src)
04331     src[2 * OFFSET_ADDRESS_SIZE] = src[-1];
04332
04333   PREFIX(store_op2) (op, loc, arg1, arg2);
04334 }
04335
04336 /* Return true when the character two positions before P is '(' or   */
04337 /* '|' and counts as an operator: either the matching RE_NO_BK_PARENS */
04338 /* or RE_NO_BK_VBAR bit is set in SYNTAX, or a backslash sits right  */
04339 /* before it.  The backslash is only looked for when that character  */
04340 /* lies after PATTERN, so nothing before the pattern start is read.  */
04341 static boolean
04342 PREFIX(at_begline_loc_p) (pattern, p, syntax)
04343     const CHAR_T *pattern, *p;
04344     reg_syntax_t syntax;
04345 {
04346   const CHAR_T *before = p - 2;
04347   boolean escaped = before > pattern && before[-1] == '\\';
04348
04349   if (*before == '(')
04350     return (syntax & RE_NO_BK_PARENS) || escaped;
04351   if (*before == '|')
04352     return (syntax & RE_NO_BK_VBAR) || escaped;
04353   return false;
04354 }
04355
04356 /* Return true when the pattern text at P begins a close-group or an */
04357 /* alternation operator.  With RE_NO_BK_PARENS / RE_NO_BK_VBAR set   */
04358 /* in SYNTAX the operator is the bare ')' / '|'; without the bit it  */
04359 /* is a backslash at P followed, before PEND, by ')' / '|'.          */
04360 static boolean
04361 PREFIX(at_endline_loc_p) (p, pend, syntax)
04362     const CHAR_T *p, *pend;
04363     reg_syntax_t syntax;
04364 {
04365   const CHAR_T cur = *p;
04366   /* True when P holds a backslash with one more character before PEND.  */
04367   const boolean bk_pair = cur == '\\' && p + 1 < pend;
04368
04369   if ((syntax & RE_NO_BK_PARENS) ? cur == ')' : (bk_pair && p[1] == ')'))
04370     return true;
04371
04372   if ((syntax & RE_NO_BK_VBAR) ? cur == '|' : (bk_pair && p[1] == '|'))
04373     return true;
04374
04375   return false;
04376 }
04377
04378 #else
04379
04380 /* Return true if some entry among the first COMPILE_STACK.avail     */
04381 /* elements of COMPILE_STACK.stack has its regnum field equal to     */
04382 /* REGNUM, and false otherwise.  Entries are examined top-down.      */
04383 static boolean
04384 group_in_compile_stack (compile_stack, regnum)
04385     compile_stack_type compile_stack;
04386     regnum_t regnum;
04387 {
04388   int idx = compile_stack.avail - 1;
04389
04390   while (idx >= 0)
04391     {
04392       if (compile_stack.stack[idx].regnum == regnum)
04393         return true;
04394       --idx;
04395     }
04396   return false;
04397 }
04398 #endif
04399
04400 #ifdef INSIDE_RECURSION
04401
04402 #ifdef WCHAR
04403 /* Open a gap of NUM elements at LOC: each element from END - NUM    */
04404 /* down to LOC is copied NUM positions higher; END is the top slot.  */
04405 static void
04406 insert_space (num, loc, end)
04407     int num;
04408     CHAR_T *loc;
04409     CHAR_T *end;
04410 {
04411   CHAR_T *dst;
04412
04413   /* Highest element first, so the overlapping move loses nothing.  */
04414   for (dst = end; dst - num >= loc; --dst)
04415     *dst = *(dst - num);
04416 }
04417 #endif
04418 /* wcs_compile_range: record the range RANGE_START_CHAR .. (*P_PTR)[0] as a (start, end) pair in the buffer ending at B. */
04419 #ifdef WCHAR
04420 static reg_errcode_t
04421 wcs_compile_range (range_start_char, p_ptr, pend, translate, syntax, b,
04422 char_set)
04423 CHAR_T range_start_char; /* range start; values below -1 get special treatment when collation rules exist */
04424 const CHAR_T **p_ptr, *pend; /* *P_PTR is at the range-end character; PEND is the pattern end */
04425 CHAR_T *char_set, *b; /* CHAR_SET: set being built, slots [4] and [5] are used here; B: current buffer end */
04426 RE_TRANSLATE_TYPE translate; /* not named in the body; presumably read by the TRANSLATE macro -- confirm */
04427 reg_syntax_t syntax; /* only RE_NO_EMPTY_RANGES is tested here */
04428 {
04429 const CHAR_T *p = *p_ptr;
04430 CHAR_T range_start, range_end;
04431 reg_errcode_t ret;
04432 # ifdef _LIBC
04433 uint32_t nrules;
04434 uint32_t start_val, end_val;
04435 # endif
04436 if (p == pend)
04437 return REG_ERANGE;
04438 /* From here on a range-end character is available at p[0]. */
04439 # ifdef _LIBC
04440 nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
04441 if (nrules != 0)
04442 {
04443 const char *collseq = (const char *) _NL_CURRENT(LC_COLLATE,
04444 _NL_COLLATE_COLLSEQWC);
04445 const unsigned char *extra = (const unsigned char *)
04446 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
04447
04448 if (range_start_char < -1)
04449 {
04450 /* The negated start is used as an index into CHAR_SET; the value there is an offset into EXTRA. */
04451 int32_t *wextra;
04452
04453 wextra = (int32_t*)(extra + char_set[-range_start_char]);
04454 start_val = wextra[1 + *wextra];
04455 }
04456 else
04457 start_val = collseq_table_lookup(collseq, TRANSLATE(range_start_char));
04458
04459 end_val = collseq_table_lookup (collseq, TRANSLATE (p[0]));
04460
04461 /* With RE_NO_EMPTY_RANGES a start value above the end value is reported */
04462 /* as REG_ERANGE; the pair is still stored below in either case. */
04463 ret = ((syntax & RE_NO_EMPTY_RANGES)
04464 && (start_val > end_val))? REG_ERANGE : REG_NOERROR;
04465
04466 /* Open two slots char_set[5] + 2 elements below B, fill them with the pair, bump char_set[4]. */
04467 insert_space(2, b - char_set[5] - 2, b - 1);
04468 *(b - char_set[5] - 2) = (wchar_t)start_val;
04469 *(b - char_set[5] - 1) = (wchar_t)end_val;
04470 char_set[4]++;
04471 }
04472 else
04473 # endif
04474 {
04475 range_start = (range_start_char >= 0)? TRANSLATE (range_start_char):
04476 range_start_char;
04477 range_end = TRANSLATE (p[0]);
04478
04479 /* Here the translated characters themselves are compared and stored. */
04480 ret = ((syntax & RE_NO_EMPTY_RANGES)
04481 && (range_start > range_end))? REG_ERANGE : REG_NOERROR;
04482
04483 /* Same insertion as in the collation branch above. */
04484 insert_space(2, b - char_set[5] - 2, b - 1);
04485 *(b - char_set[5] - 2) = range_start;
04486 *(b - char_set[5] - 1) = range_end;
04487 char_set[4]++;
04488 }
04489
04490 /* Advance the caller's pattern pointer past the range-end character. */
04491 (*p_ptr)++;
04492
04493 return ret;
04494 }
04495 #else
04496
04497 /* Add to a bracket expression the characters of the range that      */
04498 /* starts at RANGE_START_CHAR and ends at the character *P_PTR       */
04499 /* points to.  Members are recorded with SET_LIST_BIT; that macro is */
04500 /* defined elsewhere and presumably sets bits in the list at B, just */
04501 /* as TRANSLATE presumably reads the TRANSLATE parameter -- confirm. */
04502 /* *P_PTR is advanced past the range-end character.                  */
04503 /* Returns REG_ERANGE when *P_PTR is already at PEND, or when        */
04504 /* RE_NO_EMPTY_RANGES is set in SYNTAX and no character was added;   */
04505 /* returns REG_NOERROR otherwise.                                    */
04506
04507 static reg_errcode_t
04508 byte_compile_range (range_start_char, p_ptr, pend, translate, syntax, b)
04509 unsigned int range_start_char;
04510 const char **p_ptr, *pend;
04511 RE_TRANSLATE_TYPE translate;
04512 reg_syntax_t syntax;
04513 unsigned char *b;
04514 {
04515 unsigned this_char;
04516 const char *p = *p_ptr;
04517 reg_errcode_t ret;
04518 # if _LIBC
04519 const unsigned char *collseq;
04520 unsigned int start_colseq;
04521 unsigned int end_colseq;
04522 # else
04523 unsigned end_char;
04524 # endif
04525
04526 if (p == pend)
04527 return REG_ERANGE;
04528
04529 /* Step the caller's pointer past the range end right away; the     */
04530 /* local P still addresses that character for the code below.       */
04531 (*p_ptr)++;
04532
04533 /* Assume an empty range; ret becomes REG_NOERROR once a bit is set. */
04534 ret = syntax & RE_NO_EMPTY_RANGES ? REG_ERANGE : REG_NOERROR;
04535
04536 # if _LIBC
04537 collseq = (const unsigned char *) _NL_CURRENT (LC_COLLATE,
04538 _NL_COLLATE_COLLSEQMB);
04539 /* Membership is decided by comparing COLLSEQ table values, not character codes. */
04540 start_colseq = collseq[(unsigned char) TRANSLATE (range_start_char)];
04541 end_colseq = collseq[(unsigned char) TRANSLATE (p[0])];
04542 for (this_char = 0; this_char <= (unsigned char) -1; ++this_char)
04543 {
04544 unsigned int this_colseq = collseq[(unsigned char) TRANSLATE (this_char)];
04545
04546 if (start_colseq <= this_colseq && this_colseq <= end_colseq)
04547 {
04548 SET_LIST_BIT (TRANSLATE (this_char));
04549 ret = REG_NOERROR;
04550 }
04551 }
04552 # else
04553
04554 /* Without _LIBC the range is taken over character codes, with both */
04555 /* end points passed through TRANSLATE first.                       */
04556 range_start_char = TRANSLATE (range_start_char);
04557
04558
04559 /* Keep only the low BYTEWIDTH bits of the translated end character, */
04560 /* so a char with a negative value does not turn into a huge        */
04561 /* unsigned upper bound.                                            */
04562 end_char = ((unsigned)TRANSLATE(p[0]) & ((1 << BYTEWIDTH) - 1));
04563
04564 for (this_char = range_start_char; this_char <= end_char; ++this_char)
04565 {
04566 SET_LIST_BIT (TRANSLATE (this_char));
04567 ret = REG_NOERROR;
04568 }
04569 # endif
04570
04571 return ret;
04572 }
04573 #endif
04574
04575
04576
04577
04578
04579
04580
04581
04582
04583
04584
04585
04586
04587
04588 #ifdef WCHAR
04589
04590
04591 static unsigned char truncate_wchar (CHAR_T c);
04592
04593 static unsigned char
04594 truncate_wchar (c)
04595 CHAR_T c;
04596 {
04597 unsigned char buf[MB_CUR_MAX];
04598 mbstate_t state;
04599 int retval;
04600 memset (&state, '\0', sizeof (state));
04601 # ifdef _LIBC
04602 retval = __wcrtomb (buf, c, &state);
04603 # else
04604 retval = wcrtomb (buf, c, &state);
04605 # endif
04606 return retval > 0 ? buf[0] : (unsigned char) c;
04607 }
04608 #endif
04609
/* Build the fastmap for the compiled pattern in BUFP.

   BUFP->fastmap is cleared and then the compiled opcodes are walked,
   setting fastmap[c] = 1 for byte values C that the first
   character-consuming opcode on some path through the pattern accepts.
   Alternative paths are remembered on a failure stack and visited in
   turn.  BUFP->fastmap_accurate is set to 1 and BUFP->can_be_null is
   computed along the way.

   Returns 0 normally, or -2 when PUSH_PATTERN_OP reports failure while
   saving an alternative path.  */
static int
PREFIX(re_compile_fastmap) (bufp)
     struct re_pattern_buffer *bufp;
{
  int j, k;
#ifdef MATCH_MAY_ALLOCATE
  PREFIX(fail_stack_type) fail_stack;
#endif
#ifndef REGEX_MALLOC
  /* Not referenced directly below; presumably needed by the
     allocation macros in the alloca configuration -- confirm.  */
  char *destination;
#endif

  register char *fastmap = bufp->fastmap;

#ifdef WCHAR
  /* In the WCHAR build the compiled pattern is read in UCHAR_T units,
     hence the casts of the buffer pointers.  */
  UCHAR_T *pattern = (UCHAR_T*)bufp->buffer;
  register UCHAR_T *pend = (UCHAR_T*) (bufp->buffer + bufp->used);
#else
  UCHAR_T *pattern = bufp->buffer;
  register UCHAR_T *pend = pattern + bufp->used;
#endif
  UCHAR_T *p = pattern;

#ifdef REL_ALLOC
  /* Not referenced directly below; presumably used by the fail-stack
     macros when REL_ALLOC is defined -- confirm.  */
  fail_stack_elt_t *failure_stack_ptr;
#endif

  /* True until the current path hits an opcode that records fastmap
     entries; folded into BUFP->can_be_null when the path ends.  */
  boolean path_can_be_null = true;

  /* Set while a `succeed_n' with a zero count is being handled by the
     `on_failure_jump' code, so that code also skips the count.  */
  boolean succeed_n_p = false;

  assert (fastmap != NULL && p != NULL);

  INIT_FAIL_STACK ();
  bzero (fastmap, 1 << BYTEWIDTH);	/* Start with no byte marked.  */
  bufp->fastmap_accurate = 1;		/* It will be once we return.  */
  bufp->can_be_null = 0;

  while (1)
    {
      if (p == pend || *p == (UCHAR_T) succeed)
	{
	  /* End of this path (end of pattern or a `succeed' opcode).  */
	  if (!FAIL_STACK_EMPTY ())
	    {
	      bufp->can_be_null |= path_can_be_null;

	      /* Reset for the next path.  */
	      path_can_be_null = true;

	      /* Resume at the most recently saved alternative.  */
	      p = fail_stack.stack[--fail_stack.avail].pointer;

	      continue;
	    }
	  else
	    break;
	}

      /* Guards the opcode fetch below against reading past PEND.  */
      assert (p < pend);

      switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
	{

	/* A back-reference: give up on the fastmap by declaring that
	   the pattern can match the empty string, and stop.  */
	case duplicate:
	  bufp->can_be_null = 1;
	  goto done;

	/* Literal run: mark only its first character.  P now points
	   just past the opcode, so p[1] is read as that character
	   (p[0] is presumably the length -- confirm).  */
#ifdef WCHAR
	case exactn:
	  fastmap[truncate_wchar(p[1])] = 1;
	  break;
#else
	case exactn:
	  fastmap[p[1]] = 1;
	  break;
#endif
#ifdef MBS_SUPPORT
	case exactn_bin:
	  fastmap[p[1]] = 1;
	  break;
#endif

#ifdef WCHAR
	/* The WCHAR build does not compute fastmap entries for these
	   opcodes; it marks the pattern as possibly-null and stops.  */
	case charset:
	case charset_not:
	case wordchar:
	case notwordchar:
	  bufp->can_be_null = 1;
	  goto done;
#else
	/* *P is the bitmap length in bytes; each set bit marks the
	   corresponding byte value.  */
	case charset:
	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
	    if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))
	      fastmap[j] = 1;
	  break;

	case charset_not:
	  /* Byte values beyond the end of the bitmap are all accepted.  */
	  for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
	    fastmap[j] = 1;

	  /* Within the bitmap, accept the values whose bit is clear.  */
	  for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
	    if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))))
	      fastmap[j] = 1;
	  break;

	case wordchar:
	  for (j = 0; j < (1 << BYTEWIDTH); j++)
	    if (SYNTAX (j) == Sword)
	      fastmap[j] = 1;
	  break;

	case notwordchar:
	  for (j = 0; j < (1 << BYTEWIDTH); j++)
	    if (SYNTAX (j) != Sword)
	      fastmap[j] = 1;
	  break;
#endif

	case anychar:
	  {
	    /* Remember the newline entry so it can be restored.  */
	    int fastmap_newline = fastmap['\n'];

	    /* Mark every byte value ...  */
	    for (j = 0; j < (1 << BYTEWIDTH); j++)
	      fastmap[j] = 1;

	    /* ... but keep the previous newline entry unless the
	       syntax has RE_DOT_NEWLINE.  */
	    if (!(bufp->syntax & RE_DOT_NEWLINE))
	      fastmap['\n'] = fastmap_newline;

	    /* With RE_DOT_NEWLINE set and can_be_null already set,
	       stop walking the pattern.  */
	    else if (bufp->can_be_null)
	      goto done;

	    /* Otherwise go on to the remaining alternative paths.  */
	    break;
	  }

#ifdef emacs
	case syntaxspec:
	  k = *p++;
	  for (j = 0; j < (1 << BYTEWIDTH); j++)
	    if (SYNTAX (j) == (enum syntaxcode) k)
	      fastmap[j] = 1;
	  break;

	case notsyntaxspec:
	  k = *p++;
	  for (j = 0; j < (1 << BYTEWIDTH); j++)
	    if (SYNTAX (j) != (enum syntaxcode) k)
	      fastmap[j] = 1;
	  break;

	/* These opcodes carry no operand here and set no fastmap
	   entry; just move on to the next opcode.  */
	case before_dot:
	case at_dot:
	case after_dot:
	  continue;
#endif

	/* Opcodes that are skipped without touching the fastmap and
	   without any operand being stepped over.  */
	case no_op:
	case begline:
	case endline:
	case begbuf:
	case endbuf:
	case wordbound:
	case notwordbound:
	case wordbeg:
	case wordend:
	case push_dummy_failure:
	  continue;

	case jump_n:
	case pop_failure_jump:
	case maybe_pop_jump:
	case jump:
	case jump_past_alt:
	case dummy_failure_jump:
	  /* Follow the jump.  A forward jump needs nothing more.  */
	  EXTRACT_NUMBER_AND_INCR (j, p);
	  p += j;
	  if (j > 0)
	    continue;

	  /* Backward (or zero) jump: unless the target is an
	     `on_failure_jump' or `succeed_n', simply continue there.  */
	  if ((re_opcode_t) *p != on_failure_jump
	      && (re_opcode_t) *p != succeed_n)
	    continue;

	  /* Skip the target opcode and follow its own jump offset.  */
	  p++;
	  EXTRACT_NUMBER_AND_INCR (j, p);
	  p += j;

	  /* If the top of the stack is the place we just arrived at,
	     drop it so the same path is not walked twice.  */
	  if (!FAIL_STACK_EMPTY ()
	      && fail_stack.stack[fail_stack.avail - 1].pointer == p)
	    fail_stack.avail--;

	  continue;

	case on_failure_jump:
	case on_failure_keep_string_jump:
	handle_on_failure_jump:
	  EXTRACT_NUMBER_AND_INCR (j, p);

	  /* Save the jump target as an alternative path, but only when
	     it lies before PEND: popping a saved pointer equal to PEND
	     would make the opcode fetch read past the pattern.  A
	     target at or beyond PEND means the pattern can match the
	     empty string instead.  */
	  if (p + j < pend)
	    {
	      if (!PUSH_PATTERN_OP (p + j, fail_stack))
		{
		  RESET_FAIL_STACK ();
		  return -2;
		}
	    }
	  else
	    bufp->can_be_null = 1;

	  if (succeed_n_p)
	    {
	      /* Arrived from `succeed_n': step over its count too.  */
	      EXTRACT_NUMBER_AND_INCR (k, p);
	      succeed_n_p = false;
	    }

	  continue;

	case succeed_n:
	  /* Step over the jump offset to reach the count.  */
	  p += OFFSET_ADDRESS_SIZE;

	  /* Read the count; this also leaves P past it for the
	     non-zero case.  */
	  EXTRACT_NUMBER_AND_INCR (k, p);
	  if (k == 0)
	    {
	      /* A zero count is treated like `on_failure_jump': rewind
		 to the jump offset and share that code.  */
	      p -= 2 * OFFSET_ADDRESS_SIZE;
	      succeed_n_p = true;
	      goto handle_on_failure_jump;
	    }
	  continue;

	case set_number_at:
	  /* Skip both operands.  */
	  p += 2 * OFFSET_ADDRESS_SIZE;
	  continue;

	case start_memory:
	case stop_memory:
	  /* Skip the two operand units.  */
	  p += 2;
	  continue;

	default:
	  abort ();	/* Any other opcode is unexpected here.  */
	}

      /* Only the cases that end in `break' reach this point: fastmap
	 entries were recorded for this path, so it is marked as not
	 matching the empty string.  Setting P to PEND makes the test
	 at the top of the loop move to the next saved alternative, or
	 finish when there is none.  */
      path_can_be_null = false;
      p = pend;
    }

  /* Account for the last path walked (which is also the only one when
     the loop exits immediately).  */
  bufp->can_be_null |= path_can_be_null;

 done:
  RESET_FAIL_STACK ();
  return 0;
}
04927
04928 #else
04929
/* Public entry point for building BUFP's fastmap.  When MBS_SUPPORT is
   compiled in and the current locale is multibyte (MB_CUR_MAX != 1) the
   wide-character builder is used; in every other case the byte builder
   is used.  The builder's return value is passed through unchanged.  */
int
re_compile_fastmap (bufp)
     struct re_pattern_buffer *bufp;
{
# ifdef MBS_SUPPORT
  if (MB_CUR_MAX != 1)
    return wcs_re_compile_fastmap (bufp);
# endif
  return byte_re_compile_fastmap (bufp);
}
04941 #ifdef _LIBC
04942 weak_alias (__re_compile_fastmap, re_compile_fastmap)
04943 #endif
04944
04945
04946
04947
04948
04949
04950
04951
04952
04953
04954
04955
04956
04957
04958
04959 void
04960 re_set_registers (bufp, regs, num_regs, starts, ends)
04961 struct re_pattern_buffer *bufp;
04962 struct re_registers *regs;
04963 unsigned num_regs;
04964 regoff_t *starts, *ends;
04965 {
04966 if (num_regs)
04967 {
04968 bufp->regs_allocated = REGS_REALLOCATE;
04969 regs->num_regs = num_regs;
04970 regs->start = starts;
04971 regs->end = ends;
04972 }
04973 else
04974 {
04975 bufp->regs_allocated = REGS_UNALLOCATED;
04976 regs->num_regs = 0;
04977 regs->start = regs->end = (regoff_t *) 0;
04978 }
04979 }
04980 #ifdef _LIBC
04981 weak_alias (__re_set_registers, re_set_registers)
04982 #endif
04983
04984
04985
04986
04987
04988
/* Single-string front end to re_search_2: STRING is handed over as the
   second string, the first string is empty (NULL, length 0), and the
   stop position is SIZE.  Returns whatever re_search_2 returns.  */
int
re_search (bufp, string, size, startpos, range, regs)
     struct re_pattern_buffer *bufp;
     const char *string;
     int size, startpos, range;
     struct re_registers *regs;
{
  int found;

  found = re_search_2 (bufp, NULL, 0, string, size, startpos, range,
		       regs, size);
  return found;
}
04999 #ifdef _LIBC
05000 weak_alias (__re_search, re_search)
05001 #endif
05002
05003
05004
05005
05006
05007
05008
05009
05010
05011
05012
05013
05014
05015
05016
05017
05018
05019
05020
05021
05022
05023
05024
/* Public two-string search.  All arguments are forwarded untouched to
   the wide-character implementation when MBS_SUPPORT is compiled in and
   the locale is multibyte (MB_CUR_MAX != 1), and to the byte
   implementation otherwise.  The implementation's result is returned
   as is.  */
int
re_search_2 (bufp, string1, size1, string2, size2, startpos, range, regs, stop)
     struct re_pattern_buffer *bufp;
     const char *string1, *string2;
     int size1, size2;
     int startpos;
     int range;
     struct re_registers *regs;
     int stop;
{
# ifdef MBS_SUPPORT
  if (MB_CUR_MAX != 1)
    return wcs_re_search_2 (bufp, string1, size1, string2, size2,
			    startpos, range, regs, stop);
# endif
  return byte_re_search_2 (bufp, string1, size1, string2, size2,
			   startpos, range, regs, stop);
}
05044 #ifdef _LIBC
05045 weak_alias (__re_search_2, re_search_2)
05046 #endif
05047
05048 #endif
05049
05050 #ifdef INSIDE_RECURSION
05051
/* Release VAR if it is non-null, then reset it to NULL.  VAR is
   evaluated more than once, so it must be a plain lvalue.

   The body is wrapped in do { } while (0) so that `FREE_VAR (x);' is
   exactly one statement and can safely be the body of an unbraced
   `if' or `else'.  */
#ifdef MATCH_MAY_ALLOCATE
# define FREE_VAR(var)							\
  do {									\
    if (var)								\
      REGEX_FREE (var);							\
    var = NULL;								\
  } while (0)
#else
# define FREE_VAR(var)							\
  do {									\
    if (var)								\
      free (var);							\
    var = NULL;								\
  } while (0)
#endif
05057
#ifdef WCHAR
/* Size threshold (in input bytes) used by the search routine: strings
   longer than this get their wide-character buffers from TALLOC,
   shorter ones from REGEX_TALLOC.  */
# define MAX_ALLOCA_SIZE	2000

/* Release the wide-character copies and offset tables of both subject
   strings.  It expands in a scope that provides size1, size2,
   wcs_string1, wcs_string2, mbs_offset1 and mbs_offset2.  The release
   call mirrors the allocation choice: plain free for buffers of strings
   larger than MAX_ALLOCA_SIZE, FREE_VAR for the others.  */
# define FREE_WCS_BUFFERS()						\
  do {									\
    if (size1 > MAX_ALLOCA_SIZE)					\
      {									\
	free (wcs_string1);						\
	free (mbs_offset1);						\
      }									\
    else								\
      {									\
	FREE_VAR (wcs_string1);						\
	FREE_VAR (mbs_offset1);						\
      }									\
    if (size2 > MAX_ALLOCA_SIZE)					\
      {									\
	free (wcs_string2);						\
	free (mbs_offset2);						\
      }									\
    else								\
      {									\
	FREE_VAR (wcs_string2);						\
	FREE_VAR (mbs_offset2);						\
      }									\
  } while (0)

#endif
05086
05087
/* Search the virtual concatenation of STRING1 (SIZE1 bytes) and
   STRING2 (SIZE2 bytes) for a match of the compiled pattern BUFP.

   Candidate start positions begin at STARTPOS and move forward when
   RANGE is positive, backward when it is negative; RANGE is first
   clamped so the position stays within 0 .. SIZE1 + SIZE2.  REGS and
   STOP are handed unchanged to the matcher.

   Returns the start position of the first candidate at which the
   matcher succeeds; -1 if STARTPOS is out of range or no candidate
   matches; -2 if the fastmap build, a buffer allocation, or the
   matcher reports -2.  */
static int
PREFIX(re_search_2) (bufp, string1, size1, string2, size2, startpos, range,
		     regs, stop)
     struct re_pattern_buffer *bufp;
     const char *string1, *string2;
     int size1, size2;
     int startpos;
     int range;
     struct re_registers *regs;
     int stop;
{
  int val;
  register char *fastmap = bufp->fastmap;
  register RE_TRANSLATE_TYPE translate = bufp->translate;
  int total_size = size1 + size2;
  int endpos = startpos + range;
#ifdef WCHAR
  /* Wide-character copies of the two subject strings.  */
  wchar_t *wcs_string1 = NULL, *wcs_string2 = NULL;
  /* Lengths of those copies, as returned by convert_mbs_to_wcs.  */
  int wcs_size1 = 0, wcs_size2 = 0;
  /* Offset tables filled by convert_mbs_to_wcs (presumably mapping
     wide-character index to byte offset -- confirm).  */
  int *mbs_offset1 = NULL, *mbs_offset2 = NULL;
  /* Scratch array required by convert_mbs_to_wcs; released right
     after each conversion.  */
  char *is_binary = NULL;
#endif

  /* Reject an out-of-range STARTPOS.  */
  if (startpos < 0 || startpos > total_size)
    return -1;

  /* Clamp RANGE so that STARTPOS + RANGE stays inside
     0 .. TOTAL_SIZE.  */
  if (endpos < 0)
    range = 0 - startpos;
  else if (endpos > total_size)
    range = total_size - startpos;

  /* Forward search for a pattern that starts with `begbuf' (or with
     `begline' when newline_anchor is off): only position 0 can match,
     so fail for a later start and otherwise shrink the range to 1.  */
  if (bufp->used > 0 && range > 0
      && ((re_opcode_t) bufp->buffer[0] == begbuf
	  || ((re_opcode_t) bufp->buffer[0] == begline
	      && !bufp->newline_anchor)))
    {
      if (startpos > 0)
	return -1;
      else
	range = 1;
    }

#ifdef emacs
  /* Forward search for a pattern that starts with `at_dot': limit the
     range to the distance from STARTPOS to PT.  */
  if (bufp->used > 0 && (re_opcode_t) bufp->buffer[0] == at_dot && range > 0)
    {
      range = PT - startpos;
      if (range <= 0)
	return -1;
    }
#endif

  /* Rebuild the fastmap if one is supplied but not marked accurate.  */
  if (fastmap && !bufp->fastmap_accurate)
    if (re_compile_fastmap (bufp) == -2)
      return -2;

#ifdef WCHAR
  /* Allocate and fill the wide-character buffers for each non-empty
     string.  Strings longer than MAX_ALLOCA_SIZE use TALLOC, shorter
     ones REGEX_TALLOC; the release calls below mirror that choice.  */
  if (size1 != 0)
    {
      if (size1 > MAX_ALLOCA_SIZE)
	{
	  wcs_string1 = TALLOC (size1 + 1, CHAR_T);
	  mbs_offset1 = TALLOC (size1 + 1, int);
	  is_binary = TALLOC (size1 + 1, char);
	}
      else
	{
	  wcs_string1 = REGEX_TALLOC (size1 + 1, CHAR_T);
	  mbs_offset1 = REGEX_TALLOC (size1 + 1, int);
	  is_binary = REGEX_TALLOC (size1 + 1, char);
	}
      if (!wcs_string1 || !mbs_offset1 || !is_binary)
	{
	  if (size1 > MAX_ALLOCA_SIZE)
	    {
	      free (wcs_string1);
	      free (mbs_offset1);
	      free (is_binary);
	    }
	  else
	    {
	      FREE_VAR (wcs_string1);
	      FREE_VAR (mbs_offset1);
	      FREE_VAR (is_binary);
	    }
	  return -2;
	}
      wcs_size1 = convert_mbs_to_wcs(wcs_string1, string1, size1,
				     mbs_offset1, is_binary);
      wcs_string1[wcs_size1] = L'\0';	/* Terminating sentinel.  */
      if (size1 > MAX_ALLOCA_SIZE)
	free (is_binary);
      else
	FREE_VAR (is_binary);
    }
  if (size2 != 0)
    {
      if (size2 > MAX_ALLOCA_SIZE)
	{
	  wcs_string2 = TALLOC (size2 + 1, CHAR_T);
	  mbs_offset2 = TALLOC (size2 + 1, int);
	  is_binary = TALLOC (size2 + 1, char);
	}
      else
	{
	  wcs_string2 = REGEX_TALLOC (size2 + 1, CHAR_T);
	  mbs_offset2 = REGEX_TALLOC (size2 + 1, int);
	  is_binary = REGEX_TALLOC (size2 + 1, char);
	}
      if (!wcs_string2 || !mbs_offset2 || !is_binary)
	{
	  /* Releases the first string's buffers as well.  */
	  FREE_WCS_BUFFERS ();
	  if (size2 > MAX_ALLOCA_SIZE)
	    free (is_binary);
	  else
	    FREE_VAR (is_binary);
	  return -2;
	}
      wcs_size2 = convert_mbs_to_wcs(wcs_string2, string2, size2,
				     mbs_offset2, is_binary);
      wcs_string2[wcs_size2] = L'\0';	/* Terminating sentinel.  */
      if (size2 > MAX_ALLOCA_SIZE)
	free (is_binary);
      else
	FREE_VAR (is_binary);
    }
#endif

  /* Try one candidate start position per iteration.  */
  for (;;)
    {
      /* With a fastmap, and a pattern that cannot match the empty
	 string, skip positions whose byte cannot begin a match.  */
      if (fastmap && startpos < total_size && !bufp->can_be_null)
	{
	  if (range > 0)	/* Searching forward.  */
	    {
	      register const char *d;
	      register int lim = 0;
	      int irange = range;

	      /* Stop the scan at the end of STRING1 when the range
		 crosses into STRING2; LIM is the part of the range
		 that is left at that point.  */
	      if (startpos < size1 && startpos + range >= size1)
		lim = range - (size1 - startpos);

	      d = (startpos >= size1 ? string2 - size1 : string1) + startpos;

	      /* Two loops so that `translate' is tested only once.  */
	      if (translate)
		while (range > lim
		       && !fastmap[(unsigned char)
				   translate[(unsigned char) *d++]])
		  range--;
	      else
		while (range > lim && !fastmap[(unsigned char) *d++])
		  range--;

	      /* Advance STARTPOS by the number of bytes skipped.  */
	      startpos += irange - range;
	    }
	  else			/* Searching backward (or RANGE is 0).  */
	    {
	      register CHAR_T c = (size1 == 0 || startpos >= size1
				   ? string2[startpos - size1]
				   : string1[startpos]);

	      if (!fastmap[(unsigned char) TRANSLATE (c)])
		goto advance;
	    }
	}

      /* At the very end of the input only an empty match is possible;
	 fail if a fastmap is in use and the pattern cannot match the
	 empty string.  */
      if (range >= 0 && startpos == total_size && fastmap
	  && !bufp->can_be_null)
	{
#ifdef WCHAR
	  FREE_WCS_BUFFERS ();
#endif
	  return -1;
	}

#ifdef WCHAR
      val = wcs_re_match_2_internal (bufp, string1, size1, string2,
				     size2, startpos, regs, stop,
				     wcs_string1, wcs_size1,
				     wcs_string2, wcs_size2,
				     mbs_offset1, mbs_offset2);
#else
      val = byte_re_match_2_internal (bufp, string1, size1, string2,
				      size2, startpos, regs, stop);
#endif

#ifndef REGEX_MALLOC
# ifdef C_ALLOCA
      alloca (0);
# endif
#endif

      /* A non-negative result is a match at STARTPOS.  */
      if (val >= 0)
	{
#ifdef WCHAR
	  FREE_WCS_BUFFERS ();
#endif
	  return startpos;
	}

      /* -2 from the matcher is passed straight back to the caller.  */
      if (val == -2)
	{
#ifdef WCHAR
	  FREE_WCS_BUFFERS ();
#endif
	  return -2;
	}

    advance:
      /* Move to the next candidate in the search direction, or stop
	 when the range is used up.  */
      if (!range)
	break;
      else if (range > 0)
	{
	  range--;
	  startpos++;
	}
      else
	{
	  range++;
	  startpos--;
	}
    }
#ifdef WCHAR
  FREE_WCS_BUFFERS ();
#endif
  return -1;
}
05338
#ifdef WCHAR
/* Convert PTR, a pointer into one of the two wide-character subject
   strings, to an offset in the virtual concatenation of the original
   strings.  The offset is looked up in the string's mbs_offset table
   (0 when that table is NULL); csize1 is added for a pointer into the
   second string.  FIRST_STRING_P decides which string PTR belongs to.
   Expands in a scope providing string1, string2, mbs_offset1,
   mbs_offset2 and csize1.  */
# define POINTER_TO_OFFSET(ptr)						\
  (FIRST_STRING_P (ptr)							\
   ? ((regoff_t)(mbs_offset1 != NULL? mbs_offset1[(ptr)-string1] : 0))	\
   : ((regoff_t)((mbs_offset2 != NULL? mbs_offset2[(ptr)-string2] : 0)	\
		 + csize1)))
#else
/* Convert PTR, a pointer into one of the two subject strings, to an
   offset in their virtual concatenation: the distance from the start
   of its string, plus size1 for a pointer into the second string.
   Expands in a scope providing string1, string2 and size1.  */
# define POINTER_TO_OFFSET(ptr)						\
  (FIRST_STRING_P (ptr)							\
   ? ((regoff_t) ((ptr) - string1))					\
   : ((regoff_t) ((ptr) - string2 + size1)))
#endif
05357
05358
05359
/* True when the current data limit `dend' is the end of the matchable
   part of the first string.  */
#define MATCHING_IN_FIRST_STRING  (dend == end_match_1)

/* Make sure `d' points at a readable character.  When `d' has reached
   `dend', either switch to the start of the second string (with
   `dend' moved to end_match_2) or, if the second string's limit was
   already in effect, jump to the enclosing function's `fail' label.
   Written as a loop so the check is repeated after the switch.  */
#define PREFETCH()							\
  while (d == dend)							\
    {									\
      /* Already at the limit of the second string: nothing left.  */	\
      if (dend == end_match_2)						\
	goto fail;							\
      /* Otherwise continue in the second string.  */			\
      d = string2;							\
      dend = end_match_2;						\
    }

/* Tests for being at the very beginning or very end of the subject:
   the beginning is the start of the first non-empty string (the test
   also holds whenever size2 is 0), the end is `end2'.  */
#define AT_STRINGS_BEG(d) ((d) == (size1 ? string1 : string2) || !size2)
#define AT_STRINGS_END(d) ((d) == end2)
05379
05380
05381
05382
05383
05384
/* Test whether the character at D is a word constituent.  D may sit on
   the seam between the two strings: D == end1 is read as the first
   character of string2, and D == string2 - 1 as the last character of
   string1.  Callers must not use it at the very beginning or end of
   the subject, since the character would not exist.  */
#ifdef WCHAR
/* Wide build: alphanumeric per iswalnum, or an underscore.  */
# define WORDCHAR_P(d)							\
  (iswalnum ((wint_t)((d) == end1 ? *string2				\
	   : (d) == string2 - 1 ? *(end1 - 1) : *(d))) != 0		\
   || ((d) == end1 ? *string2						\
       : (d) == string2 - 1 ? *(end1 - 1) : *(d)) == L'_')
#else
/* Byte build: the character's SYNTAX class is Sword.  */
# define WORDCHAR_P(d)							\
  (SYNTAX ((d) == end1 ? *string2					\
	   : (d) == string2 - 1 ? *(end1 - 1) : *(d))			\
   == Sword)
#endif

/* Disabled: kept for reference only, never compiled.  */
#if 0
/* Test whether D is at a word boundary: at either end of the subject,
   or between a word and a non-word character.  */
#define AT_WORD_BOUNDARY(d)						\
  (AT_STRINGS_BEG (d) || AT_STRINGS_END (d)				\
   || WORDCHAR_P (d - 1) != WORDCHAR_P (d))
#endif
05407
05408
/* Release everything the matcher allocated for one call.  The macro
   expands inside the matcher and names its local variables directly.

   With MATCH_MAY_ALLOCATE the failure stack and the per-register
   arrays are released.  In the WCHAR build the wide-character string
   copies and offset tables are released too, but only when
   `cant_free_wcs_buf' is zero (the matcher clears that flag when it
   allocates those buffers itself rather than receiving them from its
   caller).  With neither option the macro does nothing.  */
#ifdef MATCH_MAY_ALLOCATE
# ifdef WCHAR
#  define FREE_VARIABLES()						\
  do {									\
    REGEX_FREE_STACK (fail_stack.stack);				\
    FREE_VAR (regstart);						\
    FREE_VAR (regend);							\
    FREE_VAR (old_regstart);						\
    FREE_VAR (old_regend);						\
    FREE_VAR (best_regstart);						\
    FREE_VAR (best_regend);						\
    FREE_VAR (reg_info);						\
    FREE_VAR (reg_dummy);						\
    FREE_VAR (reg_info_dummy);						\
    if (!cant_free_wcs_buf)						\
      {									\
	FREE_VAR (string1);						\
	FREE_VAR (string2);						\
	FREE_VAR (mbs_offset1);						\
	FREE_VAR (mbs_offset2);						\
      }									\
  } while (0)
# else
#  define FREE_VARIABLES()						\
  do {									\
    REGEX_FREE_STACK (fail_stack.stack);				\
    FREE_VAR (regstart);						\
    FREE_VAR (regend);							\
    FREE_VAR (old_regstart);						\
    FREE_VAR (old_regend);						\
    FREE_VAR (best_regstart);						\
    FREE_VAR (best_regend);						\
    FREE_VAR (reg_info);						\
    FREE_VAR (reg_dummy);						\
    FREE_VAR (reg_info_dummy);						\
  } while (0)
# endif
#else
# ifdef WCHAR
#  define FREE_VARIABLES()						\
  do {									\
    if (!cant_free_wcs_buf)						\
      {									\
	FREE_VAR (string1);						\
	FREE_VAR (string2);						\
	FREE_VAR (mbs_offset1);						\
	FREE_VAR (mbs_offset2);						\
      }									\
  } while (0)
# else
/* Nothing to release; still usable as a statement.  */
#  define FREE_VARIABLES() ((void)0)
# endif
#endif
05462
05463
05464
05465
05466
05467
05468
05469
/* Initial values of the matcher's `highest_active_reg' and
   `lowest_active_reg' while no register is active.
   NOTE(review): 1 << BYTEWIDTH is presumably chosen because it lies
   above every register number that can be stored in the pattern --
   confirm against the register-number encoding.  The lowest sentinel
   is one greater than the highest.  */
#define NO_HIGHEST_ACTIVE_REG (1 << BYTEWIDTH)
#define NO_LOWEST_ACTIVE_REG (NO_HIGHEST_ACTIVE_REG + 1)
05472
05473 #else
05474
05475
05476 #ifndef emacs
05477
05478
/* Match the compiled pattern BUFP against STRING (SIZE bytes) starting
   exactly at POS.  STRING is handed to the matcher as the second
   string with an empty first string, and SIZE as the stop position.
   The wide-character matcher is used when MBS_SUPPORT is compiled in
   and the locale is multibyte (MB_CUR_MAX != 1); it receives no
   pre-converted buffers.  The matcher's result is returned as is.  */
int
re_match (bufp, string, size, pos, regs)
     struct re_pattern_buffer *bufp;
     const char *string;
     int size, pos;
     struct re_registers *regs;
{
  int status;

# ifdef MBS_SUPPORT
  if (MB_CUR_MAX != 1)
    {
      status = wcs_re_match_2_internal (bufp, NULL, 0, string, size,
					pos, regs, size,
					NULL, 0, NULL, 0, NULL, NULL);
    }
  else
# endif
    {
      status = byte_re_match_2_internal (bufp, NULL, 0, string, size,
					 pos, regs, size);
    }

# ifndef REGEX_MALLOC
#  ifdef C_ALLOCA
  /* With the C emulation of alloca, a zero-size request gives it the
     chance to reclaim storage.  */
  alloca (0);
#  endif
# endif

  return status;
}
05503 # ifdef _LIBC
05504 weak_alias (__re_match, re_match)
05505 # endif
05506 #endif
05507
05508 #endif
05509
05510 #ifdef INSIDE_RECURSION
/* Forward declarations of the matcher's helper routines; their
   definitions are not in this part of the file.  Going by their names,
   the first three test whether a group, an alternative, or a single
   operation can match the empty string, and the last compares two
   character runs through a translate table -- confirm against the
   definitions.  */
static boolean PREFIX(group_match_null_string_p) _RE_ARGS ((UCHAR_T **p,
						   UCHAR_T *end,
					PREFIX(register_info_type) *reg_info));
static boolean PREFIX(alt_match_null_string_p) _RE_ARGS ((UCHAR_T *p,
						 UCHAR_T *end,
					PREFIX(register_info_type) *reg_info));
static boolean PREFIX(common_op_match_null_string_p) _RE_ARGS ((UCHAR_T **p,
						       UCHAR_T *end,
					PREFIX(register_info_type) *reg_info));
static int PREFIX(bcmp_translate) _RE_ARGS ((const CHAR_T *s1, const CHAR_T *s2,
				     int len, char *translate));
05522 #else
05523
05524
05525
05526
05527
05528
05529
05530
05531
05532
05533
05534
05535
05536
/* Match the compiled pattern BUFP against the virtual concatenation of
   STRING1 (SIZE1 bytes) and STRING2 (SIZE2 bytes), starting exactly at
   POS and not looking beyond STOP.  The wide-character matcher is used
   when MBS_SUPPORT is compiled in and the locale is multibyte
   (MB_CUR_MAX != 1); it receives no pre-converted buffers.  The
   matcher's result is returned as is.  */
int
re_match_2 (bufp, string1, size1, string2, size2, pos, regs, stop)
     struct re_pattern_buffer *bufp;
     const char *string1, *string2;
     int size1, size2;
     int pos;
     struct re_registers *regs;
     int stop;
{
  int status;

# ifdef MBS_SUPPORT
  if (MB_CUR_MAX != 1)
    {
      status = wcs_re_match_2_internal (bufp, string1, size1, string2,
					size2, pos, regs, stop,
					NULL, 0, NULL, 0, NULL, NULL);
    }
  else
# endif
    {
      status = byte_re_match_2_internal (bufp, string1, size1, string2,
					 size2, pos, regs, stop);
    }

#ifndef REGEX_MALLOC
# ifdef C_ALLOCA
  /* With the C emulation of alloca, a zero-size request gives it the
     chance to reclaim storage.  */
  alloca (0);
# endif
#endif

  return status;
}
05564 #ifdef _LIBC
05565 weak_alias (__re_match_2, re_match_2)
05566 #endif
05567
05568 #endif
05569
05570 #ifdef INSIDE_RECURSION
05571
05572 #ifdef WCHAR
05573 static int count_mbs_length PARAMS ((int *, int));
05574
05575
05576
05577
05578
05579
/* Find the index I for which OFFSET_BUFFER[I] == LENGTH.

   OFFSET_BUFFER is searched by bisection, so its entries must be in
   ascending order; the callers in this file pass the offset tables
   filled by convert_mbs_to_wcs (presumably the byte offset of each
   wide character -- confirm).  Entry LENGTH itself is read first, so
   the table must have at least LENGTH + 1 readable elements.

   Returns:
     -1      if LENGTH is negative, or no entry equals LENGTH;
      0      if OFFSET_BUFFER is NULL;
     LENGTH  if OFFSET_BUFFER[LENGTH] == LENGTH (the quick path taken
	     when index and offset coincide);
      I      otherwise, the index found by the search.  */
static int
count_mbs_length(offset_buffer, length)
     int *offset_buffer;
     int length;
{
  int upper, lower;

  /* A negative length is never valid.  */
  if (length < 0)
    return -1;

  if (offset_buffer == NULL)
    return 0;

  /* Quick path: offset and index coincide at LENGTH.  */
  if (offset_buffer[length] == length)
    return length;

  /* Bisect the open interval (LOWER, UPPER).  Neither end needs to be
     probed: index LENGTH was tested just above, and the only length
     index 0 could answer for is 0, which is index LENGTH as well.  */
  lower = 0;
  upper = length;

  while (upper - lower > 1)
    {
      /* Computed this way the midpoint cannot overflow, unlike
	 (lower + upper) / 2.  */
      int middle = lower + (upper - lower) / 2;

      if (offset_buffer[middle] > length)
	upper = middle;
      else if (offset_buffer[middle] < length)
	lower = middle;
      else
	return middle;
    }

  return -1;
}
05618 #endif
05619
05620
05621
05622 #ifdef WCHAR
05623 static int
05624 wcs_re_match_2_internal (bufp, cstring1, csize1, cstring2, csize2, pos,
05625 regs, stop, string1, size1, string2, size2,
05626 mbs_offset1, mbs_offset2)
05627 struct re_pattern_buffer *bufp;
05628 const char *cstring1, *cstring2;
05629 int csize1, csize2;
05630 int pos;
05631 struct re_registers *regs;
05632 int stop;
05633
05634
05635
05636 wchar_t *string1, *string2;
05637
05638 int size1, size2;
05639
05640 int *mbs_offset1, *mbs_offset2;
05641 #else
05642 static int
05643 byte_re_match_2_internal (bufp, string1, size1,string2, size2, pos,
05644 regs, stop)
05645 struct re_pattern_buffer *bufp;
05646 const char *string1, *string2;
05647 int size1, size2;
05648 int pos;
05649 struct re_registers *regs;
05650 int stop;
05651 #endif
05652 {
05653
05654 int mcnt;
05655 UCHAR_T *p1;
05656 #ifdef WCHAR
05657
05658 char *is_binary = NULL;
05659
05660 int cant_free_wcs_buf = 1;
05661 #endif
05662
05663
05664 const CHAR_T *end1, *end2;
05665
05666
05667
05668 const CHAR_T *end_match_1, *end_match_2;
05669
05670
05671 const CHAR_T *d, *dend;
05672
05673
05674 #ifdef WCHAR
05675 UCHAR_T *pattern, *p;
05676 register UCHAR_T *pend;
05677 #else
05678 UCHAR_T *p = bufp->buffer;
05679 register UCHAR_T *pend = p + bufp->used;
05680 #endif
05681
05682
05683
05684 UCHAR_T *just_past_start_mem = 0;
05685
05686
05687 RE_TRANSLATE_TYPE translate = bufp->translate;
05688
05689
05690
05691
05692
05693
05694
05695
05696
05697
05698 #ifdef MATCH_MAY_ALLOCATE
05699 PREFIX(fail_stack_type) fail_stack;
05700 #endif
05701 #ifdef DEBUG
05702 static unsigned failure_id;
05703 unsigned nfailure_points_pushed = 0, nfailure_points_popped = 0;
05704 #endif
05705
05706 #ifdef REL_ALLOC
05707
05708
05709 fail_stack_elt_t *failure_stack_ptr;
05710 #endif
05711
05712
05713
05714
05715 size_t num_regs = bufp->re_nsub + 1;
05716
05717
05718 active_reg_t lowest_active_reg = NO_LOWEST_ACTIVE_REG;
05719 active_reg_t highest_active_reg = NO_HIGHEST_ACTIVE_REG;
05720
05721
05722
05723
05724
05725
05726
05727
05728 #ifdef MATCH_MAY_ALLOCATE
05729 const CHAR_T **regstart, **regend;
05730 #endif
05731
05732
05733
05734
05735
05736
05737 #ifdef MATCH_MAY_ALLOCATE
05738 const CHAR_T **old_regstart, **old_regend;
05739 #endif
05740
05741
05742
05743
05744
05745
05746
05747 #ifdef MATCH_MAY_ALLOCATE
05748 PREFIX(register_info_type) *reg_info;
05749 #endif
05750
05751
05752
05753
05754
05755 unsigned best_regs_set = false;
05756 #ifdef MATCH_MAY_ALLOCATE
05757 const CHAR_T **best_regstart, **best_regend;
05758 #endif
05759
05760
05761
05762
05763
05764
05765
05766
05767
05768 const CHAR_T *match_end = NULL;
05769
05770
05771 int set_regs_matched_done = 0;
05772
05773
05774 #ifdef MATCH_MAY_ALLOCATE
05775 const CHAR_T **reg_dummy;
05776 PREFIX(register_info_type) *reg_info_dummy;
05777 #endif
05778
05779 #ifdef DEBUG
05780
05781 unsigned num_regs_pushed = 0;
05782 #endif
05783
05784 DEBUG_PRINT1 ("\n\nEntering re_match_2.\n");
05785
05786 INIT_FAIL_STACK ();
05787
05788 #ifdef MATCH_MAY_ALLOCATE
05789
05790
05791
05792
05793
05794 if (bufp->re_nsub)
05795 {
05796 regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
05797 regend = REGEX_TALLOC (num_regs, const CHAR_T *);
05798 old_regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
05799 old_regend = REGEX_TALLOC (num_regs, const CHAR_T *);
05800 best_regstart = REGEX_TALLOC (num_regs, const CHAR_T *);
05801 best_regend = REGEX_TALLOC (num_regs, const CHAR_T *);
05802 reg_info = REGEX_TALLOC (num_regs, PREFIX(register_info_type));
05803 reg_dummy = REGEX_TALLOC (num_regs, const CHAR_T *);
05804 reg_info_dummy = REGEX_TALLOC (num_regs, PREFIX(register_info_type));
05805
05806 if (!(regstart && regend && old_regstart && old_regend && reg_info
05807 && best_regstart && best_regend && reg_dummy && reg_info_dummy))
05808 {
05809 FREE_VARIABLES ();
05810 return -2;
05811 }
05812 }
05813 else
05814 {
05815
05816
05817 regstart = regend = old_regstart = old_regend = best_regstart
05818 = best_regend = reg_dummy = NULL;
05819 reg_info = reg_info_dummy = (PREFIX(register_info_type) *) NULL;
05820 }
05821 #endif
05822
05823
05824 #ifdef WCHAR
05825 if (pos < 0 || pos > csize1 + csize2)
05826 #else
05827 if (pos < 0 || pos > size1 + size2)
05828 #endif
05829 {
05830 FREE_VARIABLES ();
05831 return -1;
05832 }
05833
05834 #ifdef WCHAR
05835
05836
05837 if (string1 == NULL && string2 == NULL)
05838 {
05839
05840
05841
05842 cant_free_wcs_buf = 0;
05843
05844 if (csize1 != 0)
05845 {
05846 string1 = REGEX_TALLOC (csize1 + 1, CHAR_T);
05847 mbs_offset1 = REGEX_TALLOC (csize1 + 1, int);
05848 is_binary = REGEX_TALLOC (csize1 + 1, char);
05849 if (!string1 || !mbs_offset1 || !is_binary)
05850 {
05851 FREE_VAR (string1);
05852 FREE_VAR (mbs_offset1);
05853 FREE_VAR (is_binary);
05854 return -2;
05855 }
05856 }
05857 if (csize2 != 0)
05858 {
05859 string2 = REGEX_TALLOC (csize2 + 1, CHAR_T);
05860 mbs_offset2 = REGEX_TALLOC (csize2 + 1, int);
05861 is_binary = REGEX_TALLOC (csize2 + 1, char);
05862 if (!string2 || !mbs_offset2 || !is_binary)
05863 {
05864 FREE_VAR (string1);
05865 FREE_VAR (mbs_offset1);
05866 FREE_VAR (string2);
05867 FREE_VAR (mbs_offset2);
05868 FREE_VAR (is_binary);
05869 return -2;
05870 }
05871 size2 = convert_mbs_to_wcs(string2, cstring2, csize2,
05872 mbs_offset2, is_binary);
05873 string2[size2] = L'\0';
05874 FREE_VAR (is_binary);
05875 }
05876 }
05877
05878
05879
05880 p = pattern = (CHAR_T*)bufp->buffer;
05881 pend = (CHAR_T*)(bufp->buffer + bufp->used);
05882
05883 #endif
05884
05885
05886
05887
05888 for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
05889 {
05890 regstart[mcnt] = regend[mcnt]
05891 = old_regstart[mcnt] = old_regend[mcnt] = REG_UNSET_VALUE;
05892
05893 REG_MATCH_NULL_STRING_P (reg_info[mcnt]) = MATCH_NULL_UNSET_VALUE;
05894 IS_ACTIVE (reg_info[mcnt]) = 0;
05895 MATCHED_SOMETHING (reg_info[mcnt]) = 0;
05896 EVER_MATCHED_SOMETHING (reg_info[mcnt]) = 0;
05897 }
05898
05899
05900
05901 if (size2 == 0 && string1 != NULL)
05902 {
05903 string2 = string1;
05904 size2 = size1;
05905 string1 = 0;
05906 size1 = 0;
05907 #ifdef WCHAR
05908 mbs_offset2 = mbs_offset1;
05909 csize2 = csize1;
05910 mbs_offset1 = NULL;
05911 csize1 = 0;
05912 #endif
05913 }
05914 end1 = string1 + size1;
05915 end2 = string2 + size2;
05916
05917
05918 #ifdef WCHAR
05919 if (stop <= csize1)
05920 {
05921 mcnt = count_mbs_length(mbs_offset1, stop);
05922 end_match_1 = string1 + mcnt;
05923 end_match_2 = string2;
05924 }
05925 else
05926 {
05927 if (stop > csize1 + csize2)
05928 stop = csize1 + csize2;
05929 end_match_1 = end1;
05930 mcnt = count_mbs_length(mbs_offset2, stop-csize1);
05931 end_match_2 = string2 + mcnt;
05932 }
05933 if (mcnt < 0)
05934 {
05935 FREE_VARIABLES ();
05936 return -1;
05937 }
05938 #else
05939 if (stop <= size1)
05940 {
05941 end_match_1 = string1 + stop;
05942 end_match_2 = string2;
05943 }
05944 else
05945 {
05946 end_match_1 = end1;
05947 end_match_2 = string2 + stop - size1;
05948 }
05949 #endif
05950
05951
05952
05953
05954
05955
05956
05957 #ifdef WCHAR
05958 if (size1 > 0 && pos <= csize1)
05959 {
05960 mcnt = count_mbs_length(mbs_offset1, pos);
05961 d = string1 + mcnt;
05962 dend = end_match_1;
05963 }
05964 else
05965 {
05966 mcnt = count_mbs_length(mbs_offset2, pos-csize1);
05967 d = string2 + mcnt;
05968 dend = end_match_2;
05969 }
05970
05971 if (mcnt < 0)
05972 {
05973 FREE_VARIABLES ();
05974 return -1;
05975 }
05976 #else
05977 if (size1 > 0 && pos <= size1)
05978 {
05979 d = string1 + pos;
05980 dend = end_match_1;
05981 }
05982 else
05983 {
05984 d = string2 + pos - size1;
05985 dend = end_match_2;
05986 }
05987 #endif
05988
05989 DEBUG_PRINT1 ("The compiled pattern is:\n");
05990 DEBUG_PRINT_COMPILED_PATTERN (bufp, p, pend);
05991 DEBUG_PRINT1 ("The string to match is: `");
05992 DEBUG_PRINT_DOUBLE_STRING (d, string1, size1, string2, size2);
05993 DEBUG_PRINT1 ("'\n");
05994
05995
05996
05997
05998 for (;;)
05999 {
06000 #ifdef _LIBC
06001 DEBUG_PRINT2 ("\n%p: ", p);
06002 #else
06003 DEBUG_PRINT2 ("\n0x%x: ", p);
06004 #endif
06005
06006 if (p == pend)
06007 {
06008 DEBUG_PRINT1 ("end of pattern ... ");
06009
06010
06011
06012 if (d != end_match_2)
06013 {
06014
06015
06016 boolean same_str_p = (FIRST_STRING_P (match_end)
06017 == MATCHING_IN_FIRST_STRING);
06018
06019 boolean best_match_p;
06020
06021
06022
06023 if (same_str_p)
06024 best_match_p = d > match_end;
06025 else
06026 best_match_p = !MATCHING_IN_FIRST_STRING;
06027
06028 DEBUG_PRINT1 ("backtracking.\n");
06029
06030 if (!FAIL_STACK_EMPTY ())
06031 {
06032
06033
06034 if (!best_regs_set || best_match_p)
06035 {
06036 best_regs_set = true;
06037 match_end = d;
06038
06039 DEBUG_PRINT1 ("\nSAVING match as best so far.\n");
06040
06041 for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
06042 {
06043 best_regstart[mcnt] = regstart[mcnt];
06044 best_regend[mcnt] = regend[mcnt];
06045 }
06046 }
06047 goto fail;
06048 }
06049
06050
06051
06052
06053 else if (best_regs_set && !best_match_p)
06054 {
06055 restore_best_regs:
06056
06057
06058
06059
06060
06061 DEBUG_PRINT1 ("Restoring best registers.\n");
06062
06063 d = match_end;
06064 dend = ((d >= string1 && d <= end1)
06065 ? end_match_1 : end_match_2);
06066
06067 for (mcnt = 1; (unsigned) mcnt < num_regs; mcnt++)
06068 {
06069 regstart[mcnt] = best_regstart[mcnt];
06070 regend[mcnt] = best_regend[mcnt];
06071 }
06072 }
06073 }
06074
06075 succeed_label:
06076 DEBUG_PRINT1 ("Accepting match.\n");
06077
06078 if (regs && !bufp->no_sub)
06079 {
06080
06081 if (bufp->regs_allocated == REGS_UNALLOCATED)
06082 {
06083
06084
06085 regs->num_regs = MAX (RE_NREGS, num_regs + 1);
06086 regs->start = TALLOC (regs->num_regs, regoff_t);
06087 regs->end = TALLOC (regs->num_regs, regoff_t);
06088 if (regs->start == NULL || regs->end == NULL)
06089 {
06090 FREE_VARIABLES ();
06091 return -2;
06092 }
06093 bufp->regs_allocated = REGS_REALLOCATE;
06094 }
06095 else if (bufp->regs_allocated == REGS_REALLOCATE)
06096 {
06097
06098
06099 if (regs->num_regs < num_regs + 1)
06100 {
06101 regs->num_regs = num_regs + 1;
06102 RETALLOC (regs->start, regs->num_regs, regoff_t);
06103 RETALLOC (regs->end, regs->num_regs, regoff_t);
06104 if (regs->start == NULL || regs->end == NULL)
06105 {
06106 FREE_VARIABLES ();
06107 return -2;
06108 }
06109 }
06110 }
06111 else
06112 {
06113
06114
06115 assert (bufp->regs_allocated == REGS_FIXED);
06116 }
06117
06118
06119
06120
06121 if (regs->num_regs > 0)
06122 {
06123 regs->start[0] = pos;
06124 #ifdef WCHAR
06125 if (MATCHING_IN_FIRST_STRING)
06126 regs->end[0] = mbs_offset1 != NULL ?
06127 mbs_offset1[d-string1] : 0;
06128 else
06129 regs->end[0] = csize1 + (mbs_offset2 != NULL ?
06130 mbs_offset2[d-string2] : 0);
06131 #else
06132 regs->end[0] = (MATCHING_IN_FIRST_STRING
06133 ? ((regoff_t) (d - string1))
06134 : ((regoff_t) (d - string2 + size1)));
06135 #endif
06136 }
06137
06138
06139
06140 for (mcnt = 1; (unsigned) mcnt < MIN (num_regs, regs->num_regs);
06141 mcnt++)
06142 {
06143 if (REG_UNSET (regstart[mcnt]) || REG_UNSET (regend[mcnt]))
06144 regs->start[mcnt] = regs->end[mcnt] = -1;
06145 else
06146 {
06147 regs->start[mcnt]
06148 = (regoff_t) POINTER_TO_OFFSET (regstart[mcnt]);
06149 regs->end[mcnt]
06150 = (regoff_t) POINTER_TO_OFFSET (regend[mcnt]);
06151 }
06152 }
06153
06154
06155
06156
06157
06158
06159 for (mcnt = num_regs; (unsigned) mcnt < regs->num_regs; mcnt++)
06160 regs->start[mcnt] = regs->end[mcnt] = -1;
06161 }
06162
06163 DEBUG_PRINT4 ("%u failure points pushed, %u popped (%u remain).\n",
06164 nfailure_points_pushed, nfailure_points_popped,
06165 nfailure_points_pushed - nfailure_points_popped);
06166 DEBUG_PRINT2 ("%u registers pushed.\n", num_regs_pushed);
06167
06168 #ifdef WCHAR
06169 if (MATCHING_IN_FIRST_STRING)
06170 mcnt = mbs_offset1 != NULL ? mbs_offset1[d-string1] : 0;
06171 else
06172 mcnt = (mbs_offset2 != NULL ? mbs_offset2[d-string2] : 0) +
06173 csize1;
06174 mcnt -= pos;
06175 #else
06176 mcnt = d - pos - (MATCHING_IN_FIRST_STRING
06177 ? string1
06178 : string2 - size1);
06179 #endif
06180
06181 DEBUG_PRINT2 ("Returning %d from re_match_2.\n", mcnt);
06182
06183 FREE_VARIABLES ();
06184 return mcnt;
06185 }
06186
06187
06188 switch (SWITCH_ENUM_CAST ((re_opcode_t) *p++))
06189 {
06190
06191
06192 case no_op:
06193 DEBUG_PRINT1 ("EXECUTING no_op.\n");
06194 break;
06195
06196 case succeed:
06197 DEBUG_PRINT1 ("EXECUTING succeed.\n");
06198 goto succeed_label;
06199
06200
06201
06202
06203 case exactn:
06204 #ifdef MBS_SUPPORT
06205 case exactn_bin:
06206 #endif
06207 mcnt = *p++;
06208 DEBUG_PRINT2 ("EXECUTING exactn %d.\n", mcnt);
06209
06210
06211
06212 if (translate)
06213 {
06214 do
06215 {
06216 PREFETCH ();
06217 #ifdef WCHAR
06218 if (*d <= 0xff)
06219 {
06220 if ((UCHAR_T) translate[(unsigned char) *d++]
06221 != (UCHAR_T) *p++)
06222 goto fail;
06223 }
06224 else
06225 {
06226 if (*d++ != (CHAR_T) *p++)
06227 goto fail;
06228 }
06229 #else
06230 if ((UCHAR_T) translate[(unsigned char) *d++]
06231 != (UCHAR_T) *p++)
06232 goto fail;
06233 #endif
06234 }
06235 while (--mcnt);
06236 }
06237 else
06238 {
06239 do
06240 {
06241 PREFETCH ();
06242 if (*d++ != (CHAR_T) *p++) goto fail;
06243 }
06244 while (--mcnt);
06245 }
06246 SET_REGS_MATCHED ();
06247 break;
06248
06249
06250
06251 case anychar:
06252 DEBUG_PRINT1 ("EXECUTING anychar.\n");
06253
06254 PREFETCH ();
06255
06256 if ((!(bufp->syntax & RE_DOT_NEWLINE) && TRANSLATE (*d) == '\n')
06257 || (bufp->syntax & RE_DOT_NOT_NULL && TRANSLATE (*d) == '\000'))
06258 goto fail;
06259
06260 SET_REGS_MATCHED ();
06261 DEBUG_PRINT2 (" Matched `%ld'.\n", (long int) *d);
06262 d++;
06263 break;
06264
06265
06266 case charset:
06267 case charset_not:
06268 {
06269 register UCHAR_T c;
06270 #ifdef WCHAR
06271 unsigned int i, char_class_length, coll_symbol_length,
06272 equiv_class_length, ranges_length, chars_length, length;
06273 CHAR_T *workp, *workp2, *charset_top;
06274 #define WORK_BUFFER_SIZE 128
06275 CHAR_T str_buf[WORK_BUFFER_SIZE];
06276 # ifdef _LIBC
06277 uint32_t nrules;
06278 # endif
06279 #endif
06280 boolean not = (re_opcode_t) *(p - 1) == charset_not;
06281
06282 DEBUG_PRINT2 ("EXECUTING charset%s.\n", not ? "_not" : "");
06283 PREFETCH ();
06284 c = TRANSLATE (*d);
06285 #ifdef WCHAR
06286 # ifdef _LIBC
06287 nrules = _NL_CURRENT_WORD (LC_COLLATE, _NL_COLLATE_NRULES);
06288 # endif
06289 charset_top = p - 1;
06290 char_class_length = *p++;
06291 coll_symbol_length = *p++;
06292 equiv_class_length = *p++;
06293 ranges_length = *p++;
06294 chars_length = *p++;
06295
06296
06297
06298
06299
06300 workp = p;
06301
06302 p += char_class_length + coll_symbol_length+ equiv_class_length +
06303 2*ranges_length + chars_length;
06304
06305
06306 for (i = 0; i < char_class_length ; i += CHAR_CLASS_SIZE)
06307 {
06308 wctype_t wctype;
06309 uintptr_t alignedp = ((uintptr_t)workp
06310 + __alignof__(wctype_t) - 1)
06311 & ~(uintptr_t)(__alignof__(wctype_t) - 1);
06312 wctype = *((wctype_t*)alignedp);
06313 workp += CHAR_CLASS_SIZE;
06314 # ifdef _LIBC
06315 if (__iswctype((wint_t)c, wctype))
06316 goto char_set_matched;
06317 # else
06318 if (iswctype((wint_t)c, wctype))
06319 goto char_set_matched;
06320 # endif
06321 }
06322
06323
06324 # ifdef _LIBC
06325 if (nrules != 0)
06326 {
06327 const unsigned char *extra = (const unsigned char *)
06328 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_SYMB_EXTRAMB);
06329
06330 for (workp2 = workp + coll_symbol_length ; workp < workp2 ;
06331 workp++)
06332 {
06333 int32_t *wextra;
06334 wextra = (int32_t*)(extra + *workp++);
06335 for (i = 0; i < *wextra; ++i)
06336 if (TRANSLATE(d[i]) != wextra[1 + i])
06337 break;
06338
06339 if (i == *wextra)
06340 {
06341
06342
06343 d += i - 1;
06344 goto char_set_matched;
06345 }
06346 }
06347 }
06348 else
06349 # endif
06350
06351
06352 {
06353 for (workp2 = workp + coll_symbol_length ; workp < workp2 ;)
06354 {
06355 const CHAR_T *backup_d = d, *backup_dend = dend;
06356 # ifdef _LIBC
06357 length = __wcslen (workp);
06358 # else
06359 length = wcslen (workp);
06360 # endif
06361
06362
06363
06364
06365 # ifdef _LIBC
06366 if (__wcscoll (workp, d) > 0)
06367 # else
06368 if (wcscoll (workp, d) > 0)
06369 # endif
06370 {
06371 workp += length + 1;
06372 continue;
06373 }
06374
06375
06376
06377
06378
06379 for (i = 0 ; i < WORK_BUFFER_SIZE-1 ; i++, d++)
06380 {
06381 int match;
06382 if (d == dend)
06383 {
06384 if (dend == end_match_2)
06385 break;
06386 d = string2;
06387 dend = end_match_2;
06388 }
06389
06390
06391 str_buf[i] = TRANSLATE(*d);
06392 str_buf[i+1] = '\0';
06393
06394 # ifdef _LIBC
06395 match = __wcscoll (workp, str_buf);
06396 # else
06397 match = wcscoll (workp, str_buf);
06398 # endif
06399 if (match == 0)
06400 goto char_set_matched;
06401
06402 if (match < 0)
06403
06404
06405
06406 break;
06407
06408
06409
06410
06411 }
06412
06413 d = backup_d;
06414 dend = backup_dend;
06415 workp += length + 1;
06416 }
06417 }
06418
06419 # ifdef _LIBC
06420 if (nrules != 0)
06421 {
06422 const CHAR_T *backup_d = d, *backup_dend = dend;
06423
06424
06425 const int32_t *table;
06426 const int32_t *weights;
06427 const int32_t *extra;
06428 const int32_t *indirect;
06429 int32_t idx, idx2;
06430 wint_t *cp;
06431 size_t len;
06432
06433
06434 # include <locale/weightwc.h>
06435
06436 table = (const int32_t *)
06437 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_TABLEWC);
06438 weights = (const wint_t *)
06439 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_WEIGHTWC);
06440 extra = (const wint_t *)
06441 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_EXTRAWC);
06442 indirect = (const int32_t *)
06443 _NL_CURRENT (LC_COLLATE, _NL_COLLATE_INDIRECTWC);
06444
06445
06446
06447 idx2 = 0;
06448
06449 for (i = 0 ; idx2 == 0 && i < WORK_BUFFER_SIZE - 1; i++)
06450 {
06451 cp = (wint_t*)str_buf;
06452 if (d == dend)
06453 {
06454 if (dend == end_match_2)
06455 break;
06456 d = string2;
06457 dend = end_match_2;
06458 }
06459 str_buf[i] = TRANSLATE(*(d+i));
06460 str_buf[i+1] = '\0';
06461 idx2 = findidx ((const wint_t**)&cp);
06462 }
06463
06464
06465
06466 d = backup_d + ((wchar_t*)cp - (wchar_t*)str_buf - 1);
06467 if (d >= dend)
06468 {
06469 if (dend == end_match_2)
06470 d = dend;
06471 else
06472 {
06473 d = string2;
06474 dend = end_match_2;
06475 }
06476 }
06477
06478 len = weights[idx2];
06479
06480 for (workp2 = workp + equiv_class_length ; workp < workp2 ;
06481 workp++)
06482 {
06483 idx = (int32_t)*workp;
06484
06485
06486 if (idx2 != 0 && len == weights[idx])
06487 {
06488 int cnt = 0;
06489 while (cnt < len && (weights[idx + 1 + cnt]
06490 == weights[idx2 + 1 + cnt]))
06491 ++cnt;
06492
06493 if (cnt == len)
06494 goto char_set_matched;
06495 }
06496 }
06497
06498 d = backup_d;
06499 dend = backup_dend;
06500 }
06501 else
06502 # endif
06503
06504
06505 {
06506 for (workp2 = workp + equiv_class_length ; workp < workp2 ;)
06507 {
06508 const CHAR_T *backup_d = d, *backup_dend = dend;
06509 # ifdef _LIBC
06510 length = __wcslen (workp);
06511 # else
06512 length = wcslen (workp);
06513 # endif
06514
06515
06516
06517
06518 # ifdef _LIBC
06519 if (__wcscoll (workp, d) > 0)
06520 # else
06521 if (wcscoll (workp, d) > 0)
06522 # endif
06523 {
06524 workp += length + 1;
06525 break;
06526 }
06527
06528
06529
06530
06531
06532 for (i = 0 ; i < WORK_BUFFER_SIZE - 1 ; i++, d++)
06533 {
06534 int match;
06535 if (d == dend)
06536 {
06537 if (dend == end_match_2)
06538 break;
06539 d = string2;
06540 dend = end_match_2;
06541 }
06542
06543
06544 str_buf[i] = TRANSLATE(*d);
06545 str_buf[i+1] = '\0';
06546
06547 # ifdef _LIBC
06548 match = __wcscoll (workp, str_buf);
06549 # else
06550 match = wcscoll (workp, str_buf);
06551 # endif
06552
06553 if (match == 0)
06554 goto char_set_matched;
06555
06556 if (match < 0)
06557
06558
06559
06560 break;
06561
06562
06563
06564
06565 }
06566
06567 d = backup_d;
06568 dend = backup_dend;
06569 workp += length + 1;
06570 }
06571 }
06572
06573
06574 # ifdef _LIBC
06575 if (nrules != 0)
06576 {
06577 uint32_t collseqval;
06578 const char *collseq = (const char *)
06579 _NL_CURRENT(LC_COLLATE, _NL_COLLATE_COLLSEQWC);
06580
06581 collseqval = collseq_table_lookup (collseq, c);
06582
06583 for (; workp < p - chars_length ;)
06584 {
06585 uint32_t start_val, end_val;
06586
06587
06588
06589 start_val = (uint32_t) *workp++;
06590 end_val = (uint32_t) *workp++;
06591
06592 if (start_val <= collseqval && collseqval <= end_val)
06593 goto char_set_matched;
06594 }
06595 }
06596 else
06597 # endif
06598 {
06599
06600
06601 str_buf[1] = 0;
06602 str_buf[2] = c;
06603 str_buf[3] = 0;
06604 str_buf[5] = 0;
06605 for (; workp < p - chars_length ;)
06606 {
06607 wchar_t *range_start_char, *range_end_char;
06608
06609
06610
06611
06612
06613
06614
06615
06616 if (*workp < 0)
06617 range_start_char = charset_top - (*workp++);
06618 else
06619 {
06620 str_buf[0] = *workp++;
06621 range_start_char = str_buf;
06622 }
06623
06624
06625 if (*workp < 0)
06626 range_end_char = charset_top - (*workp++);
06627 else
06628 {
06629 str_buf[4] = *workp++;
06630 range_end_char = str_buf + 4;
06631 }
06632
06633 # ifdef _LIBC
06634 if (__wcscoll (range_start_char, str_buf+2) <= 0
06635 && __wcscoll (str_buf+2, range_end_char) <= 0)
06636 # else
06637 if (wcscoll (range_start_char, str_buf+2) <= 0
06638 && wcscoll (str_buf+2, range_end_char) <= 0)
06639 # endif
06640 goto char_set_matched;
06641 }
06642 }
06643
06644
06645 for (; workp < p ; workp++)
06646 if (c == *workp)
06647 goto char_set_matched;
06648
06649 not = !not;
06650
06651 char_set_matched:
06652 if (not) goto fail;
06653 #else
06654
06655
06656 if (c < (unsigned) (*p * BYTEWIDTH)
06657 && p[1 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
06658 not = !not;
06659
06660 p += 1 + *p;
06661
06662 if (!not) goto fail;
06663 #undef WORK_BUFFER_SIZE
06664 #endif
06665 SET_REGS_MATCHED ();
06666 d++;
06667 break;
06668 }
06669
06670
06671
06672
06673
06674
06675
06676 case start_memory:
06677 DEBUG_PRINT3 ("EXECUTING start_memory %ld (%ld):\n",
06678 (long int) *p, (long int) p[1]);
06679
06680
06681 p1 = p;
06682
06683 if (REG_MATCH_NULL_STRING_P (reg_info[*p]) == MATCH_NULL_UNSET_VALUE)
06684 REG_MATCH_NULL_STRING_P (reg_info[*p])
06685 = PREFIX(group_match_null_string_p) (&p1, pend, reg_info);
06686
06687
06688
06689
06690
06691
06692 old_regstart[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
06693 ? REG_UNSET (regstart[*p]) ? d : regstart[*p]
06694 : regstart[*p];
06695 DEBUG_PRINT2 (" old_regstart: %d\n",
06696 POINTER_TO_OFFSET (old_regstart[*p]));
06697
06698 regstart[*p] = d;
06699 DEBUG_PRINT2 (" regstart: %d\n", POINTER_TO_OFFSET (regstart[*p]));
06700
06701 IS_ACTIVE (reg_info[*p]) = 1;
06702 MATCHED_SOMETHING (reg_info[*p]) = 0;
06703
06704
06705 set_regs_matched_done = 0;
06706
06707
06708 highest_active_reg = *p;
06709
06710
06711
06712 if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
06713 lowest_active_reg = *p;
06714
06715
06716 p += 2;
06717 just_past_start_mem = p;
06718
06719 break;
06720
06721
06722
06723
06724
06725 case stop_memory:
06726 DEBUG_PRINT3 ("EXECUTING stop_memory %ld (%ld):\n",
06727 (long int) *p, (long int) p[1]);
06728
06729
06730
06731
06732
06733
06734 old_regend[*p] = REG_MATCH_NULL_STRING_P (reg_info[*p])
06735 ? REG_UNSET (regend[*p]) ? d : regend[*p]
06736 : regend[*p];
06737 DEBUG_PRINT2 (" old_regend: %d\n",
06738 POINTER_TO_OFFSET (old_regend[*p]));
06739
06740 regend[*p] = d;
06741 DEBUG_PRINT2 (" regend: %d\n", POINTER_TO_OFFSET (regend[*p]));
06742
06743
06744 IS_ACTIVE (reg_info[*p]) = 0;
06745
06746
06747 set_regs_matched_done = 0;
06748
06749
06750
06751 if (lowest_active_reg == highest_active_reg)
06752 {
06753 lowest_active_reg = NO_LOWEST_ACTIVE_REG;
06754 highest_active_reg = NO_HIGHEST_ACTIVE_REG;
06755 }
06756 else
06757 {
06758
06759
06760
06761 UCHAR_T r = *p - 1;
06762 while (r > 0 && !IS_ACTIVE (reg_info[r]))
06763 r--;
06764
06765
06766
06767
06768
06769
06770
06771
06772 if (r == 0)
06773 {
06774 lowest_active_reg = NO_LOWEST_ACTIVE_REG;
06775 highest_active_reg = NO_HIGHEST_ACTIVE_REG;
06776 }
06777 else
06778 highest_active_reg = r;
06779 }
06780
06781
06782
06783
06784
06785
06786 if ((!MATCHED_SOMETHING (reg_info[*p])
06787 || just_past_start_mem == p - 1)
06788 && (p + 2) < pend)
06789 {
06790 boolean is_a_jump_n = false;
06791
06792 p1 = p + 2;
06793 mcnt = 0;
06794 switch ((re_opcode_t) *p1++)
06795 {
06796 case jump_n:
06797 is_a_jump_n = true;
06798 case pop_failure_jump:
06799 case maybe_pop_jump:
06800 case jump:
06801 case dummy_failure_jump:
06802 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
06803 if (is_a_jump_n)
06804 p1 += OFFSET_ADDRESS_SIZE;
06805 break;
06806
06807 default:
06808 ;
06809 }
06810 p1 += mcnt;
06811
06812
06813
06814
06815
06816
06817 if (mcnt < 0 && (re_opcode_t) *p1 == on_failure_jump
06818 && (re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == start_memory
06819 && p1[2+OFFSET_ADDRESS_SIZE] == *p)
06820 {
06821
06822
06823
06824
06825
06826
06827
06828
06829
06830
06831 if (EVER_MATCHED_SOMETHING (reg_info[*p]))
06832 {
06833 unsigned r;
06834
06835 EVER_MATCHED_SOMETHING (reg_info[*p]) = 0;
06836
06837
06838 for (r = *p; r < (unsigned) *p + (unsigned) *(p + 1);
06839 r++)
06840 {
06841 regstart[r] = old_regstart[r];
06842
06843
06844 if (old_regend[r] >= regstart[r])
06845 regend[r] = old_regend[r];
06846 }
06847 }
06848 p1++;
06849 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
06850 PUSH_FAILURE_POINT (p1 + mcnt, d, -2);
06851
06852 goto fail;
06853 }
06854 }
06855
06856
06857 p += 2;
06858 break;
06859
06860
06861
06862
06863 case duplicate:
06864 {
06865 register const CHAR_T *d2, *dend2;
06866 int regno = *p++;
06867 DEBUG_PRINT2 ("EXECUTING duplicate %d.\n", regno);
06868
06869
06870 if (REG_UNSET (regstart[regno]) || REG_UNSET (regend[regno]))
06871 goto fail;
06872
06873
06874 d2 = regstart[regno];
06875
06876
06877
06878
06879
06880
06881 dend2 = ((FIRST_STRING_P (regstart[regno])
06882 == FIRST_STRING_P (regend[regno]))
06883 ? regend[regno] : end_match_1);
06884 for (;;)
06885 {
06886
06887
06888 while (d2 == dend2)
06889 {
06890 if (dend2 == end_match_2) break;
06891 if (dend2 == regend[regno]) break;
06892
06893
06894 d2 = string2;
06895 dend2 = regend[regno];
06896 }
06897
06898 if (d2 == dend2) break;
06899
06900
06901 PREFETCH ();
06902
06903
06904 mcnt = dend - d;
06905
06906
06907
06908 if (mcnt > dend2 - d2)
06909 mcnt = dend2 - d2;
06910
06911
06912
06913 if (translate
06914 ? PREFIX(bcmp_translate) (d, d2, mcnt, translate)
06915 : memcmp (d, d2, mcnt*sizeof(UCHAR_T)))
06916 goto fail;
06917 d += mcnt, d2 += mcnt;
06918
06919
06920 SET_REGS_MATCHED ();
06921 }
06922 }
06923 break;
06924
06925
06926
06927
06928
06929 case begline:
06930 DEBUG_PRINT1 ("EXECUTING begline.\n");
06931
06932 if (AT_STRINGS_BEG (d))
06933 {
06934 if (!bufp->not_bol) break;
06935 }
06936 else if (d[-1] == '\n' && bufp->newline_anchor)
06937 {
06938 break;
06939 }
06940
06941 goto fail;
06942
06943
06944
06945 case endline:
06946 DEBUG_PRINT1 ("EXECUTING endline.\n");
06947
06948 if (AT_STRINGS_END (d))
06949 {
06950 if (!bufp->not_eol) break;
06951 }
06952
06953
06954 else if ((d == end1 ? *string2 : *d) == '\n'
06955 && bufp->newline_anchor)
06956 {
06957 break;
06958 }
06959 goto fail;
06960
06961
06962
06963 case begbuf:
06964 DEBUG_PRINT1 ("EXECUTING begbuf.\n");
06965 if (AT_STRINGS_BEG (d))
06966 break;
06967 goto fail;
06968
06969
06970
06971 case endbuf:
06972 DEBUG_PRINT1 ("EXECUTING endbuf.\n");
06973 if (AT_STRINGS_END (d))
06974 break;
06975 goto fail;
06976
06977
06978
06979
06980
06981
06982
06983
06984
06985
06986
06987
06988
06989
06990
06991
06992
06993
06994 case on_failure_keep_string_jump:
06995 DEBUG_PRINT1 ("EXECUTING on_failure_keep_string_jump");
06996
06997 EXTRACT_NUMBER_AND_INCR (mcnt, p);
06998 #ifdef _LIBC
06999 DEBUG_PRINT3 (" %d (to %p):\n", mcnt, p + mcnt);
07000 #else
07001 DEBUG_PRINT3 (" %d (to 0x%x):\n", mcnt, p + mcnt);
07002 #endif
07003
07004 PUSH_FAILURE_POINT (p + mcnt, NULL, -2);
07005 break;
07006
07007
07008
07009
07010
07011
07012
07013
07014
07015
07016
07017
07018
07019
07020 case on_failure_jump:
07021 on_failure:
07022 DEBUG_PRINT1 ("EXECUTING on_failure_jump");
07023
07024 EXTRACT_NUMBER_AND_INCR (mcnt, p);
07025 #ifdef _LIBC
07026 DEBUG_PRINT3 (" %d (to %p)", mcnt, p + mcnt);
07027 #else
07028 DEBUG_PRINT3 (" %d (to 0x%x)", mcnt, p + mcnt);
07029 #endif
07030
07031
07032
07033
07034
07035
07036
07037
07038
07039
07040 p1 = p;
07041
07042
07043
07044
07045
07046 while (p1 < pend && (re_opcode_t) *p1 == no_op)
07047 p1++;
07048
07049 if (p1 < pend && (re_opcode_t) *p1 == start_memory)
07050 {
07051
07052
07053
07054
07055 highest_active_reg = *(p1 + 1) + *(p1 + 2);
07056 if (lowest_active_reg == NO_LOWEST_ACTIVE_REG)
07057 lowest_active_reg = *(p1 + 1);
07058 }
07059
07060 DEBUG_PRINT1 (":\n");
07061 PUSH_FAILURE_POINT (p + mcnt, d, -2);
07062 break;
07063
07064
07065
07066
07067 case maybe_pop_jump:
07068 EXTRACT_NUMBER_AND_INCR (mcnt, p);
07069 DEBUG_PRINT2 ("EXECUTING maybe_pop_jump %d.\n", mcnt);
07070 {
07071 register UCHAR_T *p2 = p;
07072
07073
07074
07075
07076
07077
07078
07079
07080
07081
07082
07083
07084
07085
07086
07087
07088
07089
07090 while (1)
07091 {
07092 if (p2 + 2 < pend
07093 && ((re_opcode_t) *p2 == stop_memory
07094 || (re_opcode_t) *p2 == start_memory))
07095 p2 += 3;
07096 else if (p2 + 2 + 2 * OFFSET_ADDRESS_SIZE < pend
07097 && (re_opcode_t) *p2 == dummy_failure_jump)
07098 p2 += 2 + 2 * OFFSET_ADDRESS_SIZE;
07099 else
07100 break;
07101 }
07102
07103 p1 = p + mcnt;
07104
07105
07106
07107
07108
07109 if (p2 == pend)
07110 {
07111
07112
07113
07114 p[-(1+OFFSET_ADDRESS_SIZE)] = (UCHAR_T)
07115 pop_failure_jump;
07116 DEBUG_PRINT1
07117 (" End of pattern: change to `pop_failure_jump'.\n");
07118 }
07119
07120 else if ((re_opcode_t) *p2 == exactn
07121 #ifdef MBS_SUPPORT
07122 || (re_opcode_t) *p2 == exactn_bin
07123 #endif
07124 || (bufp->newline_anchor && (re_opcode_t) *p2 == endline))
07125 {
07126 register UCHAR_T c
07127 = *p2 == (UCHAR_T) endline ? '\n' : p2[2];
07128
07129 if (((re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == exactn
07130 #ifdef MBS_SUPPORT
07131 || (re_opcode_t) p1[1+OFFSET_ADDRESS_SIZE] == exactn_bin
07132 #endif
07133 ) && p1[3+OFFSET_ADDRESS_SIZE] != c)
07134 {
07135 p[-(1+OFFSET_ADDRESS_SIZE)] = (UCHAR_T)
07136 pop_failure_jump;
07137 #ifdef WCHAR
07138 DEBUG_PRINT3 (" %C != %C => pop_failure_jump.\n",
07139 (wint_t) c,
07140 (wint_t) p1[3+OFFSET_ADDRESS_SIZE]);
07141 #else
07142 DEBUG_PRINT3 (" %c != %c => pop_failure_jump.\n",
07143 (char) c,
07144 (char) p1[3+OFFSET_ADDRESS_SIZE]);
07145 #endif
07146 }
07147
07148 #ifndef WCHAR
07149 else if ((re_opcode_t) p1[3] == charset
07150 || (re_opcode_t) p1[3] == charset_not)
07151 {
07152 int not = (re_opcode_t) p1[3] == charset_not;
07153
07154 if (c < (unsigned) (p1[4] * BYTEWIDTH)
07155 && p1[5 + c / BYTEWIDTH] & (1 << (c % BYTEWIDTH)))
07156 not = !not;
07157
07158
07159
07160 if (!not)
07161 {
07162 p[-3] = (unsigned char) pop_failure_jump;
07163 DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
07164 }
07165 }
07166 #endif
07167 }
07168 #ifndef WCHAR
07169 else if ((re_opcode_t) *p2 == charset)
07170 {
07171
07172
07173 if ((re_opcode_t) p1[3] == exactn
07174 && ! ((int) p2[1] * BYTEWIDTH > (int) p1[5]
07175 && (p2[2 + p1[5] / BYTEWIDTH]
07176 & (1 << (p1[5] % BYTEWIDTH)))))
07177 {
07178 p[-3] = (unsigned char) pop_failure_jump;
07179 DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
07180 }
07181
07182 else if ((re_opcode_t) p1[3] == charset_not)
07183 {
07184 int idx;
07185
07186
07187 for (idx = 0; idx < (int) p2[1]; idx++)
07188 if (! (p2[2 + idx] == 0
07189 || (idx < (int) p1[4]
07190 && ((p2[2 + idx] & ~ p1[5 + idx]) == 0))))
07191 break;
07192
07193 if (idx == p2[1])
07194 {
07195 p[-3] = (unsigned char) pop_failure_jump;
07196 DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
07197 }
07198 }
07199 else if ((re_opcode_t) p1[3] == charset)
07200 {
07201 int idx;
07202
07203
07204 for (idx = 0;
07205 idx < (int) p2[1] && idx < (int) p1[4];
07206 idx++)
07207 if ((p2[2 + idx] & p1[5 + idx]) != 0)
07208 break;
07209
07210 if (idx == p2[1] || idx == p1[4])
07211 {
07212 p[-3] = (unsigned char) pop_failure_jump;
07213 DEBUG_PRINT1 (" No match => pop_failure_jump.\n");
07214 }
07215 }
07216 }
07217 #endif
07218 }
07219 p -= OFFSET_ADDRESS_SIZE;
07220 if ((re_opcode_t) p[-1] != pop_failure_jump)
07221 {
07222 p[-1] = (UCHAR_T) jump;
07223 DEBUG_PRINT1 (" Match => jump.\n");
07224 goto unconditional_jump;
07225 }
07226
07227
07228
07229
07230
07231
07232
07233
07234
07235 case pop_failure_jump:
07236 {
07237
07238
07239
07240
07241
07242 active_reg_t dummy_low_reg, dummy_high_reg;
07243 UCHAR_T *pdummy = NULL;
07244 const CHAR_T *sdummy = NULL;
07245
07246 DEBUG_PRINT1 ("EXECUTING pop_failure_jump.\n");
07247 POP_FAILURE_POINT (sdummy, pdummy,
07248 dummy_low_reg, dummy_high_reg,
07249 reg_dummy, reg_dummy, reg_info_dummy);
07250 }
07251
07252
07253 unconditional_jump:
07254 #ifdef _LIBC
07255 DEBUG_PRINT2 ("\n%p: ", p);
07256 #else
07257 DEBUG_PRINT2 ("\n0x%x: ", p);
07258 #endif
07259
07260
07261
07262 case jump:
07263 EXTRACT_NUMBER_AND_INCR (mcnt, p);
07264 DEBUG_PRINT2 ("EXECUTING jump %d ", mcnt);
07265 p += mcnt;
07266 #ifdef _LIBC
07267 DEBUG_PRINT2 ("(to %p).\n", p);
07268 #else
07269 DEBUG_PRINT2 ("(to 0x%x).\n", p);
07270 #endif
07271 break;
07272
07273
07274
07275
07276 case jump_past_alt:
07277 DEBUG_PRINT1 ("EXECUTING jump_past_alt.\n");
07278 goto unconditional_jump;
07279
07280
07281
07282
07283
07284
07285
07286 case dummy_failure_jump:
07287 DEBUG_PRINT1 ("EXECUTING dummy_failure_jump.\n");
07288
07289
07290 PUSH_FAILURE_POINT (NULL, NULL, -2);
07291 goto unconditional_jump;
07292
07293
07294
07295
07296
07297
07298
07299 case push_dummy_failure:
07300 DEBUG_PRINT1 ("EXECUTING push_dummy_failure.\n");
07301
07302
07303 PUSH_FAILURE_POINT (NULL, NULL, -2);
07304 break;
07305
07306
07307
07308 case succeed_n:
07309 EXTRACT_NUMBER (mcnt, p + OFFSET_ADDRESS_SIZE);
07310 DEBUG_PRINT2 ("EXECUTING succeed_n %d.\n", mcnt);
07311
07312 assert (mcnt >= 0);
07313
07314 if (mcnt > 0)
07315 {
07316 mcnt--;
07317 p += OFFSET_ADDRESS_SIZE;
07318 STORE_NUMBER_AND_INCR (p, mcnt);
07319 #ifdef _LIBC
07320 DEBUG_PRINT3 (" Setting %p to %d.\n", p - OFFSET_ADDRESS_SIZE
07321 , mcnt);
07322 #else
07323 DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p - OFFSET_ADDRESS_SIZE
07324 , mcnt);
07325 #endif
07326 }
07327 else if (mcnt == 0)
07328 {
07329 #ifdef _LIBC
07330 DEBUG_PRINT2 (" Setting two bytes from %p to no_op.\n",
07331 p + OFFSET_ADDRESS_SIZE);
07332 #else
07333 DEBUG_PRINT2 (" Setting two bytes from 0x%x to no_op.\n",
07334 p + OFFSET_ADDRESS_SIZE);
07335 #endif
07336
07337 #ifdef WCHAR
07338 p[1] = (UCHAR_T) no_op;
07339 #else
07340 p[2] = (UCHAR_T) no_op;
07341 p[3] = (UCHAR_T) no_op;
07342 #endif
07343 goto on_failure;
07344 }
07345 break;
07346
07347 case jump_n:
07348 EXTRACT_NUMBER (mcnt, p + OFFSET_ADDRESS_SIZE);
07349 DEBUG_PRINT2 ("EXECUTING jump_n %d.\n", mcnt);
07350
07351
07352 if (mcnt)
07353 {
07354 mcnt--;
07355 STORE_NUMBER (p + OFFSET_ADDRESS_SIZE, mcnt);
07356
07357 #ifdef _LIBC
07358 DEBUG_PRINT3 (" Setting %p to %d.\n", p + OFFSET_ADDRESS_SIZE,
07359 mcnt);
07360 #else
07361 DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p + OFFSET_ADDRESS_SIZE,
07362 mcnt);
07363 #endif
07364 goto unconditional_jump;
07365 }
07366
07367 else
07368 p += 2 * OFFSET_ADDRESS_SIZE;
07369 break;
07370
07371 case set_number_at:
07372 {
07373 DEBUG_PRINT1 ("EXECUTING set_number_at.\n");
07374
07375 EXTRACT_NUMBER_AND_INCR (mcnt, p);
07376 p1 = p + mcnt;
07377 EXTRACT_NUMBER_AND_INCR (mcnt, p);
07378 #ifdef _LIBC
07379 DEBUG_PRINT3 (" Setting %p to %d.\n", p1, mcnt);
07380 #else
07381 DEBUG_PRINT3 (" Setting 0x%x to %d.\n", p1, mcnt);
07382 #endif
07383 STORE_NUMBER (p1, mcnt);
07384 break;
07385 }
07386
07387 #if 0
07388
07389
07390
07391
07392
07393 case wordbound:
07394 DEBUG_PRINT1 ("EXECUTING wordbound.\n");
07395 if (AT_WORD_BOUNDARY (d))
07396 break;
07397 goto fail;
07398
07399 case notwordbound:
07400 DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
07401 if (AT_WORD_BOUNDARY (d))
07402 goto fail;
07403 break;
07404 #else
07405 case wordbound:
07406 {
07407 boolean prevchar, thischar;
07408
07409 DEBUG_PRINT1 ("EXECUTING wordbound.\n");
07410 if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
07411 break;
07412
07413 prevchar = WORDCHAR_P (d - 1);
07414 thischar = WORDCHAR_P (d);
07415 if (prevchar != thischar)
07416 break;
07417 goto fail;
07418 }
07419
07420 case notwordbound:
07421 {
07422 boolean prevchar, thischar;
07423
07424 DEBUG_PRINT1 ("EXECUTING notwordbound.\n");
07425 if (AT_STRINGS_BEG (d) || AT_STRINGS_END (d))
07426 goto fail;
07427
07428 prevchar = WORDCHAR_P (d - 1);
07429 thischar = WORDCHAR_P (d);
07430 if (prevchar != thischar)
07431 goto fail;
07432 break;
07433 }
07434 #endif
07435
07436 case wordbeg:
07437 DEBUG_PRINT1 ("EXECUTING wordbeg.\n");
07438 if (!AT_STRINGS_END (d) && WORDCHAR_P (d)
07439 && (AT_STRINGS_BEG (d) || !WORDCHAR_P (d - 1)))
07440 break;
07441 goto fail;
07442
07443 case wordend:
07444 DEBUG_PRINT1 ("EXECUTING wordend.\n");
07445 if (!AT_STRINGS_BEG (d) && WORDCHAR_P (d - 1)
07446 && (AT_STRINGS_END (d) || !WORDCHAR_P (d)))
07447 break;
07448 goto fail;
07449
07450 #ifdef emacs
07451 case before_dot:
07452 DEBUG_PRINT1 ("EXECUTING before_dot.\n");
07453 if (PTR_CHAR_POS ((unsigned char *) d) >= point)
07454 goto fail;
07455 break;
07456
07457 case at_dot:
07458 DEBUG_PRINT1 ("EXECUTING at_dot.\n");
07459 if (PTR_CHAR_POS ((unsigned char *) d) != point)
07460 goto fail;
07461 break;
07462
07463 case after_dot:
07464 DEBUG_PRINT1 ("EXECUTING after_dot.\n");
07465 if (PTR_CHAR_POS ((unsigned char *) d) <= point)
07466 goto fail;
07467 break;
07468
07469 case syntaxspec:
07470 DEBUG_PRINT2 ("EXECUTING syntaxspec %d.\n", mcnt);
07471 mcnt = *p++;
07472 goto matchsyntax;
07473
07474 case wordchar:
07475 DEBUG_PRINT1 ("EXECUTING Emacs wordchar.\n");
07476 mcnt = (int) Sword;
07477 matchsyntax:
07478 PREFETCH ();
07479
07480 d++;
07481 if (SYNTAX (d[-1]) != (enum syntaxcode) mcnt)
07482 goto fail;
07483 SET_REGS_MATCHED ();
07484 break;
07485
07486 case notsyntaxspec:
07487 DEBUG_PRINT2 ("EXECUTING notsyntaxspec %d.\n", mcnt);
07488 mcnt = *p++;
07489 goto matchnotsyntax;
07490
07491 case notwordchar:
07492 DEBUG_PRINT1 ("EXECUTING Emacs notwordchar.\n");
07493 mcnt = (int) Sword;
07494 matchnotsyntax:
07495 PREFETCH ();
07496
07497 d++;
07498 if (SYNTAX (d[-1]) == (enum syntaxcode) mcnt)
07499 goto fail;
07500 SET_REGS_MATCHED ();
07501 break;
07502
07503 #else
07504 case wordchar:
07505 DEBUG_PRINT1 ("EXECUTING non-Emacs wordchar.\n");
07506 PREFETCH ();
07507 if (!WORDCHAR_P (d))
07508 goto fail;
07509 SET_REGS_MATCHED ();
07510 d++;
07511 break;
07512
07513 case notwordchar:
07514 DEBUG_PRINT1 ("EXECUTING non-Emacs notwordchar.\n");
07515 PREFETCH ();
07516 if (WORDCHAR_P (d))
07517 goto fail;
07518 SET_REGS_MATCHED ();
07519 d++;
07520 break;
07521 #endif
07522
07523 default:
07524 abort ();
07525 }
07526 continue;
07527
07528
07529
07530 fail:
07531 if (!FAIL_STACK_EMPTY ())
07532 {
07533 DEBUG_PRINT1 ("\nFAIL:\n");
07534 POP_FAILURE_POINT (d, p,
07535 lowest_active_reg, highest_active_reg,
07536 regstart, regend, reg_info);
07537
07538
07539 if (!p)
07540 goto fail;
07541
07542
07543 assert (p <= pend);
07544 if (p < pend)
07545 {
07546 boolean is_a_jump_n = false;
07547
07548
07549
07550 switch ((re_opcode_t) *p)
07551 {
07552 case jump_n:
07553 is_a_jump_n = true;
07554 case maybe_pop_jump:
07555 case pop_failure_jump:
07556 case jump:
07557 p1 = p + 1;
07558 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
07559 p1 += mcnt;
07560
07561 if ((is_a_jump_n && (re_opcode_t) *p1 == succeed_n)
07562 || (!is_a_jump_n
07563 && (re_opcode_t) *p1 == on_failure_jump))
07564 goto fail;
07565 break;
07566 default:
07567 ;
07568 }
07569 }
07570
07571 if (d >= string1 && d <= end1)
07572 dend = end_match_1;
07573 }
07574 else
07575 break;
07576 }
07577
07578 if (best_regs_set)
07579 goto restore_best_regs;
07580
07581 FREE_VARIABLES ();
07582
07583 return -1;
07584 }
07585
07586
07587
07588
07589
07590
07591
07592
07593 /* Examine the compiled group whose start_memory arguments *P points at */
07594 /* (scanning begins at *P + 2) and report whether every opcode up to its */
07595 /* stop_memory is accepted by the null-string checks below.  Returns true */
07596 /* on reaching stop_memory, after setting *P to that opcode's address + 2; */
07597 /* returns false if any check rejects or END is reached first.  REG_INFO */
07598 /* is only forwarded to the helper routines.  */
07599 static boolean
07600 PREFIX(group_match_null_string_p) (p, end, reg_info)
07601 UCHAR_T **p, *end;
07602 PREFIX(register_info_type) *reg_info;
07603 {
07604 int mcnt;
07605 /* Start two elements past *p, i.e. after the two start_memory arguments.  */
07606 UCHAR_T *p1 = *p + 2;
07607
07608 while (p1 < end)
07609 {
07610 /* Dispatch on the opcode at p1 without consuming it: the */
07611 /* on_failure_jump case advances p1 itself, the default case hands &p1 */
07612 /* to common_op_match_null_string_p (presumably advancing it there), */
07613 /* and stop_memory returns.  */
07614 switch ((re_opcode_t) *p1)
07615 {
07616 /* Step over the opcode and read its jump offset into mcnt.  */
07617 case on_failure_jump:
07618 p1++;
07619 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
07620
07621 /* Only a non-negative (forward) offset is analysed; with a negative */
07622 /* offset nothing more is checked and scanning simply resumes at p1, */
07623 /* just past the offset.  */
07624 if (mcnt >= 0)
07625 {
07626 /* p1 + mcnt is the jump target.  If the 1 + OFFSET_ADDRESS_SIZE */
07627 /* elements immediately before that target start with a */
07628 /* jump_past_alt opcode, the stretch from p1 up to that */
07629 /* jump_past_alt is treated as one alternative of an alternation. */
07630 /* The loop below walks every alternative introduced this way; */
07631 /* the stretch left over after the loop is checked separately.  */
07632
07633
07634
07635
07636
07637
07638
07639
07640
07641
07642
07643
07644 while ((re_opcode_t) p1[mcnt-(1+OFFSET_ADDRESS_SIZE)] ==
07645 jump_past_alt)
07646 {
07647 /* Check the body of this alternative: from p1 up to, but not */
07648 /* including, the jump_past_alt that closes it.  Give up as soon */
07649 /* as alt_match_null_string_p rejects one.  */
07650
07651 if (!PREFIX(alt_match_null_string_p) (p1, p1 + mcnt -
07652 (1 + OFFSET_ADDRESS_SIZE),
07653 reg_info))
07654 return false;
07655
07656 /* Move to the jump target, i.e. just past the jump_past_alt and */
07657 /* its offset.  */
07658 p1 += mcnt;
07659
07660 /* No on_failure_jump here: what follows is left to the */
07661 /* post-loop check.  */
07662 if ((re_opcode_t) *p1 != on_failure_jump)
07663 break;
07664
07665 /* Read this on_failure_jump's offset; if its target is not */
07666 /* preceded by a jump_past_alt, it does not open another alternative.  */
07667 p1++;
07668 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
07669 if ((re_opcode_t) p1[mcnt-(1+OFFSET_ADDRESS_SIZE)] !=
07670 jump_past_alt)
07671 {
07672 /* Undo the opcode + offset just consumed, so p1 is back on it.  */
07673 p1 -= 1 + OFFSET_ADDRESS_SIZE;
07674 break;
07675 }
07676 }
07677
07678 /* Re-read the number stored in the OFFSET_ADDRESS_SIZE elements just */
07679 /* before p1 (the offset of the jump that precedes p1) and use it as */
07680 /* the length of the remaining stretch to check.  */
07681 EXTRACT_NUMBER (mcnt, p1 - OFFSET_ADDRESS_SIZE);
07682
07683 if (!PREFIX(alt_match_null_string_p) (p1, p1 + mcnt, reg_info))
07684 return false;
07685 /* Skip the stretch that was just checked.  */
07686 p1 += mcnt;
07687 }
07688 break;
07689
07690 /* End of the group: its first argument must equal the value at *p.  */
07691 case stop_memory:
07692 assert (p1[1] == **p);
07693 *p = p1 + 2;
07694 return true;
07695 /* NOTE(review): the matcher skips 2 arguments after a stop_memory opcode, */
07696 /* so p1 + 2 above appears to address the second argument -- confirm.  */
07697 default:
07698 if (!PREFIX(common_op_match_null_string_p) (&p1, end, reg_info))
07699 return false;
07700 }
07701 }
07702 /* Reached END without meeting a stop_memory.  */
07703 return false;
07704 }
07705
07706
07707
07708
07709
07710
07711 static boolean
07712 PREFIX(alt_match_null_string_p) (p, end, reg_info)
07713 UCHAR_T *p, *end;
07714 PREFIX(register_info_type) *reg_info;
07715 {
07716 int mcnt;
07717 UCHAR_T *p1 = p;
07718
07719 while (p1 < end)
07720 {
07721
07722
07723
07724 switch ((re_opcode_t) *p1)
07725 {
07726
07727 case on_failure_jump:
07728 p1++;
07729 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
07730 p1 += mcnt;
07731 break;
07732
07733 default:
07734 if (!PREFIX(common_op_match_null_string_p) (&p1, end, reg_info))
07735 return false;
07736 }
07737 }
07738
07739 return true;
07740 }
07741
07742
07743
07744
07745
07746
07747
07748 static boolean
07749 PREFIX(common_op_match_null_string_p) (p, end, reg_info)
07750 UCHAR_T **p, *end;
07751 PREFIX(register_info_type) *reg_info;
07752 {
07753 int mcnt;
07754 boolean ret;
07755 int reg_no;
07756 UCHAR_T *p1 = *p;
07757
07758 switch ((re_opcode_t) *p1++)
07759 {
07760 case no_op:
07761 case begline:
07762 case endline:
07763 case begbuf:
07764 case endbuf:
07765 case wordbeg:
07766 case wordend:
07767 case wordbound:
07768 case notwordbound:
07769 #ifdef emacs
07770 case before_dot:
07771 case at_dot:
07772 case after_dot:
07773 #endif
07774 break;
07775
07776 case start_memory:
07777 reg_no = *p1;
07778 assert (reg_no > 0 && reg_no <= MAX_REGNUM);
07779 ret = PREFIX(group_match_null_string_p) (&p1, end, reg_info);
07780
07781
07782
07783
07784 if (REG_MATCH_NULL_STRING_P (reg_info[reg_no]) == MATCH_NULL_UNSET_VALUE)
07785 REG_MATCH_NULL_STRING_P (reg_info[reg_no]) = ret;
07786
07787 if (!ret)
07788 return false;
07789 break;
07790
07791
07792 case jump:
07793 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
07794 if (mcnt >= 0)
07795 p1 += mcnt;
07796 else
07797 return false;
07798 break;
07799
07800 case succeed_n:
07801
07802 p1 += OFFSET_ADDRESS_SIZE;
07803 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
07804
07805 if (mcnt == 0)
07806 {
07807 p1 -= 2 * OFFSET_ADDRESS_SIZE;
07808 EXTRACT_NUMBER_AND_INCR (mcnt, p1);
07809 p1 += mcnt;
07810 }
07811 else
07812 return false;
07813 break;
07814
07815 case duplicate:
07816 if (!REG_MATCH_NULL_STRING_P (reg_info[*p1]))
07817 return false;
07818 break;
07819
07820 case set_number_at:
07821 p1 += 2 * OFFSET_ADDRESS_SIZE;
07822
07823 default:
07824
07825 return false;
07826 }
07827
07828 *p = p1;
07829 return true;
07830 }
07831
07832
07833
07834
07835
07836 static int
07837 PREFIX(bcmp_translate) (s1, s2, len, translate)
07838 const CHAR_T *s1, *s2;
07839 register int len;
07840 RE_TRANSLATE_TYPE translate;
07841 {
07842 register const UCHAR_T *p1 = (const UCHAR_T *) s1;
07843 register const UCHAR_T *p2 = (const UCHAR_T *) s2;
07844 while (len)
07845 {
07846 #ifdef WCHAR
07847 if (((*p1<=0xff)?translate[*p1++]:*p1++)
07848 != ((*p2<=0xff)?translate[*p2++]:*p2++))
07849 return 1;
07850 #else
07851 if (translate[*p1++] != translate[*p2++]) return 1;
07852 #endif
07853 len--;
07854 }
07855 return 0;
07856 }
07857
07858
07859 #else
07860
07861
07862
07863
07864
07865
07866
07867
07868
07869
07870
07871
07872 const char *
07873 re_compile_pattern (pattern, length, bufp)
07874 const char *pattern;
07875 size_t length;
07876 struct re_pattern_buffer *bufp;
07877 {
07878 reg_errcode_t ret;
07879
07880
07881
07882 bufp->regs_allocated = REGS_UNALLOCATED;
07883
07884
07885
07886
07887 bufp->no_sub = 0;
07888
07889
07890 bufp->newline_anchor = 1;
07891
07892 # ifdef MBS_SUPPORT
07893 if (MB_CUR_MAX != 1)
07894 ret = wcs_regex_compile (pattern, length, re_syntax_options, bufp);
07895 else
07896 # endif
07897 ret = byte_regex_compile (pattern, length, re_syntax_options, bufp);
07898
07899 if (!ret)
07900 return NULL;
07901 return gettext (re_error_msgid[(int) ret]);
07902 }
07903 #ifdef _LIBC
07904 weak_alias (__re_compile_pattern, re_compile_pattern)
07905 #endif
07906
07907
07908
07909
07910 #if defined _REGEX_RE_COMP || defined _LIBC
07911
07912
07913 static struct re_pattern_buffer re_comp_buf;
07914
07915 char *
07916 #ifdef _LIBC
07917
07918
07919
07920 weak_function
07921 #endif
07922 re_comp (s)
07923 const char *s;
07924 {
07925 reg_errcode_t ret;
07926
07927 if (!s)
07928 {
07929 if (!re_comp_buf.buffer)
07930 return gettext ("No previous regular expression");
07931 return 0;
07932 }
07933
07934 if (!re_comp_buf.buffer)
07935 {
07936 re_comp_buf.buffer = (unsigned char *) malloc (200);
07937 if (re_comp_buf.buffer == NULL)
07938 return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
07939 re_comp_buf.allocated = 200;
07940
07941 re_comp_buf.fastmap = (char *) malloc (1 << BYTEWIDTH);
07942 if (re_comp_buf.fastmap == NULL)
07943 return (char *) gettext (re_error_msgid[(int) REG_ESPACE]);
07944 }
07945
07946
07947
07948
07949
07950 re_comp_buf.newline_anchor = 1;
07951
07952 # ifdef MBS_SUPPORT
07953 if (MB_CUR_MAX != 1)
07954 ret = wcs_regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
07955 else
07956 # endif
07957 ret = byte_regex_compile (s, strlen (s), re_syntax_options, &re_comp_buf);
07958
07959 if (!ret)
07960 return NULL;
07961
07962
07963 return (char *) gettext (re_error_msgid[(int) ret]);
07964 }
07965
07966
07967 int
07968 #ifdef _LIBC
07969 weak_function
07970 #endif
07971 re_exec (s)
07972 const char *s;
07973 {
07974 const int len = strlen (s);
07975 return
07976 0 <= re_search (&re_comp_buf, s, len, 0, len, (struct re_registers *) 0);
07977 }
07978
07979 #endif
07980
07981
07982
07983 #ifndef emacs
07984
07985
07986
07987
07988
07989
07990
07991
07992
07993
07994
07995
07996
07997
07998
07999
08000
08001
08002
08003
08004
08005
08006
08007
08008
08009
08010
08011
08012
08013
08014
08015
08016
08017
08018
08019
08020 int
08021 regcomp (preg, pattern, cflags)
08022 regex_t *preg;
08023 const char *pattern;
08024 int cflags;
08025 {
08026 reg_errcode_t ret;
08027 reg_syntax_t syntax
08028 = (cflags & REG_EXTENDED) ?
08029 RE_SYNTAX_POSIX_EXTENDED : RE_SYNTAX_POSIX_BASIC;
08030
08031
08032 preg->buffer = 0;
08033 preg->allocated = 0;
08034 preg->used = 0;
08035
08036
08037 preg->fastmap = (char *) malloc (1 << BYTEWIDTH);
08038
08039 if (cflags & REG_ICASE)
08040 {
08041 unsigned i;
08042
08043 preg->translate
08044 = (RE_TRANSLATE_TYPE) malloc (CHAR_SET_SIZE
08045 * sizeof (*(RE_TRANSLATE_TYPE)0));
08046 if (preg->translate == NULL)
08047 return (int) REG_ESPACE;
08048
08049
08050 for (i = 0; i < CHAR_SET_SIZE; i++)
08051 preg->translate[i] = ISUPPER (i) ? TOLOWER (i) : (int) i;
08052 }
08053 else
08054 preg->translate = NULL;
08055
08056
08057 if (cflags & REG_NEWLINE)
08058 {
08059 syntax &= ~RE_DOT_NEWLINE;
08060 syntax |= RE_HAT_LISTS_NOT_NEWLINE;
08061
08062 preg->newline_anchor = 1;
08063 }
08064 else
08065 preg->newline_anchor = 0;
08066
08067 preg->no_sub = !!(cflags & REG_NOSUB);
08068
08069
08070
08071 # ifdef MBS_SUPPORT
08072 if (MB_CUR_MAX != 1)
08073 ret = wcs_regex_compile (pattern, strlen (pattern), syntax, preg);
08074 else
08075 # endif
08076 ret = byte_regex_compile (pattern, strlen (pattern), syntax, preg);
08077
08078
08079
08080 if (ret == REG_ERPAREN) ret = REG_EPAREN;
08081
08082 if (ret == REG_NOERROR && preg->fastmap)
08083 {
08084
08085
08086 if (re_compile_fastmap (preg) == -2)
08087 {
08088
08089
08090 free (preg->fastmap);
08091 preg->fastmap = NULL;
08092 }
08093 }
08094
08095 return (int) ret;
08096 }
08097 #ifdef _LIBC
08098 weak_alias (__regcomp, regcomp)
08099 #endif
08100
08101
08102
08103
08104
08105
08106
08107
08108
08109
08110
08111
08112
08113
08114
08115
08116 int
08117 regexec (preg, string, nmatch, pmatch, eflags)
08118 const regex_t *preg;
08119 const char *string;
08120 size_t nmatch;
08121 regmatch_t pmatch[];
08122 int eflags;
08123 {
08124 int ret;
08125 struct re_registers regs;
08126 regex_t private_preg;
08127 int len = strlen (string);
08128 boolean want_reg_info = !preg->no_sub && nmatch > 0;
08129
08130 private_preg = *preg;
08131
08132 private_preg.not_bol = !!(eflags & REG_NOTBOL);
08133 private_preg.not_eol = !!(eflags & REG_NOTEOL);
08134
08135
08136
08137
08138 private_preg.regs_allocated = REGS_FIXED;
08139
08140 if (want_reg_info)
08141 {
08142 regs.num_regs = nmatch;
08143 regs.start = TALLOC (nmatch * 2, regoff_t);
08144 if (regs.start == NULL)
08145 return (int) REG_NOMATCH;
08146 regs.end = regs.start + nmatch;
08147 }
08148
08149
08150 ret = re_search (&private_preg, string, len,
08151 0, len,
08152 want_reg_info ? ®s : (struct re_registers *) 0);
08153
08154
08155 if (want_reg_info)
08156 {
08157 if (ret >= 0)
08158 {
08159 unsigned r;
08160
08161 for (r = 0; r < nmatch; r++)
08162 {
08163 pmatch[r].rm_so = regs.start[r];
08164 pmatch[r].rm_eo = regs.end[r];
08165 }
08166 }
08167
08168
08169 free (regs.start);
08170 }
08171
08172
08173 return ret >= 0 ? (int) REG_NOERROR : (int) REG_NOMATCH;
08174 }
08175 #ifdef _LIBC
08176 weak_alias (__regexec, regexec)
08177 #endif
08178
08179
08180
08181
08182
08183 size_t
08184 regerror (errcode, preg, errbuf, errbuf_size)
08185 int errcode;
08186 const regex_t *preg ATTRIBUTE_UNUSED;
08187 char *errbuf;
08188 size_t errbuf_size;
08189 {
08190 const char *msg;
08191 size_t msg_size;
08192
08193 if (errcode < 0
08194 || errcode >= (int) (sizeof (re_error_msgid)
08195 / sizeof (re_error_msgid[0])))
08196
08197
08198
08199
08200 abort ();
08201
08202 msg = gettext (re_error_msgid[errcode]);
08203
08204 msg_size = strlen (msg) + 1;
08205
08206 if (errbuf_size != 0)
08207 {
08208 if (msg_size > errbuf_size)
08209 {
08210 #if defined HAVE_MEMPCPY || defined _LIBC
08211 *((char *) mempcpy (errbuf, msg, errbuf_size - 1)) = '\0';
08212 #else
08213 memcpy (errbuf, msg, errbuf_size - 1);
08214 errbuf[errbuf_size - 1] = 0;
08215 #endif
08216 }
08217 else
08218 memcpy (errbuf, msg, msg_size);
08219 }
08220
08221 return msg_size;
08222 }
08223 #ifdef _LIBC
08224 weak_alias (__regerror, regerror)
08225 #endif
08226
08227
08228
08229
08230 void
08231 regfree (preg)
08232 regex_t *preg;
08233 {
08234 if (preg->buffer != NULL)
08235 free (preg->buffer);
08236 preg->buffer = NULL;
08237
08238 preg->allocated = 0;
08239 preg->used = 0;
08240
08241 if (preg->fastmap != NULL)
08242 free (preg->fastmap);
08243 preg->fastmap = NULL;
08244 preg->fastmap_accurate = 0;
08245
08246 if (preg->translate != NULL)
08247 free (preg->translate);
08248 preg->translate = NULL;
08249 }
08250 #ifdef _LIBC
08251 weak_alias (__regfree, regfree)
08252 #endif
08253
08254 #endif
08255
08256 #endif
08257
08258
08259 #undef STORE_NUMBER
08260 #undef STORE_NUMBER_AND_INCR
08261 #undef EXTRACT_NUMBER
08262 #undef EXTRACT_NUMBER_AND_INCR
08263
08264 #undef DEBUG_PRINT_COMPILED_PATTERN
08265 #undef DEBUG_PRINT_DOUBLE_STRING
08266
08267 #undef INIT_FAIL_STACK
08268 #undef RESET_FAIL_STACK
08269 #undef DOUBLE_FAIL_STACK
08270 #undef PUSH_PATTERN_OP
08271 #undef PUSH_FAILURE_POINTER
08272 #undef PUSH_FAILURE_INT
08273 #undef PUSH_FAILURE_ELT
08274 #undef POP_FAILURE_POINTER
08275 #undef POP_FAILURE_INT
08276 #undef POP_FAILURE_ELT
08277 #undef DEBUG_PUSH
08278 #undef DEBUG_POP
08279 #undef PUSH_FAILURE_POINT
08280 #undef POP_FAILURE_POINT
08281
08282 #undef REG_UNSET_VALUE
08283 #undef REG_UNSET
08284
08285 #undef PATFETCH
08286 #undef PATFETCH_RAW
08287 #undef PATUNFETCH
08288 #undef TRANSLATE
08289
08290 #undef INIT_BUF_SIZE
08291 #undef GET_BUFFER_SPACE
08292 #undef BUF_PUSH
08293 #undef BUF_PUSH_2
08294 #undef BUF_PUSH_3
08295 #undef STORE_JUMP
08296 #undef STORE_JUMP2
08297 #undef INSERT_JUMP
08298 #undef INSERT_JUMP2
08299 #undef EXTEND_BUFFER
08300 #undef GET_UNSIGNED_NUMBER
08301 #undef FREE_STACK_RETURN
08302
08303 # undef POINTER_TO_OFFSET
08304 # undef MATCHING_IN_FRST_STRING
08305 # undef PREFETCH
08306 # undef AT_STRINGS_BEG
08307 # undef AT_STRINGS_END
08308 # undef WORDCHAR_P
08309 # undef FREE_VAR
08310 # undef FREE_VARIABLES
08311 # undef NO_HIGHEST_ACTIVE_REG
08312 # undef NO_LOWEST_ACTIVE_REG
08313
08314 # undef CHAR_T
08315 # undef UCHAR_T
08316 # undef COMPILED_BUFFER_VAR
08317 # undef OFFSET_ADDRESS_SIZE
08318 # undef CHAR_CLASS_SIZE
08319 # undef PREFIX
08320 # undef ARG_PREFIX
08321 # undef PUT_CHAR
08322 # undef BYTE
08323 # undef WCHAR
08324
08325 # define DEFINED_ONCE